minimalist_grammar_parser/
lexicon.rs

//! Module which defines the core functions to create or modify MG lexicons.

#[cfg(feature = "pretty")]
use serde::Serialize;

use crate::Direction;
use crate::parsing::PossibleTree;
use chumsky::{extra::ParserExtra, label::LabelError, text::TextExpected, util::MaybeRef};
use chumsky::{
    prelude::*,
    text::{inline_whitespace, newline},
};
use itertools::Itertools;

use logprob::{LogProb, LogSumExp, Softmax};

use petgraph::Direction::Outgoing;
use petgraph::dot::Dot;
use petgraph::prelude::StableDiGraph;
use petgraph::{
    graph::NodeIndex,
    visit::{EdgeRef, IntoNodeReferences},
};
use std::result::Result;
use std::{
    fmt::{Debug, Display},
    hash::Hash,
};
use thiserror::Error;

#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
///A possible feature of a lexical entry.
pub enum Feature<Category: Eq> {
    ///The category of a lexical entry.
    Category(Category),
    ///Possible complements or specifiers (e.g. =d or d=)
    Selector(Category, Direction),
    ///Head movement
    Affix(Category, Direction),
    ///Possible places for movers to go to (e.g. +wh)
    Licensor(Category),
    ///Marks that a lexical entry can move (e.g. -wh)
    Licensee(Category),
}

impl<C: Eq> Feature<C> {
    ///Get a reference to the inner category of a feature.
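    ///
    ///For example (a sketch; assumes `Feature` and `Direction` are importable as below):
    ///```
    /// # use minimalist_grammar_parser::lexicon::Feature;
    /// # use minimalist_grammar_parser::Direction;
    /// let f = Feature::Selector("d", Direction::Left);
    /// assert_eq!(f.inner(), &"d");
    ///```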
    pub fn inner(&self) -> &C {
        match self {
            Feature::Category(c)
            | Feature::Selector(c, _)
            | Feature::Affix(c, _)
            | Feature::Licensor(c)
            | Feature::Licensee(c) => c,
        }
    }

    ///Convert the [`Feature<C>`] to its inner category.
    pub fn into_inner(self) -> C {
        match self {
            Feature::Category(c)
            | Feature::Selector(c, _)
            | Feature::Affix(c, _)
            | Feature::Licensor(c)
            | Feature::Licensee(c) => c,
        }
    }

    ///Checks whether this feature is a [`Feature::Selector`] or not.
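    ///
    ///For example (same import assumptions as above):
    ///```
    /// # use minimalist_grammar_parser::lexicon::Feature;
    /// # use minimalist_grammar_parser::Direction;
    /// assert!(Feature::Selector("d", Direction::Right).is_selector());
    /// assert!(!Feature::Category("d").is_selector());
    ///```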
    pub fn is_selector(&self) -> bool {
        matches!(self, Self::Selector(_, _))
    }
}

#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
///Either a licensee or a category; used only in [`ParsingError`]
#[allow(missing_docs)]
pub enum LicenseeOrCategory<C> {
    Licensee(C),
    Category(C),
}

impl<C: Debug> Display for LicenseeOrCategory<C> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            LicenseeOrCategory::Licensee(c) => write!(f, "-{c:?}"),
            LicenseeOrCategory::Category(c) => write!(f, "{c:?}"),
        }
    }
}

#[derive(Error, Clone, Copy, Debug)]
///Indicates that there is something wrong with a lexicon
pub enum LexiconError {
    ///References a node that doesn't exist
    #[error("There is no node ({0:?})")]
    MissingNode(NodeIndex),

    ///References a lexeme that doesn't exist
    #[error("{0:?} is not a lexeme")]
    MissingLexeme(LexemeId),

    ///Lexicon is malformed since a node isn't a descendant of the root.
    #[error("Following the parents of node, ({0:?}), doesn't lead to root")]
    DoesntGoToRoot(NodeIndex),

    ///The parents of a node go beyond the root or go through a leaf.
    #[error("Following the parents of node, ({0:?}), we pass through the root or another lemma")]
    InvalidOrder(NodeIndex),
}

///Indicates a problem with parsing.
#[derive(Error, Clone, Copy, Debug)]
pub enum ParsingError<C>
where
    C: Debug,
{
    ///We tried to look up a licensee or category that is not in the grammar.
    #[error("There is nothing with feature ({0})")]
    NoLicensorOrCategory(LicenseeOrCategory<C>),
}

impl<A: Debug> ParsingError<A> {
    ///Converts an error's inner type, e.g. to its owned variant:
    ///
    ///```
    /// # use minimalist_grammar_parser::ParsingError;
    /// # use minimalist_grammar_parser::lexicon::LicenseeOrCategory;
    /// let x: ParsingError<&str> = ParsingError::NoLicensorOrCategory(LicenseeOrCategory::Category("s"));
    /// let y: ParsingError<String> = x.inner_into();
    ///```
    pub fn inner_into<B: From<A> + Debug>(self) -> ParsingError<B> {
        match self {
            ParsingError::NoLicensorOrCategory(LicenseeOrCategory::Licensee(c)) => {
                ParsingError::NoLicensorOrCategory(LicenseeOrCategory::Licensee(c.into()))
            }
            ParsingError::NoLicensorOrCategory(LicenseeOrCategory::Category(c)) => {
                ParsingError::NoLicensorOrCategory(LicenseeOrCategory::Category(c.into()))
            }
        }
    }
}

fn grammar_parser<'src>()
-> impl Parser<'src, &'src str, Lexicon<&'src str, &'src str>, extra::Err<Rich<'src, char>>> {
    entry_parser::<extra::Err<Rich<'src, char>>>()
        .separated_by(newline())
        .allow_leading()
        .allow_trailing()
        .collect::<Vec<_>>()
        .map(|x| Lexicon::new(x, true))
        .then_ignore(end())
}

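//Parses a single lexical entry of the form `lemma::=>x y= =z +w c -q`: an
//optional lemma (`ε` or nothing for an empty head), the `::` separator, an
//optional head-movement affix, any number of selectors and licensors, exactly
//one category, and any number of licensees.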
fn entry_parser<'src, E>() -> impl Parser<'src, &'src str, LexicalEntry<&'src str, &'src str>, E>
where
    E: ParserExtra<'src, &'src str>,
    E::Error: LabelError<'src, &'src str, TextExpected<'src, &'src str>>
        + LabelError<'src, &'src str, MaybeRef<'src, char>>
        + LabelError<'src, &'src str, &'static str>,
{
    let feature_name = any()
        .and_is(none_of([
            '\t', '\n', ' ', '+', '-', '=', '>', '<', ':', '\r',
        ]))
        .repeated()
        .at_least(1)
        .labelled("feature name")
        .to_slice();

    let affix = choice((
        feature_name
            .then_ignore(just("<="))
            .map(|x| Feature::Affix(x, Direction::Right))
            .labelled("right affix"),
        just("=>")
            .ignore_then(feature_name)
            .map(|x| Feature::Affix(x, Direction::Left))
            .labelled("left affix"),
    ));

    let pre_category_features = choice((
        feature_name
            .then_ignore(just("="))
            .map(|x| Feature::Selector(x, Direction::Right))
            .labelled("right selector"),
        just("=")
            .ignore_then(feature_name)
            .map(|x| Feature::Selector(x, Direction::Left))
            .labelled("left selector"),
        just("+")
            .ignore_then(feature_name)
            .map(Feature::Licensor)
            .labelled("licensor"),
    ));

    choice((
        just("ε").to(None),
        (none_of(['\t', '\n', ' ', '+', '-', '=', ':', '\r'])
            .repeated()
            .at_least(1)
            .to_slice())
        .or_not(),
    ))
    .labelled("lemma")
    .then_ignore(
        just("::")
            .padded_by(inline_whitespace())
            .labelled("lemma feature separator"),
    )
    .then(
        affix
            .or_not()
            .then_ignore(inline_whitespace().or_not())
            .then(
                pre_category_features
                    .separated_by(inline_whitespace())
                    .allow_trailing()
                    .collect::<Vec<_>>()
                    .labelled("pre category features"),
            )
            .map(|(affix, mut feats)| {
                if let Some(affix) = affix {
                    feats.insert(0, affix);
                }
                feats
            }),
    )
    .then(
        feature_name
            .map(Feature::Category)
            .labelled("category feature"),
    )
    .then(
        just('-')
            .ignore_then(feature_name)
            .map(Feature::Licensee)
            .labelled("licensee")
            .separated_by(inline_whitespace())
            .allow_leading()
            .allow_trailing()
            .collect::<Vec<_>>()
            .labelled("licensees"),
    )
    .map(|(((lemma, mut features), category), mut licensees)| {
        features.push(category);
        features.append(&mut licensees);
        LexicalEntry::new(lemma, features)
    })
}

#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) enum FeatureOrLemma<T: Eq, Category: Eq> {
    Root,
    Lemma(Option<T>),
    Feature(Feature<Category>),
    Complement(Category, Direction),
}

impl<T: Eq, C: Eq> FeatureOrLemma<T, C> {
    fn is_lemma(&self) -> bool {
        matches!(self, FeatureOrLemma::Lemma(_))
    }
}

#[cfg(feature = "sampling")]
impl<T: Eq, C: Eq + Clone> FeatureOrLemma<T, C> {
    fn to_complement_mut(&mut self) {
        if let FeatureOrLemma::Feature(Feature::Selector(c, d)) = self {
            *self = FeatureOrLemma::Complement(c.clone(), *d);
        }
    }
}

impl<T: Eq, Category: Eq> From<LexicalEntry<T, Category>> for Vec<FeatureOrLemma<T, Category>> {
    fn from(value: LexicalEntry<T, Category>) -> Self {
        let LexicalEntry { lemma, features } = value;
        std::iter::once(lemma)
            .map(FeatureOrLemma::Lemma)
            .chain(
                features
                    .into_iter()
                    .enumerate()
                    .map(|(i, feat)| match feat {
                        //A feature is a complement iff it's the first
                        //selector (i.e. the moment the head is first merged)
                        Feature::Selector(c, d) if i == 0 => FeatureOrLemma::Complement(c, d),
                        _ => FeatureOrLemma::Feature(feat),
                    }),
            )
            .collect()
    }
}

#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
///A representation of a lexical entry in a grammar.
pub struct LexicalEntry<T: Eq, Category: Eq> {
    pub(crate) lemma: Option<T>,
    pub(crate) features: Vec<Feature<Category>>,
}
impl<T: Eq, Category: Eq> LexicalEntry<T, Category> {
    ///Creates a new lexical entry
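    ///
    ///For example (a sketch; import paths assumed as below):
    ///```
    /// # use minimalist_grammar_parser::lexicon::{Feature, LexicalEntry};
    /// let entry = LexicalEntry::new(Some("John"), vec![Feature::Category("d")]);
    /// assert_eq!(entry.to_string(), "John::d");
    ///```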
    pub fn new(lemma: Option<T>, features: Vec<Feature<Category>>) -> LexicalEntry<T, Category> {
        LexicalEntry { lemma, features }
    }

    ///Get the lemma (possibly `None`) of a lexical entry
    pub fn lemma(&self) -> &Option<T> {
        &self.lemma
    }

    ///Gets all features of a lexical entry
    pub fn features(&self) -> &[Feature<Category>] {
        &self.features
    }

    ///Change the type of a [`LexicalEntry`]
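    ///
    ///For instance, converting a borrowed entry to an owned one (a sketch):
    ///```
    /// # use minimalist_grammar_parser::lexicon::LexicalEntry;
    /// let entry = LexicalEntry::parse("eats::d= =d V")?;
    /// let owned: LexicalEntry<String, String> = entry.remap(|t| t.to_string(), |c| c.to_string());
    /// assert_eq!(owned.to_string(), "eats::d= =d V");
    /// # Ok::<(), anyhow::Error>(())
    ///```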
    pub fn remap<'lex, T2: Eq, C2: Eq>(
        &'lex self,
        lemma_map: impl Fn(&'lex T) -> T2,
        category_map: impl Fn(&'lex Category) -> C2,
    ) -> LexicalEntry<T2, C2> {
        let lemma = self.lemma.as_ref().map(lemma_map);
        let features = self
            .features
            .iter()
            .map(|x| match x {
                Feature::Category(c) => Feature::Category(category_map(c)),
                Feature::Selector(c, direction) => Feature::Selector(category_map(c), *direction),
                Feature::Affix(c, direction) => Feature::Affix(category_map(c), *direction),
                Feature::Licensor(c) => Feature::Licensor(category_map(c)),
                Feature::Licensee(c) => Feature::Licensee(category_map(c)),
            })
            .collect();
        LexicalEntry { lemma, features }
    }

    ///Gets the category of a lexical entry
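    ///
    ///A short example (assuming the import path below):
    ///```
    /// # use minimalist_grammar_parser::lexicon::LexicalEntry;
    /// let entry = LexicalEntry::parse("eats::d= =d V").unwrap();
    /// assert_eq!(entry.category(), &"V");
    ///```
    ///
    ///# Panics
    ///Panics if the entry has no category feature.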
    pub fn category(&self) -> &Category {
        let mut cat = None;
        for lex in self.features.iter() {
            if let Feature::Category(c) = lex {
                cat = Some(c);
                break;
            }
        }
        cat.expect("All lexical entries must have one category")
    }
}

impl<T: Display + PartialEq + Eq, Category: Display + PartialEq + Eq> Display
    for LexicalEntry<T, Category>
{
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        if let Some(t) = &self.lemma {
            write!(f, "{t}::")?;
        } else {
            write!(f, "ε::")?;
        }
        for (i, feature) in self.features.iter().enumerate() {
            if i > 0 {
                write!(f, " ")?;
            }
            write!(f, "{feature}")?;
        }
        Ok(())
    }
}

impl<Category: Display + Eq> Display for Feature<Category> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Feature::Category(x) => write!(f, "{x}"),
            Feature::Selector(x, Direction::Left) => write!(f, "={x}"),
            Feature::Selector(x, Direction::Right) => write!(f, "{x}="),
            Feature::Affix(x, Direction::Left) => write!(f, "=>{x}"),
            Feature::Affix(x, Direction::Right) => write!(f, "{x}<="),
            Feature::Licensor(x) => write!(f, "+{x}"),
            Feature::Licensee(x) => write!(f, "-{x}"),
        }
    }
}

///The struct which represents a specific MG.
///It can be created by parsing a string:
///```
///# use minimalist_grammar_parser::Lexicon;
///let grammar: Lexicon<&str, &str> = Lexicon::from_string("John::d\nsaw::d= =d v\nMary::d")?;
///# Ok::<(), anyhow::Error>(())
///```
///It is also possible to make a [`Lexicon`] which uses different types for categories or lemmas, such as integers or [`String`]s.
///These can be made with [`Lexicon::remap_lexicon`] or by passing a vector of [`LexicalEntry`]
///to [`Lexicon::new`].
///
///## Lemmas
///
///Lemmas are represented as `Option<T>`: an empty lemma is `None` rather than an empty
///string, which allows for an arbitrary lemma type `T`.
///
///## Useful functions
///
/// - [`Lexicon::parse`] parses a string.
/// - [`Lexicon::parse_multiple`] parses multiple strings simultaneously.
/// - [`Lexicon::generate`] generates some of the strings of a grammar.
/// - [`Lexicon::valid_continuations`] finds the valid next tokens of a grammar given a prefix.
///
///
///## Representation
///The struct represents a lexicon as a graph, as per Stabler (2013).
///Certain functions exploit this representation: for example, [`Lexicon::n_nodes`] counts the
///nodes of the graph rather than the features of the grammar as written out.
///
/// - Stabler, E. (2013). Two Models of Minimalist, Incremental Syntactic Analysis. Topics in Cognitive Science, 5(3), 611–633. <https://doi.org/10.1111/tops.12031>
#[derive(Debug, Clone)]
pub struct Lexicon<T: Eq, Category: Eq> {
    graph: StableDiGraph<FeatureOrLemma<T, Category>, LogProb<f64>>,
    root: NodeIndex,
    leaves: Vec<LexemeId>,
}

#[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
///An ID for each lexeme in a grammar
pub struct LexemeId(pub(crate) NodeIndex);

#[cfg(feature = "pretty")]
impl Serialize for LexemeId {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        use serde::ser::SerializeTupleStruct;

        let mut s = serializer.serialize_tuple_struct("LexemeId", 1)?;
        s.serialize_field(&self.0.index())?;
        s.end()
    }
}

impl<T: Eq, Category: Eq> PartialEq for Lexicon<T, Category> {
    fn eq(&self, other: &Self) -> bool {
        self.root == other.root
            && self.leaves == other.leaves
            && self.graph.node_weights().eq(other.graph.node_weights())
            && self.graph.edge_weights().eq(other.graph.edge_weights())
    }
}
impl<T: Eq, Category: Eq> Eq for Lexicon<T, Category> {}
impl<T, Category> Lexicon<T, Category>
where
    T: Eq + std::fmt::Debug + Clone + Display,
    Category: Eq + std::fmt::Debug + Clone + Display,
{
    ///Renders the lexicon as a GraphViz dot string.
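    ///
    ///```
    /// # use minimalist_grammar_parser::Lexicon;
    /// let lex = Lexicon::from_string("John::d")?;
    /// assert!(lex.graphviz().starts_with("digraph"));
    /// # Ok::<(), anyhow::Error>(())
    ///```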
    pub fn graphviz(&self) -> String {
        let dot = Dot::new(&self.graph);
        format!("{dot}")
    }
}

fn renormalise_weights<T: Eq + Clone, C: Eq + Clone>(
    mut graph: StableDiGraph<FeatureOrLemma<T, C>, f64>,
) -> StableDiGraph<FeatureOrLemma<T, C>, LogProb<f64>> {
    //Renormalise probabilities to sum to one.
    for node_index in graph.node_indices().collect_vec() {
        let edges: Vec<_> = graph
            .edges_directed(node_index, petgraph::Direction::Outgoing)
            .map(|e| (*e.weight(), e.id()))
            .collect();

        match edges.len().cmp(&1) {
            std::cmp::Ordering::Equal => {
                for (_, edge) in edges {
                    graph[edge] = 0.0;
                }
            }
            std::cmp::Ordering::Greater => {
                let dist = edges
                    .iter()
                    .map(|(w, _edge)| *w)
                    .softmax()
                    .unwrap()
                    .map(|x| x.into_inner());

                for (new_weight, (_weight, edge)) in dist.zip(edges.iter()) {
                    graph[*edge] = new_weight;
                }
            }
            _ => (),
        }
    }

    //TODO: Get rid of this annoying clone.
    graph.map(|_, n| n.clone(), |_, e| LogProb::new(*e).unwrap())
}

impl<T: Eq + std::fmt::Debug + Clone, Category: Eq + std::fmt::Debug + Clone> Lexicon<T, Category> {
    pub(crate) fn possible_heads(
        &self,
        nx: NodeIndex,
        depth: usize,
    ) -> Result<PossibleTree, ParsingError<Category>> {
        if depth > 10 {
            return Ok(PossibleTree::empty());
        }
        let Some(FeatureOrLemma::Feature(Feature::Affix(category, direction))) =
            self.graph.node_weight(nx)
        else {
            panic!("Node must be an affix!")
        };

        let mut children = vec![];
        let mut lemmas = vec![];
        let x: NodeIndex = self.find_category(category)?;

        let mut stack = vec![x];

        while let Some(nx) = stack.pop() {
            match self.graph.node_weight(nx).unwrap() {
                FeatureOrLemma::Feature(Feature::Affix(..)) => {
                    children.push(self.possible_heads(nx, depth + 1)?)
                }
                FeatureOrLemma::Lemma(_) => lemmas.push(LexemeId(nx)),
                _ => stack.extend(self.children_of(nx)),
            }
        }
        children.push(PossibleTree::just_heads(lemmas));

        let heads: Vec<LexemeId> = self
            .children_of(nx)
            .filter_map(|child| {
                if matches!(
                    self.graph.node_weight(child).unwrap(),
                    FeatureOrLemma::Lemma(_)
                ) {
                    Some(LexemeId(child))
                } else {
                    None
                }
            })
            .collect();
        Ok(PossibleTree::just_heads(heads).merge(children, *direction))
    }
}

///Iterator that climbs up the lexicon graph until it reaches the root, yielding each feature along the way.
pub struct Climber<'a, T: Eq, C: Eq> {
    lex: &'a Lexicon<T, C>,
    pos: NodeIndex,
}

impl<'a, T: Eq, C: Eq + Clone> Iterator for Climber<'a, T, C> {
    type Item = Feature<C>;

    fn next(&mut self) -> Option<Self::Item> {
        self.pos = self.lex.parent_of(self.pos)?;

        match self.lex.graph.node_weight(self.pos) {
            Some(x) => match x {
                FeatureOrLemma::Root => None,
                FeatureOrLemma::Lemma(_) => None,
                FeatureOrLemma::Feature(feature) => Some(feature.clone()),
                FeatureOrLemma::Complement(c, direction) => {
                    Some(Feature::Selector(c.clone(), *direction))
                }
            },
            None => None,
        }
    }
}

pub(crate) fn fix_weights_per_node<T: Eq, C: Eq>(
    graph: &mut StableDiGraph<FeatureOrLemma<T, C>, LogProb<f64>>,
    node_index: NodeIndex,
) {
    let sum = graph
        .edges_directed(node_index, Outgoing)
        .map(|x| x.weight())
        .log_sum_exp_float_no_alloc();

    if sum != 0.0 {
        let edges: Vec<_> = graph
            .edges_directed(node_index, Outgoing)
            .map(|e| e.id())
            .collect();
        for e in edges {
            graph[e] = LogProb::new(graph[e].into_inner() - sum).unwrap();
        }
    }
}

pub(crate) fn fix_weights<T: Eq + Clone, C: Eq + Clone>(
    graph: &mut StableDiGraph<FeatureOrLemma<T, C>, LogProb<f64>>,
) {
    //Renormalise probabilities to sum to one.
    for node_index in graph.node_indices().collect_vec() {
        fix_weights_per_node(graph, node_index);
    }
}

impl<T: Eq, Category: Eq> Lexicon<T, Category> {
    pub(crate) fn add_lexical_entry(
        &mut self,
        lexical_entry: LexicalEntry<T, Category>,
    ) -> Option<LexemeId> {
        let LexicalEntry { lemma, features } = lexical_entry;

        let mut new_nodes = vec![FeatureOrLemma::Lemma(lemma)];
        for (i, f) in features.into_iter().enumerate() {
            let f = if i == 0 && f.is_selector() {
                let Feature::Selector(c, d) = f else {
                    panic!("We checked it was a selector!")
                };
                FeatureOrLemma::Complement(c, d)
            } else {
                FeatureOrLemma::Feature(f)
            };
            new_nodes.push(f);
        }

        let mut pos = self.root;
        while let Some(f) = new_nodes.pop() {
            if let Some(p) = self.children_of(pos).find(|x| self.find(*x) == Some(&f)) {
                pos = p;
            } else {
                let new_pos = self.graph.add_node(f);
                self.graph.add_edge(pos, new_pos, LogProb::prob_of_one());
                fix_weights_per_node(&mut self.graph, pos);
                pos = new_pos;
            }
        }
        if self.leaves.contains(&LexemeId(pos)) {
            None
        } else {
            self.leaves.push(LexemeId(pos));
            Some(LexemeId(pos))
        }
    }

    ///The number of children of a node
    pub(crate) fn n_children(&self, nx: NodeIndex) -> usize {
        self.graph
            .edges_directed(nx, petgraph::Direction::Outgoing)
            .count()
    }

    ///The children of a node
    pub(crate) fn children_of(&self, nx: NodeIndex) -> impl Iterator<Item = NodeIndex> + '_ {
        self.graph
            .edges_directed(nx, petgraph::Direction::Outgoing)
            .map(|e| e.target())
    }

    ///Gets a node's feature or lemma, if the node exists
    pub(crate) fn find(&self, nx: NodeIndex) -> Option<&FeatureOrLemma<T, Category>> {
        self.graph.node_weight(nx)
    }

    ///Climb up a node over all of its features
    pub(crate) fn node_to_features<'a>(&'a self, nx: NodeIndex) -> Climber<'a, T, Category> {
        Climber { lex: self, pos: nx }
    }

    ///Climb up a lexeme over all of its features
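    ///
    ///The features come back in the order they are written in the grammar:
    ///```
    /// # use minimalist_grammar_parser::Lexicon;
    /// # use minimalist_grammar_parser::lexicon::Feature;
    /// let lex = Lexicon::from_string("John::d")?;
    /// let feats: Vec<_> = lex.leaf_to_features(lex.leaves()[0]).unwrap().collect();
    /// assert_eq!(feats, vec![Feature::Category("d")]);
    /// # Ok::<(), anyhow::Error>(())
    ///```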
    pub fn leaf_to_features<'a>(&'a self, lexeme: LexemeId) -> Option<Climber<'a, T, Category>> {
        if !matches!(
            self.graph.node_weight(lexeme.0),
            Some(FeatureOrLemma::Lemma(_))
        ) {
            return None;
        }

        Some(Climber {
            lex: self,
            pos: lexeme.0,
        })
    }

    ///Gets the lemma of a lexeme.
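    ///
    ///```
    /// # use minimalist_grammar_parser::Lexicon;
    /// let lex = Lexicon::from_string("John::d")?;
    /// assert_eq!(lex.leaf_to_lemma(lex.leaves()[0]), Some(&Some("John")));
    /// # Ok::<(), anyhow::Error>(())
    ///```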
    pub fn leaf_to_lemma(&self, lexeme_id: LexemeId) -> Option<&Option<T>> {
        match self.graph.node_weight(lexeme_id.0) {
            Some(x) => {
                if let FeatureOrLemma::Lemma(l) = x {
                    Some(l)
                } else {
                    None
                }
            }
            None => None,
        }
    }

    ///Get the category of a lexeme.
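    ///
    ///```
    /// # use minimalist_grammar_parser::Lexicon;
    /// let lex = Lexicon::from_string("saw::d= =d v")?;
    /// assert_eq!(lex.category(lex.leaves()[0]), Some(&"v"));
    /// # Ok::<(), anyhow::Error>(())
    ///```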
    pub fn category(&self, lexeme_id: LexemeId) -> Option<&Category> {
        let mut pos = lexeme_id.0;
        while !matches!(
            self.graph[pos],
            FeatureOrLemma::Feature(Feature::Category(_))
        ) {
            let new_pos = self.parent_of(pos)?;
            pos = new_pos
        }
        let FeatureOrLemma::Feature(Feature::Category(cat)) = &self.graph[pos] else {
            return None;
        };
        Some(cat)
    }

    ///Get the parent of a node.
    pub(crate) fn parent_of(&self, nx: NodeIndex) -> Option<NodeIndex> {
        self.graph
            .edges_directed(nx, petgraph::Direction::Incoming)
            .next()
            .map(|e| e.source())
    }
}

impl<T: Eq + std::fmt::Debug + Clone, Category: Eq + std::fmt::Debug + Clone> Lexicon<T, Category> {
    ///Checks if `nx` is a complement (or a head-movement affix)
    pub fn is_complement(&self, nx: NodeIndex) -> bool {
        matches!(
            self.graph.node_weight(nx).unwrap(),
            FeatureOrLemma::Complement(_, _) | FeatureOrLemma::Feature(Feature::Affix(_, _))
        )
    }

    ///The number of nodes in a lexicon.
    pub fn n_nodes(&self) -> usize {
        self.graph.node_count()
    }

    ///Returns the leaves of a grammar
    pub fn leaves(&self) -> &[LexemeId] {
        &self.leaves
    }

    ///Returns all leaves grouped together with their siblings (i.e. lexemes that are identical
    ///except for their lemma).
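    ///
    ///For example, `John` and `Mary` below share all their features and so form one group:
    ///```
    /// # use minimalist_grammar_parser::Lexicon;
    /// let lex = Lexicon::from_string("John::d\nMary::d\nsaw::d= =d v")?;
    /// assert_eq!(lex.sibling_leaves().len(), 2);
    /// # Ok::<(), anyhow::Error>(())
    ///```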
    pub fn sibling_leaves(&self) -> Vec<Vec<LexemeId>> {
        let mut leaves = self.leaves.clone();
        let mut result = vec![];
        while let Some(leaf) = leaves.pop() {
            let siblings: Vec<_> = self
                .children_of(self.parent_of(leaf.0).unwrap())
                .filter_map(|a| {
                    if matches!(self.graph.node_weight(a).unwrap(), FeatureOrLemma::Lemma(_)) {
                        Some(LexemeId(a))
                    } else {
                        None
                    }
                })
                .collect();
            leaves.retain(|x| !siblings.contains(x));
            result.push(siblings);
        }
        result
    }

    ///Gets each lexical entry along with its ID.
    pub fn lexemes_and_ids(
        &self,
    ) -> Result<impl Iterator<Item = (LexemeId, LexicalEntry<T, Category>)>, LexiconError> {
        Ok(self.leaves().iter().copied().zip(self.lexemes()?))
    }

    ///Turns a lexicon into a `Vec<LexicalEntry<T, Category>>` which can be useful for printing or
    ///investigating individual lexical entries.
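    ///
    ///```
    /// # use minimalist_grammar_parser::Lexicon;
    /// let lex = Lexicon::from_string("John::d\nsaw::d= =d v")?;
    /// let entries = lex.lexemes()?;
    /// assert_eq!(entries.len(), 2);
    /// assert_eq!(entries[0].to_string(), "John::d");
    /// # Ok::<(), anyhow::Error>(())
    ///```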
    pub fn lexemes(&self) -> Result<Vec<LexicalEntry<T, Category>>, LexiconError> {
        let mut v = vec![];

        //NOTE: Must guarantee to iterate in this order.
        for leaf in self.leaves.iter() {
            if let FeatureOrLemma::Lemma(lemma) = &self.graph[leaf.0] {
                let mut features = vec![];
                let mut nx = leaf.0;
                while let Some(parent) = self.parent_of(nx) {
                    if parent == self.root {
                        break;
                    } else if let FeatureOrLemma::Feature(f) = &self.graph[parent] {
                        features.push(f.clone());
                    } else if let FeatureOrLemma::Complement(c, d) = &self.graph[parent] {
                        features.push(Feature::Selector(c.clone(), *d));
                    }
                    nx = parent;
                }

                v.push(LexicalEntry {
                    lemma: lemma.as_ref().cloned(),
                    features,
                })
            } else {
                return Err(LexiconError::MissingLexeme(*leaf));
            }
        }
        Ok(v)
    }

    ///Given a leaf node, return its [`LexicalEntry`]
    pub fn get_lexical_entry(
        &self,
        lexeme_id: LexemeId,
    ) -> Result<LexicalEntry<T, Category>, LexiconError> {
        let Some(lemma) = self.graph.node_weight(lexeme_id.0) else {
            return Err(LexiconError::MissingLexeme(lexeme_id));
        };
        let lemma = match lemma {
            FeatureOrLemma::Lemma(lemma) => lemma,
            FeatureOrLemma::Feature(_)
            | FeatureOrLemma::Root
            | FeatureOrLemma::Complement(_, _) => {
                return Err(LexiconError::MissingLexeme(lexeme_id));
            }
        }
        .clone();

        let features = self.leaf_to_features(lexeme_id).unwrap().collect();
        Ok(LexicalEntry { features, lemma })
    }

    ///Get all lemmas of a grammar
    pub fn lemmas(&self) -> impl Iterator<Item = &Option<T>> {
        self.leaves.iter().filter_map(|x| match &self.graph[x.0] {
            FeatureOrLemma::Lemma(x) => Some(x),
            _ => None,
        })
    }

    ///Create a new grammar from a [`Vec`] of [`LexicalEntry`].
    ///If `collapse_lemmas` is true, duplicate lexical entries are collapsed into a single leaf;
    ///otherwise each entry gets its own leaf.
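    ///
    ///A sketch (import paths assumed as below):
    ///```
    /// # use minimalist_grammar_parser::Lexicon;
    /// # use minimalist_grammar_parser::lexicon::LexicalEntry;
    /// let lex = Lexicon::new(vec![LexicalEntry::parse("John::d")?], true);
    /// assert_eq!(lex.to_string(), "John::d");
    /// # Ok::<(), anyhow::Error>(())
    ///```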
    pub fn new(items: Vec<LexicalEntry<T, Category>>, collapse_lemmas: bool) -> Self {
        let n_items = items.len();
        Self::new_with_weights(
            items,
            std::iter::repeat_n(1.0, n_items).collect(),
            collapse_lemmas,
        )
    }

    ///Create a new grammar from a [`Vec`] of [`LexicalEntry`] where each lexical entry is given
    ///a weight; the outgoing weights at each node are renormalised into probabilities.
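    ///
    ///A sketch where `a` is weighted more heavily than `b` (import paths assumed):
    ///```
    /// # use minimalist_grammar_parser::Lexicon;
    /// # use minimalist_grammar_parser::lexicon::LexicalEntry;
    /// let entries = vec![LexicalEntry::parse("a::x")?, LexicalEntry::parse("b::x")?];
    /// let lex = Lexicon::new_with_weights(entries, vec![2.0, 1.0], true);
    /// # Ok::<(), anyhow::Error>(())
    ///```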
    pub fn new_with_weights(
        items: Vec<LexicalEntry<T, Category>>,
        weights: Vec<f64>,
        collapse_lemmas: bool,
    ) -> Self {
        let mut graph = StableDiGraph::new();
        let root_index = graph.add_node(FeatureOrLemma::Root);
        let mut leaves = vec![];

        for (lexeme, weight) in items.into_iter().zip(weights.into_iter()) {
            let lexeme: Vec<FeatureOrLemma<T, Category>> = lexeme.into();
            let mut node_index = root_index;

            for feature in lexeme.into_iter().rev() {
                //We go down the feature list and add nodes and edges corresponding to features.
                //If they exist already, we just follow the path until we have to start adding.
                node_index = if (!feature.is_lemma() || collapse_lemmas)
                    && let Some((nx, edge_index)) = graph
                        .edges_directed(node_index, petgraph::Direction::Outgoing)
                        .find(|x| graph[x.target()] == feature)
                        .map(|x| (x.target(), x.id()))
                {
                    graph[edge_index] += weight;
                    nx
                } else {
                    let new_node_index = graph.add_node(feature);
                    graph.add_edge(node_index, new_node_index, weight);
                    new_node_index
                };

                if let FeatureOrLemma::Lemma(_) = graph[node_index] {
                    leaves.push(LexemeId(node_index));
                };
            }
        }
        let graph = renormalise_weights(graph);
        Lexicon {
            graph,
            leaves,
            root: root_index,
        }
    }

    ///Get the node corresponding to a category
    pub(crate) fn find_category(
        &self,
        category: &Category,
    ) -> Result<NodeIndex, ParsingError<Category>> {
        match self
            .graph
            .neighbors_directed(self.root, petgraph::Direction::Outgoing)
            .find(|i| match &self.graph[*i] {
                FeatureOrLemma::Feature(Feature::Category(c)) => c == category,
                _ => false,
            }) {
            Some(x) => Ok(x),
            None => Err(ParsingError::NoLicensorOrCategory(
                LicenseeOrCategory::Category(category.clone()),
            )),
        }
    }

    ///Get the node corresponding to a licensee
    pub(crate) fn find_licensee(
        &self,
        category: &Category,
    ) -> Result<NodeIndex, ParsingError<Category>> {
        match self
            .graph
            .neighbors_directed(self.root, petgraph::Direction::Outgoing)
            .find(|i| match &self.graph[*i] {
                FeatureOrLemma::Feature(Feature::Licensee(c)) => c == category,
                _ => false,
            }) {
            Some(x) => Ok(x),
            None => Err(ParsingError::NoLicensorOrCategory(
                LicenseeOrCategory::Licensee(category.clone()),
            )),
        }
    }

    pub(crate) fn get(
        &self,
        nx: NodeIndex,
    ) -> Option<(&FeatureOrLemma<T, Category>, LogProb<f64>)> {
        if let Some(x) = self.graph.node_weight(nx) {
            let p = self
                .graph
                .edges_directed(nx, petgraph::Direction::Incoming)
                .next()
                .unwrap()
                .weight();
            Some((x, *p))
        } else {
            None
        }
    }

    ///Get the category of a node's feature, if it has one.
    pub(crate) fn get_feature_category(&self, nx: NodeIndex) -> Option<&Category> {
        self.graph.node_weight(nx).and_then(|x| match x {
            FeatureOrLemma::Root => None,
            FeatureOrLemma::Lemma(_) => None,
            FeatureOrLemma::Feature(feature) => match feature {
                Feature::Category(c)
                | Feature::Selector(c, _)
                | Feature::Licensor(c)
                | Feature::Licensee(c)
                | Feature::Affix(c, _) => Some(c),
            },
            FeatureOrLemma::Complement(c, _) => Some(c),
        })
    }

    ///Iterate over all categories of a grammar
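    ///
    ///```
    /// # use minimalist_grammar_parser::Lexicon;
    /// let lex = Lexicon::from_string("::V= +Z C -W")?;
    /// assert_eq!(lex.categories().collect::<Vec<_>>(), vec![&"C"]);
    /// # Ok::<(), anyhow::Error>(())
    ///```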
    pub fn categories(&self) -> impl Iterator<Item = &Category> {
        self.graph.node_references().filter_map(|(_, x)| match x {
            FeatureOrLemma::Feature(Feature::Category(x)) => Some(x),
            _ => None,
        })
    }

    ///Iterate over all licensors of a grammar
    pub fn licensor_types(&self) -> impl Iterator<Item = &Category> {
        self.graph.node_references().filter_map(|(_, x)| match x {
            FeatureOrLemma::Feature(Feature::Licensor(x))
            | FeatureOrLemma::Feature(Feature::Licensee(x)) => Some(x),
            _ => None,
        })
    }
}

impl<'src> Lexicon<&'src str, &'src str> {
    ///Parse a grammar and return a [`Lexicon<&str, &str>`]
    pub fn from_string(s: &'src str) -> Result<Self, LexiconParsingError<'src>> {
        grammar_parser()
            .padded()
            .then_ignore(end())
            .parse(s)
            .into_result()
            .map_err(|x| x.into())
    }
}

impl<T: Eq, C: Eq> Lexicon<T, C> {
    ///Converts a `Lexicon<T,C>` to a `Lexicon<T2, C2>` according to two functions that remap
    ///the lemmas and the categories.
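    ///
    ///For instance, converting a borrowed lexicon into an owned one (a sketch):
    ///```
    /// # use minimalist_grammar_parser::Lexicon;
    /// let lex = Lexicon::from_string("John::d")?;
    /// let owned: Lexicon<String, String> = lex.remap_lexicon(|t| t.to_string(), |c| c.to_string());
    /// # Ok::<(), anyhow::Error>(())
    ///```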
    pub fn remap_lexicon<'lex, T2: Eq, C2: Eq>(
        &'lex self,
        lemma_map: impl Fn(&'lex T) -> T2,
        category_map: impl Fn(&'lex C) -> C2,
    ) -> Lexicon<T2, C2> {
        let Lexicon {
            graph,
            root,
            leaves,
        } = self;

        //Per StableGraph documentation, the node indices are still valid on the mapped data
        //structure.
        let graph = graph.map(
            |_, x| match x {
                FeatureOrLemma::Root => FeatureOrLemma::Root,
                FeatureOrLemma::Lemma(Some(s)) => FeatureOrLemma::Lemma(Some(lemma_map(s))),
                FeatureOrLemma::Lemma(None) => FeatureOrLemma::Lemma(None),
                FeatureOrLemma::Feature(Feature::Category(c)) => {
                    FeatureOrLemma::Feature(Feature::Category(category_map(c)))
                }
                FeatureOrLemma::Feature(Feature::Licensor(c)) => {
                    FeatureOrLemma::Feature(Feature::Licensor(category_map(c)))
                }
                FeatureOrLemma::Feature(Feature::Licensee(c)) => {
                    FeatureOrLemma::Feature(Feature::Licensee(category_map(c)))
                }
                FeatureOrLemma::Feature(Feature::Affix(c, d)) => {
                    FeatureOrLemma::Feature(Feature::Affix(category_map(c), *d))
                }
                FeatureOrLemma::Feature(Feature::Selector(c, d)) => {
                    FeatureOrLemma::Feature(Feature::Selector(category_map(c), *d))
                }
                FeatureOrLemma::Complement(c, direction) => {
                    FeatureOrLemma::Complement(category_map(c), *direction)
                }
            },
            |_, e| *e,
        );
        Lexicon {
            graph,
            root: *root,
            leaves: leaves.clone(),
        }
    }
}

impl<'a> Lexicon<&'a str, &'a str> {
    ///Converts from `Lexicon<&str, &str>` to `Lexicon<String, String>`
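    ///
    ///```
    /// # use minimalist_grammar_parser::Lexicon;
    /// let lex = Lexicon::from_string("John::d")?;
    /// let owned: Lexicon<String, String> = lex.to_owned_values();
    /// # Ok::<(), anyhow::Error>(())
    ///```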
    pub fn to_owned_values(&self) -> Lexicon<String, String> {
        let Lexicon {
            graph,
            root,
            leaves,
        } = self;

        let graph = graph.map(
            |_, x| match *x {
                FeatureOrLemma::Root => FeatureOrLemma::Root,
                FeatureOrLemma::Lemma(s) => FeatureOrLemma::Lemma(s.map(|x| x.to_owned())),
                FeatureOrLemma::Feature(Feature::Category(c)) => {
                    FeatureOrLemma::Feature(Feature::Category(c.to_owned()))
                }
                FeatureOrLemma::Feature(Feature::Affix(c, d)) => {
                    FeatureOrLemma::Feature(Feature::Affix(c.to_owned(), d))
                }
                FeatureOrLemma::Feature(Feature::Licensor(c)) => {
                    FeatureOrLemma::Feature(Feature::Licensor(c.to_owned()))
                }
                FeatureOrLemma::Feature(Feature::Licensee(c)) => {
                    FeatureOrLemma::Feature(Feature::Licensee(c.to_owned()))
                }
                FeatureOrLemma::Feature(Feature::Selector(c, d)) => {
                    FeatureOrLemma::Feature(Feature::Selector(c.to_owned(), d))
                }
                FeatureOrLemma::Complement(c, direction) => {
                    FeatureOrLemma::Complement(c.to_owned(), direction)
                }
            },
            |_, e| *e,
        );
        Lexicon {
            graph,
            root: *root,
            leaves: leaves.clone(),
        }
    }
}

///Problem parsing a grammar; wraps a vector of all the [`Rich`] errors from Chumsky
#[derive(Error, Debug, Clone)]
pub struct LexiconParsingError<'a>(pub Vec<Rich<'a, char>>);

impl<'a> Display for LexiconParsingError<'a> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        writeln!(
            f,
            "{}",
            self.0
                .iter()
                .map(|e| e.to_string())
                .collect::<Vec<_>>()
                .join("\n")
        )
    }
}

impl<'a> From<Vec<Rich<'a, char>>> for LexiconParsingError<'a> {
    fn from(value: Vec<Rich<'a, char>>) -> Self {
        LexiconParsingError(value)
    }
}

impl LexicalEntry<&str, &str> {
    ///Parses a single lexical entry and returns it as a [`LexicalEntry`].
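    ///
    ///```
    /// # use minimalist_grammar_parser::lexicon::LexicalEntry;
    /// let entry = LexicalEntry::parse("John::d")?;
    /// assert_eq!(entry.lemma(), &Some("John"));
    /// assert_eq!(entry.category(), &"d");
    /// # Ok::<(), anyhow::Error>(())
    ///```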
    pub fn parse<'a>(
        s: &'a str,
    ) -> Result<LexicalEntry<&'a str, &'a str>, LexiconParsingError<'a>> {
        entry_parser::<extra::Err<Rich<char>>>()
            .parse(s)
            .into_result()
            .map_err(|e| e.into())
    }
}

impl<T, C> std::fmt::Display for FeatureOrLemma<T, C>
where
    T: Eq + Display,
    C: Eq + Display,
{
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            FeatureOrLemma::Root => write!(f, "root"),
            FeatureOrLemma::Lemma(lemma) => {
                if let Some(l) = lemma {
                    write!(f, "{l}")
                } else {
                    write!(f, "ε")
                }
            }
            FeatureOrLemma::Feature(feature) => write!(f, "{feature}"),
            FeatureOrLemma::Complement(c, d) => write!(f, "{}", Feature::Selector(c, *d)),
        }
    }
}

impl<T, C> Display for Lexicon<T, C>
where
    T: Eq + Display + std::fmt::Debug + Clone,
    C: Eq + Display + std::fmt::Debug + Clone,
{
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "{}",
            self.lexemes()
                .unwrap()
                .iter()
                .map(|l| l.to_string())
                .join("\n")
        )
    }
}

#[cfg(feature = "semantics")]
mod semantics;

#[cfg(feature = "semantics")]
pub use semantics::SemanticLexicon;

pub mod mdl;

#[cfg(feature = "sampling")]
pub mod mutations;

#[cfg(test)]
mod tests {

    use std::collections::HashSet;

    use anyhow::Result;

    use super::*;

    #[test]
    fn categories() -> Result<()> {
        let lex = Lexicon::from_string("::V= +Z C -W")?;

        assert_eq!(vec![&"C"], lex.categories().collect::<Vec<_>>());
        let mut lice = lex.licensor_types().collect::<Vec<_>>();
        lice.sort();
        assert_eq!(vec![&"W", &"Z"], lice);
        Ok(())
    }

    type SimpleLexicalEntry<'a> = LexicalEntry<&'a str, &'a str>;
    #[test]
    fn parsing() {
        assert_eq!(
            SimpleLexicalEntry::parse("John::d").unwrap(),
            SimpleLexicalEntry {
                lemma: Some("John"),
                features: vec![Feature::Category("d")]
            }
        );
        assert_eq!(
            SimpleLexicalEntry::parse("eats::d= =d V").unwrap(),
            SimpleLexicalEntry {
                lemma: Some("eats"),
                features: vec![
                    Feature::Selector("d", Direction::Right),
                    Feature::Selector("d", Direction::Left),
                    Feature::Category("V")
                ]
            }
        );
        assert_eq!(
            SimpleLexicalEntry::parse("::d= =d V").unwrap(),
            SimpleLexicalEntry::new(
                None,
                vec![
                    Feature::Selector("d", Direction::Right),
                    Feature::Selector("d", Direction::Left),
                    Feature::Category("V")
                ]
            )
        );
        assert_eq!(
            SimpleLexicalEntry::parse("ε::d= =d V").unwrap(),
            SimpleLexicalEntry {
                lemma: None,
                features: vec![
                    Feature::Selector("d", Direction::Right),
                    Feature::Selector("d", Direction::Left),
                    Feature::Category("V")
                ]
            }
        );
        assert_eq!(
            SimpleLexicalEntry::parse("ε::d<= V").unwrap(),
            SimpleLexicalEntry {
                lemma: None,
                features: vec![
                    Feature::Affix("d", Direction::Right),
                    Feature::Category("V")
                ]
            }
        );
    }

    #[test]
    fn get_lexical_entry() -> anyhow::Result<()> {
        let entries: HashSet<_> = STABLER2011
            .split('\n')
            .map(SimpleLexicalEntry::parse)
            .collect::<Result<_, LexiconParsingError>>()?;
        let lex = Lexicon::from_string(STABLER2011)?;
        for nx in &lex.leaves {
            let lexical_entry = lex.get_lexical_entry(*nx)?;
            assert!(entries.contains(&lexical_entry));
        }
        Ok(())
    }
    #[test]
    fn get_category() {
        assert_eq!(
            *SimpleLexicalEntry::parse("eats::d= =d V")
                .unwrap()
                .category(),
            "V"
        );
        assert_eq!(
            *SimpleLexicalEntry::parse("eats::d= V -d")
                .unwrap()
                .category(),
            "V"
        );
        assert_eq!(
            *SimpleLexicalEntry::parse("eats::C -d").unwrap().category(),
            "C"
        );
        assert_eq!(
            *SimpleLexicalEntry::parse("eats::+w Z -d")
                .unwrap()
                .category(),
            "Z"
        );
    }

    #[test]
    fn convert_to_vec() {
        let x: Vec<FeatureOrLemma<_, _>> =
            SimpleLexicalEntry::parse("eats::d= =d V").unwrap().into();
        assert_eq!(
            x,
            vec![
                FeatureOrLemma::Lemma(Some("eats")),
                FeatureOrLemma::Complement("d", Direction::Right),
                FeatureOrLemma::Feature(Feature::Selector("d", Direction::Left)),
                FeatureOrLemma::Feature(Feature::Category("V")),
            ]
        );
    }

    use crate::grammars::{COPY_LANGUAGE, STABLER2011};
    use petgraph::dot::Dot;

    #[test]
    fn category() -> anyhow::Result<()> {
        let lex = Lexicon::from_string(STABLER2011)?;
        let leaves = lex
            .leaves()
            .iter()
            .map(|&x| (*lex.leaf_to_lemma(x).unwrap(), *lex.category(x).unwrap()))
            .collect::<Vec<_>>();
        assert_eq!(
            leaves,
            vec![
                (None, "C"),
                (None, "C"),
                (Some("knows"), "V"),
                (Some("says"), "V"),
                (Some("prefers"), "V"),
                (Some("drinks"), "V"),
                (Some("king"), "N"),
                (Some("wine"), "N"),
                (Some("beer"), "N"),
                (Some("queen"), "N"),
                (Some("the"), "D"),
                (Some("which"), "D")
            ]
        );
        Ok(())
    }

    #[test]
    fn siblings() -> anyhow::Result<()> {
        let lex = Lexicon::from_string(STABLER2011)?;
        let siblings = lex.sibling_leaves();
        let siblings = siblings
            .into_iter()
            .map(|x| {
                x.into_iter()
                    .map(|x| *lex.leaf_to_lemma(x).unwrap())
                    .collect::<Vec<_>>()
            })
            .collect::<Vec<_>>();
        assert_eq!(
            siblings,
            vec![
                vec![Some("which")],
                vec![Some("the")],
                vec![Some("queen"), Some("beer"), Some("wine"), Some("king")],
                vec![Some("drinks"), Some("prefers")],
                vec![Some("says"), Some("knows")],
                vec![None],
                vec![None]
            ]
        );
        Ok(())
    }

    #[test]
    fn initialize_lexicon() -> anyhow::Result<()> {
        let strings: Vec<&str> = STABLER2011.split('\n').collect();

        let lex = Lexicon::from_string(STABLER2011)?;
        for lex in lex.lexemes()? {
            assert!(
                strings.contains(&format!("{}", lex).as_str())
                    || strings.contains(&format!("{}", lex).replace('ε', "").as_str())
            )
        }
        let lex_2 = Lexicon::new(lex.lexemes().unwrap(), false);
        assert_eq!(lex, lex_2);
        assert_eq!(
            format!("{}", Dot::new(&lex.graph)),
            "digraph {
    0 [ label = \"root\" ]
    1 [ label = \"C\" ]
    2 [ label = \"V=\" ]
    3 [ label = \"ε\" ]
    4 [ label = \"+W\" ]
    5 [ label = \"V=\" ]
    6 [ label = \"ε\" ]
    7 [ label = \"V\" ]
    8 [ label = \"=D\" ]
    9 [ label = \"C=\" ]
    10 [ label = \"knows\" ]
    11 [ label = \"says\" ]
    12 [ label = \"D=\" ]
    13 [ label = \"prefers\" ]
    14 [ label = \"drinks\" ]
    15 [ label = \"N\" ]
    16 [ label = \"king\" ]
    17 [ label = \"wine\" ]
    18 [ label = \"beer\" ]
    19 [ label = \"queen\" ]
    20 [ label = \"D\" ]
    21 [ label = \"N=\" ]
    22 [ label = \"the\" ]
    23 [ label = \"-W\" ]
    24 [ label = \"D\" ]
    25 [ label = \"N=\" ]
    26 [ label = \"which\" ]
    0 -> 1 [ label = \"-2.8042006992676702\" ]
    1 -> 2 [ label = \"-0.6931471805599453\" ]
    2 -> 3 [ label = \"0\" ]
    1 -> 4 [ label = \"-0.6931471805599453\" ]
    4 -> 5 [ label = \"0\" ]
    5 -> 6 [ label = \"0\" ]
    0 -> 7 [ label = \"-0.8042006992676702\" ]
    7 -> 8 [ label = \"0\" ]
    8 -> 9 [ label = \"-0.6931471805599454\" ]
    9 -> 10 [ label = \"-0.6931471805599453\" ]
    9 -> 11 [ label = \"-0.6931471805599453\" ]
    8 -> 12 [ label = \"-0.6931471805599454\" ]
    12 -> 13 [ label = \"-0.6931471805599453\" ]
    12 -> 14 [ label = \"-0.6931471805599453\" ]
    0 -> 15 [ label = \"-0.8042006992676702\" ]
    15 -> 16 [ label = \"-1.3862943611198906\" ]
    15 -> 17 [ label = \"-1.3862943611198906\" ]
    15 -> 18 [ label = \"-1.3862943611198906\" ]
    15 -> 19 [ label = \"-1.3862943611198906\" ]
    0 -> 20 [ label = \"-3.8042006992676702\" ]
    20 -> 21 [ label = \"0\" ]
    21 -> 22 [ label = \"0\" ]
    0 -> 23 [ label = \"-3.8042006992676702\" ]
    23 -> 24 [ label = \"0\" ]
    24 -> 25 [ label = \"0\" ]
    25 -> 26 [ label = \"0\" ]
}
"
        );
        let n_categories = lex.categories().collect::<HashSet<_>>().len();
        assert_eq!(4, n_categories);
        let n_licensors = lex.licensor_types().collect::<HashSet<_>>().len();
        assert_eq!(1, n_licensors);
        let mut lemmas: Vec<_> = lex.lemmas().collect();
        lemmas.sort();
        assert_eq!(
            lemmas,
            vec![
                &None,
                &None,
                &Some("beer"),
                &Some("drinks"),
                &Some("king"),
                &Some("knows"),
                &Some("prefers"),
                &Some("queen"),
                &Some("says"),
                &Some("the"),
                &Some("which"),
                &Some("wine")
            ]
        );

        let lex = Lexicon::from_string(COPY_LANGUAGE)?;
        assert_ne!(lex, lex_2);
        assert_eq!(
            "digraph {
    0 [ label = \"root\" ]
    1 [ label = \"T\" ]
    2 [ label = \"+l\" ]
    3 [ label = \"+r\" ]
    4 [ label = \"=T\" ]
    5 [ label = \"ε\" ]
    6 [ label = \"-l\" ]
    7 [ label = \"-r\" ]
    8 [ label = \"T\" ]
    9 [ label = \"ε\" ]
    10 [ label = \"T\" ]
    11 [ label = \"+l\" ]
    12 [ label = \"=A\" ]
    13 [ label = \"a\" ]
    14 [ label = \"-r\" ]
    15 [ label = \"A\" ]
    16 [ label = \"+r\" ]
    17 [ label = \"=T\" ]
    18 [ label = \"a\" ]
    19 [ label = \"=B\" ]
    20 [ label = \"b\" ]
    21 [ label = \"B\" ]
    22 [ label = \"+r\" ]
    23 [ label = \"=T\" ]
    24 [ label = \"b\" ]
    0 -> 1 [ label = \"-2.4076059644443806\" ]
    1 -> 2 [ label = \"0\" ]
    2 -> 3 [ label = \"0\" ]
    3 -> 4 [ label = \"0\" ]
    4 -> 5 [ label = \"0\" ]
    0 -> 6 [ label = \"-0.4076059644443806\" ]
    6 -> 7 [ label = \"-1.3132616875182228\" ]
    7 -> 8 [ label = \"0\" ]
    8 -> 9 [ label = \"0\" ]
    6 -> 10 [ label = \"-0.3132616875182228\" ]
    10 -> 11 [ label = \"0\" ]
    11 -> 12 [ label = \"-0.6931471805599453\" ]
    12 -> 13 [ label = \"0\" ]
    0 -> 14 [ label = \"-1.4076059644443804\" ]
    14 -> 15 [ label = \"-0.6931471805599453\" ]
    15 -> 16 [ label = \"0\" ]
    16 -> 17 [ label = \"0\" ]
    17 -> 18 [ label = \"0\" ]
    11 -> 19 [ label = \"-0.6931471805599453\" ]
    19 -> 20 [ label = \"0\" ]
    14 -> 21 [ label = \"-0.6931471805599453\" ]
    21 -> 22 [ label = \"0\" ]
    22 -> 23 [ label = \"0\" ]
    23 -> 24 [ label = \"0\" ]
}
",
            format!("{}", Dot::new(&lex.graph))
        );
        let n_categories = lex.categories().collect::<HashSet<_>>().len();
        assert_eq!(3, n_categories);
        let n_licensors = lex.licensor_types().collect::<HashSet<_>>().len();
        assert_eq!(2, n_licensors);
        Ok(())
    }

    #[test]
    fn conversion() -> anyhow::Result<()> {
        let lex = Lexicon::from_string(STABLER2011)?;
        lex.to_owned_values();
        Ok(())
    }
    #[test]
    fn conversion2() -> anyhow::Result<()> {
        let lex = Lexicon::from_string(STABLER2011)?;
        let lex2 = lex.remap_lexicon(|x| x.to_string(), |c| c.to_string());
        let lex3 = lex2.remap_lexicon(|x| x.as_str(), |c| c.as_str());
        assert_eq!(lex, lex3);

        let x = ParsingError::NoLicensorOrCategory(LicenseeOrCategory::Category("s"));
        let _y: ParsingError<String> = x.inner_into();

        Ok(())
    }
}