minimalist_grammar_parser/
lexicon.rs

1//! Module which defines the core functions to create or modify MG lexicons.
2
3use ahash::{HashMap, HashSet};
4#[cfg(feature = "pretty")]
5use serde::Serialize;
6
7use crate::Direction;
8use chumsky::{extra::ParserExtra, label::LabelError, text::TextExpected, util::MaybeRef};
9use chumsky::{
10    prelude::*,
11    text::{inline_whitespace, newline},
12};
13use itertools::Itertools;
14
15use logprob::{LogProb, Softmax};
16
17use logprob::LogSumExp;
18
19use petgraph::Direction::Outgoing;
20use petgraph::dot::Dot;
21use petgraph::prelude::StableDiGraph;
22use petgraph::{
23    graph::NodeIndex,
24    visit::{EdgeRef, IntoNodeReferences},
25};
26use std::result::Result;
27use std::{
28    fmt::{Debug, Display},
29    hash::Hash,
30};
31use thiserror::Error;
32
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
///A possible feature of a lexical entry
pub enum Feature<Category: Eq> {
    ///The category of a lexical entry.
    Category(Category),
    ///Possible complements or specifiers (e.g. =d or d=)
    Selector(Category, Direction),
    ///Head movement (e.g. =>d or d<=)
    Affix(Category, Direction),
    ///Possible places for movers to go to (e.g. +wh)
    Licensor(Category),
    ///Marks that a lexical entry can move (e.g. -wh)
    Licensee(Category),
}
47
48impl<C: Eq> Feature<C> {
49    ///Get a reference to the inner category of a feature.
50    pub fn inner(&self) -> &C {
51        match self {
52            Feature::Category(c)
53            | Feature::Selector(c, _)
54            | Feature::Affix(c, _)
55            | Feature::Licensor(c)
56            | Feature::Licensee(c) => c,
57        }
58    }
59
60    ///Convert the [`Feature<C>`] to its inner category.
61    pub fn into_inner(self) -> C {
62        match self {
63            Feature::Category(c)
64            | Feature::Selector(c, _)
65            | Feature::Affix(c, _)
66            | Feature::Licensor(c)
67            | Feature::Licensee(c) => c,
68        }
69    }
70
71    ///Checks whether this features is a [`Feature::Selector`] or not.
72    pub fn is_selector(&self) -> bool {
73        matches!(self, Self::Selector(_, _))
74    }
75}
76
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
///Returns either a licensee or category: used only in [`ParsingError`]
#[allow(missing_docs)]
pub enum LicenseeOrCategory<C> {
    //A licensee feature (displayed with a `-` prefix).
    Licensee(C),
    //A bare category feature.
    Category(C),
}
84
85impl<C: Debug> Display for LicenseeOrCategory<C> {
86    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
87        match self {
88            LicenseeOrCategory::Licensee(c) => write!(f, "-{c:?}"),
89            LicenseeOrCategory::Category(c) => write!(f, "{c:?}"),
90        }
91    }
92}
93
#[derive(Error, Clone, Copy, Debug)]
///Indicates that there is something wrong with a lexicon
pub enum LexiconError {
    ///Reference a node that doesn't exist
    #[error("There is no node ({0:?})")]
    MissingNode(NodeIndex),

    ///Reference a lexeme that doesn't exist
    #[error("{0:?} is not a lexeme")]
    MissingLexeme(LexemeId),

    ///Lexicon is malformed since a node isn't a descendent from the root.
    #[error("Following the parents of node, ({0:?}), doesn't lead to root")]
    DoesntGoToRoot(NodeIndex),

    ///The parents of a node go beyond the root or go through a leaf.
    #[error("Following the parents of node, ({0:?}), we pass through the root or another lemma")]
    InvalidOrder(NodeIndex),
}
113
///Indicates a problem with parsing.
#[derive(Error, Clone, Copy, Debug)]
pub enum ParsingError<C>
where
    C: Debug,
{
    ///We try to get a licensor or category of a feature that is not in the grammar.
    #[error("There is nothing with feature ({0})")]
    NoLicensorOrCategory(LicenseeOrCategory<C>),
}
124
125impl<A: Debug> ParsingError<A> {
126    ///Module to convert errors to their owned variants:
127    ///
128    ///```
129    /// # use minimalist_grammar_parser::ParsingError;
130    /// # use minimalist_grammar_parser::lexicon::LicenseeOrCategory;
131    /// let x: ParsingError<&str> = ParsingError::NoLicensorOrCategory(LicenseeOrCategory::Category("s"));
132    /// let y: ParsingError<String> = x.inner_into();
133    ///
134    ///```
135    pub fn inner_into<B: From<A> + Debug>(self: ParsingError<A>) -> ParsingError<B> {
136        match self {
137            ParsingError::NoLicensorOrCategory(LicenseeOrCategory::Licensee(c)) => {
138                ParsingError::NoLicensorOrCategory(LicenseeOrCategory::Licensee(c.into()))
139            }
140            ParsingError::NoLicensorOrCategory(LicenseeOrCategory::Category(c)) => {
141                ParsingError::NoLicensorOrCategory(LicenseeOrCategory::Category(c.into()))
142            }
143        }
144    }
145}
146
///Parses a whole grammar: lexical entries separated by newlines, with leading
///and trailing blank lines permitted, consuming the entire input.
fn grammar_parser<'src>()
-> impl Parser<'src, &'src str, Lexicon<&'src str, &'src str>, extra::Err<Rich<'src, char>>> {
    entry_parser::<extra::Err<Rich<'src, char>>>()
        .separated_by(newline())
        .allow_leading()
        .allow_trailing()
        .collect::<Vec<_>>()
        //NOTE(review): `true` is `collapse_lemmas` — presumably merges
        //duplicate lemma nodes; confirm against `Lexicon::new`.
        .map(|x| Lexicon::new(x, true))
        .then_ignore(end())
}
157
///Parses a single lexical entry of the form `lemma :: features`,
///e.g. `saw::d= =d v` or `ε::=v +wh c`.
fn entry_parser<'src, E>() -> impl Parser<'src, &'src str, LexicalEntry<&'src str, &'src str>, E>
where
    E: ParserExtra<'src, &'src str>,
    E::Error: LabelError<'src, &'src str, TextExpected<&'src str>>
        + LabelError<'src, &'src str, MaybeRef<'src, char>>
        + LabelError<'src, &'src str, &'static str>
        + LabelError<'src, &'src str, TextExpected<()>>,
{
    //A feature name is a non-empty run of characters excluding whitespace and
    //the punctuation reserved by the feature syntax.
    let feature_name = any()
        .and_is(none_of([
            '\t', '\n', ' ', '+', '-', '=', '>', '<', ':', '\r',
        ]))
        .repeated()
        .at_least(1)
        .labelled("feature name")
        .to_slice();

    //Head-movement affixes: `x<=` (rightward) or `=>x` (leftward).
    let affix = choice((
        feature_name
            .then_ignore(just("<="))
            .map(|x| Feature::Affix(x, Direction::Right))
            .labelled("right affix"),
        just("=>")
            .ignore_then(feature_name)
            .map(|x| Feature::Affix(x, Direction::Left))
            .labelled("left affix"),
    ));

    //Features allowed before the category: selectors (`x=`, `=x`) and
    //licensors (`+x`).
    let pre_category_features = choice((
        feature_name
            .then_ignore(just("="))
            .map(|x| Feature::Selector(x, Direction::Right))
            .labelled("right selector"),
        just("=")
            .ignore_then(feature_name)
            .map(|x| Feature::Selector(x, Direction::Left))
            .labelled("left selector"),
        just("+")
            .ignore_then(feature_name)
            .map(Feature::Licensor)
            .labelled("licensor"),
    ));

    //The lemma: the explicit empty marker `ε` (parsed as `None`), a run of
    //non-reserved characters, or nothing at all (also `None`).
    choice((
        just("ε").to(None),
        (none_of(['\t', '\n', ' ', '+', '-', '=', ':', '\r'])
            .repeated()
            .at_least(1)
            .to_slice())
        .or_not(),
    ))
    .labelled("lemma")
    .then_ignore(
        just("::")
            .padded_by(inline_whitespace())
            .labelled("lemma feature seperator"),
    )
    //An optional leading affix, then any number of pre-category features.
    .then(
        affix
            .or_not()
            .then_ignore(inline_whitespace().or_not())
            .then(
                pre_category_features
                    .separated_by(inline_whitespace())
                    .allow_trailing()
                    .collect::<Vec<_>>()
                    .labelled("pre category features"),
            )
            .map(|(affix, mut feats)| {
                if let Some(affix) = affix {
                    feats.insert(0, affix);
                }
                feats
            }),
    )
    //Exactly one category feature...
    .then(
        feature_name
            .map(Feature::Category)
            .labelled("category feature"),
    )
    //...followed by any number of licensees (`-x`).
    .then(
        just('-')
            .ignore_then(feature_name)
            .map(Feature::Licensee)
            .labelled("licensee")
            .separated_by(inline_whitespace())
            .allow_leading()
            .allow_trailing()
            .collect::<Vec<_>>()
            .labelled("licensees"),
    )
    //Assemble the flat feature list:
    //[affix?, selectors/licensors..., category, licensees...].
    .map(|(((lemma, mut features), category), mut licensees)| {
        features.push(category);
        features.append(&mut licensees);
        LexicalEntry::new(lemma, features)
    })
}
255
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
//A node in the lexicon graph: the shared root, a lemma leaf, an ordinary
//feature, or a complement (a first selector, stored as its own variant).
pub(crate) enum FeatureOrLemma<T: Eq, Category: Eq> {
    //The single shared root of the lexicon graph.
    Root,
    //A leaf holding a lemma; `None` is the empty lemma (ε).
    Lemma(Option<T>),
    //Any feature that is not a first selector.
    Feature(Feature<Category>),
    //A first selector, i.e. the point where the head is first merged.
    Complement(Category, Direction),
}
263
264impl<T: Eq, C: Eq> FeatureOrLemma<T, C> {
265    fn is_lemma(&self) -> bool {
266        matches!(self, FeatureOrLemma::Lemma(_))
267    }
268}
269
#[cfg(feature = "sampling")]
impl<T: Eq, C: Eq + Clone> FeatureOrLemma<T, C> {
    //Rewrites a selector node into the equivalent complement node in place;
    //every other variant is left untouched.
    fn to_complement_mut(&mut self) {
        if let FeatureOrLemma::Feature(Feature::Selector(c, d)) = self {
            *self = FeatureOrLemma::Complement(c.clone(), *d);
        }
    }
}
278
279impl<T: Eq, Category: Eq> From<LexicalEntry<T, Category>> for Vec<FeatureOrLemma<T, Category>> {
280    fn from(value: LexicalEntry<T, Category>) -> Self {
281        let LexicalEntry { lemma, features } = value;
282        std::iter::once(lemma)
283            .map(|x| FeatureOrLemma::Lemma(x))
284            .chain(
285                features
286                    .into_iter()
287                    .enumerate()
288                    .map(|(i, feat)| match feat {
289                        //A feature is a complement iff its the first
290                        //selector (i.e. the moment the head first is merged)
291                        Feature::Selector(c, d) if i == 0 => FeatureOrLemma::Complement(c, d),
292                        _ => FeatureOrLemma::Feature(feat),
293                    }),
294            )
295            .collect()
296    }
297}
298
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
///A representation of a lexical entry in a grammar.
pub struct LexicalEntry<T: Eq, Category: Eq> {
    //The lemma; `None` represents the empty lemma (ε).
    pub(crate) lemma: Option<T>,
    //Features in order: selectors/licensors, then the category, then licensees.
    pub(crate) features: Vec<Feature<Category>>,
}
305impl<T: Eq, Category: Eq> LexicalEntry<T, Category> {
306    ///Creates a new lexical entry
307    pub fn new(lemma: Option<T>, features: Vec<Feature<Category>>) -> LexicalEntry<T, Category> {
308        LexicalEntry { lemma, features }
309    }
310
311    ///Get the lemma (possibly `None`) of a lexical entry
312    pub fn lemma(&self) -> &Option<T> {
313        &self.lemma
314    }
315
316    ///Gets all features of a lexical entry
317    pub fn features(&self) -> &[Feature<Category>] {
318        &self.features
319    }
320
321    ///Change the type of a [`LexicalEntry`]
322    pub fn remap<'lex, T2: Eq, C2: Eq>(
323        &'lex self,
324        lemma_map: impl Fn(&'lex T) -> T2,
325        category_map: impl Fn(&'lex Category) -> C2,
326    ) -> LexicalEntry<T2, C2> {
327        let lemma = self.lemma.as_ref().map(lemma_map);
328        let features = self
329            .features
330            .iter()
331            .map(|x| match x {
332                Feature::Category(c) => Feature::Category(category_map(c)),
333                Feature::Selector(c, direction) => Feature::Selector(category_map(c), *direction),
334                Feature::Affix(c, direction) => Feature::Affix(category_map(c), *direction),
335                Feature::Licensor(c) => Feature::Licensor(category_map(c)),
336                Feature::Licensee(c) => Feature::Licensee(category_map(c)),
337            })
338            .collect();
339        LexicalEntry { lemma, features }
340    }
341
342    ///Gets the category of a lexical entry
343    pub fn category(&self) -> &Category {
344        let mut cat = None;
345        for lex in &self.features {
346            if let Feature::Category(c) = lex {
347                cat = Some(c);
348                break;
349            }
350        }
351        cat.expect("All lexical entries must have one category")
352    }
353}
354
355impl<T: Display + PartialEq + Eq, Category: Display + PartialEq + Eq> Display
356    for LexicalEntry<T, Category>
357{
358    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
359        if let Some(t) = &self.lemma {
360            write!(f, "{t}::")?;
361        } else {
362            write!(f, "ε::")?;
363        }
364        for (i, feature) in self.features.iter().enumerate() {
365            if i > 0 {
366                write!(f, " ")?;
367            }
368            write!(f, "{feature}")?;
369        }
370        Ok(())
371    }
372}
373
374impl<Category: Display + Eq> Display for Feature<Category> {
375    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
376        match self {
377            Feature::Category(x) => write!(f, "{x}"),
378            Feature::Selector(x, Direction::Left) => write!(f, "={x}"),
379            Feature::Selector(x, Direction::Right) => write!(f, "{x}="),
380            Feature::Affix(x, Direction::Left) => write!(f, "=>{x}"),
381            Feature::Affix(x, Direction::Right) => write!(f, "{x}<="),
382            Feature::Licensor(x) => write!(f, "+{x}"),
383            Feature::Licensee(x) => write!(f, "-{x}"),
384        }
385    }
386}
387
///The struct which represents a specific MG.
///They can be created by parsing:
///```
///# use minimalist_grammar_parser::Lexicon;
///let grammar: Lexicon<&str, &str> = Lexicon::from_string("John::d\nsaw::d= =d v\nMary::d")?;
///# Ok::<(), anyhow::Error>(())
///```
///It is also possible to make a [`Lexicon`] which uses different types for categories or lemmas, such as integers or [`String`]s.
///These can be made with [`Lexicon::remap_lexicon`] or by passing a vector of [`LexicalEntry`]
///using [`Lexicon::new`].
///
///## Lemmas
///
///Lemmas are represented as `Option<T>`, so empty strings are `None` rather than an empty
///string to allow for arbitrary lemma type `T`.
///
///## Useful functions
///
/// - [`Lexicon::parse`] parse a string.
/// - [`Lexicon::parse_multiple`] parse multiple strings simultaneously.
/// - [`Lexicon::generate`] generate some of the strings of a grammar.
/// - [`Lexicon::valid_continuations`] find the valid next tokens of a grammar given a prefix
///
///
///## Representation
///The struct represents a lexicon as a graph, as per Stabler (2013).
///This means certain functions exploit this, such as [`Lexicon::n_nodes`] which doesn't reference
///the number of features of the grammar as written out, but rather the number of nodes in the graph representation.
///
/// - Stabler, E. (2013). Two Models of Minimalist, Incremental Syntactic Analysis. Topics in Cognitive Science, 5(3), 611–633. <https://doi.org/10.1111/tops.12031>
#[derive(Debug, Clone)]
pub struct Lexicon<T: Eq, Category: Eq> {
    //Prefix-tree-like graph of feature chains; edges carry log-probabilities.
    graph: StableDiGraph<FeatureOrLemma<T, Category>, LogProb<f64>>,
    //The shared root node from which every entry's feature chain hangs.
    root: NodeIndex,
    //One entry per lexeme, pointing at its lemma node in `graph`.
    leaves: Vec<LexemeId>,
}
424
#[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
///An ID for each lexeme in a grammar
///(a thin wrapper around the lemma node's index in the lexicon graph).
pub struct LexemeId(pub(crate) NodeIndex);
428
#[cfg(feature = "pretty")]
impl Serialize for LexemeId {
    //Serialises the ID as a one-field tuple struct holding the raw node index.
    //NOTE(review): presumably hand-written because `NodeIndex`'s own
    //`Serialize` isn't available/desired here — confirm.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        use serde::ser::SerializeTupleStruct;

        let mut s = serializer.serialize_tuple_struct("LexemeId", 1)?;
        s.serialize_field(&self.0.index())?;
        s.end()
    }
}
442
impl<T: Eq, Category: Eq> PartialEq for Lexicon<T, Category> {
    //Manual equality: compares the root index, the leaf list, and the node and
    //edge weights of the graphs.
    //NOTE(review): weights are compared in iteration order, so two lexicons
    //built with different insertion orders may compare unequal — confirm this
    //is intended.
    fn eq(&self, other: &Self) -> bool {
        self.root == other.root
            && self.leaves == other.leaves
            && self.graph.node_weights().eq(other.graph.node_weights())
            && self.graph.edge_weights().eq(other.graph.edge_weights())
    }
}
impl<T: Eq, Category: Eq> Eq for Lexicon<T, Category> {}
452impl<T, Category> Lexicon<T, Category>
453where
454    T: Eq + std::fmt::Debug + Clone + Display,
455    Category: Eq + std::fmt::Debug + Clone + Display,
456{
457    ///Prints a lexicon as a `GraphViz` dot file.
458    #[must_use]
459    pub fn graphviz(&self) -> String {
460        let dot = Dot::new(&self.graph);
461        format!("{dot}")
462    }
463}
464
///Converts a raw-weight graph into a probability graph by normalising the
///outgoing edge weights of every node so each node's edges sum to one.
fn renormalise_weights<T: Eq + Clone, C: Eq + Clone>(
    mut graph: StableDiGraph<FeatureOrLemma<T, C>, f64>,
) -> StableDiGraph<FeatureOrLemma<T, C>, LogProb<f64>> {
    //Renormalise probabilities to sum to one.
    for node_index in graph.node_indices().collect_vec() {
        let edges: Vec<_> = graph
            .edges_directed(node_index, petgraph::Direction::Outgoing)
            .map(|e| (*e.weight(), e.id()))
            .collect();

        match edges.len().cmp(&1) {
            //A single outgoing edge gets probability one (0.0 in log space).
            std::cmp::Ordering::Equal => {
                for (_, edge) in edges {
                    graph[edge] = 0.0;
                }
            }
            //Multiple edges: softmax the raw weights into log-probabilities.
            std::cmp::Ordering::Greater => {
                let dist = edges
                    .iter()
                    .map(|(w, _edge)| *w)
                    .softmax()
                    .unwrap()
                    .map(logprob::LogProb::into_inner);

                for (new_weight, (_weight, edge)) in dist.zip(edges.iter()) {
                    graph[*edge] = new_weight;
                }
            }
            //No outgoing edges (a leaf): nothing to normalise.
            _ => (),
        }
    }

    //TODO: Get rid of this annoying clone.
    graph.map(|_, n| n.clone(), |_, e| LogProb::new(*e).unwrap())
}
500
///Iterator that climbs up a graph until it reaches the root.
pub struct Climber<'a, T: Eq, C: Eq> {
    //The lexicon whose graph is being climbed.
    lex: &'a Lexicon<T, C>,
    //The current node; advanced to its parent on every `next` call.
    pos: NodeIndex,
}
506
507impl<T: Eq, C: Eq + Clone> Iterator for Climber<'_, T, C> {
508    type Item = Feature<C>;
509
510    fn next(&mut self) -> Option<Self::Item> {
511        self.pos = self.lex.parent_of(self.pos)?;
512
513        match self.lex.graph.node_weight(self.pos) {
514            Some(x) => match x {
515                FeatureOrLemma::Root => None,
516                FeatureOrLemma::Lemma(_) => None,
517                FeatureOrLemma::Feature(feature) => Some(feature.clone()),
518                FeatureOrLemma::Complement(c, direction) => {
519                    Some(Feature::Selector(c.clone(), *direction))
520                }
521            },
522            None => None,
523        }
524    }
525}
526
///Renormalises the outgoing edge probabilities of a single node so that they
///sum to one (i.e. their log-sum-exp is 0.0).
pub(crate) fn fix_weights_per_node<T: Eq, C: Eq>(
    graph: &mut StableDiGraph<FeatureOrLemma<T, C>, LogProb<f64>>,
    node_index: NodeIndex,
) {
    let sum = graph
        .edges_directed(node_index, Outgoing)
        .map(|x| x.weight())
        .log_sum_exp_float_no_alloc();

    //Skip nodes that are already normalised (log-sum of exactly 0.0).
    //NOTE(review): exact float comparison — assumes normalised nodes sum to
    //exactly 0.0 in log space; confirm no tolerance is needed.
    if sum != 0.0 {
        let edges: Vec<_> = graph
            .edges_directed(node_index, Outgoing)
            .map(|e| e.id())
            .collect();
        for e in edges {
            //Dividing by the total in probability space = subtracting in log space.
            graph[e] = LogProb::new(graph[e].into_inner() - sum).unwrap();
        }
    }
}
546
547pub(crate) fn fix_weights<T: Eq + Clone, C: Eq + Clone>(
548    graph: &mut StableDiGraph<FeatureOrLemma<T, C>, LogProb<f64>>,
549) {
550    //Renormalise probabilities to sum to one.
551    for node_index in graph.node_indices().collect_vec() {
552        fix_weights_per_node(graph, node_index);
553    }
554}
555
///A struct which records whether a given licensee must always occur with a lexeme of a given
///category
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub struct ObligatoryMover<C: Eq> {
    ///The licensee which always occurs with the category.
    pub licensee: C,
    ///The category that requires a specific licensee.
    pub category: C,
}
impl<T: Eq + Debug, Category: Eq + Hash + Clone> Lexicon<T, Category> {
    ///This function goes through a lexicon and returns all licensees which are *always* correlated
    ///with a *specific* category.
    ///```
    /// # use minimalist_grammar_parser::Lexicon;
    /// # use minimalist_grammar_parser::lexicon::ObligatoryMover;
    /// # use anyhow;
    /// # fn main() -> anyhow::Result<()> {
    /// let lex = Lexicon::from_string("John::d -k -q\nwho::d -k -q -w\nsaw::d= +k v")?;
    /// assert_eq!(
    ///     lex.obligatory_movers(),
    ///     vec![
    ///         ObligatoryMover {
    ///             licensee: "k",
    ///             category: "d",
    ///         },
    ///         ObligatoryMover {
    ///             licensee: "q",
    ///             category: "d",
    ///         },
    ///     ]
    /// );
    /// let lex = Lexicon::from_string("John::d -k -q\nwho::d -k -w\nsaw::d= +k v")?;
    /// assert_eq!(
    ///     lex.obligatory_movers(),
    ///     vec![ObligatoryMover {
    ///         licensee: "k",
    ///         category: "d",
    ///     }]
    /// );
    /// let lex = Lexicon::from_string("John::d -q\nwho::d -k -q -w\nsaw::d= +k v")?;
    /// assert_eq!(lex.obligatory_movers(), vec![]);
    ///
    /// let lex = Lexicon::from_string("John::d -k -q\nwho::d -k -q -w\nsaw::d= +k v -q")?;
    /// assert_eq!(
    ///     lex.obligatory_movers(),
    ///     vec![ObligatoryMover {
    ///         licensee: "k",
    ///         category: "d",
    ///     }]
    /// );
    ///# Ok(())
    ///# }
    ///
    ///```
    #[must_use]
    pub fn obligatory_movers(&self) -> Vec<ObligatoryMover<Category>> {
        //Depth-first search from the root, recording for every category the
        //licensee chain of each lexical entry carrying that category.
        let mut stack = vec![(self.root, vec![])];
        let mut histories: HashMap<&Category, Vec<Vec<&Category>>> = HashMap::default();

        while let Some((x, hist)) = stack.pop() {
            for child in self.children_of(x) {
                match self.get(child).unwrap().0 {
                    //Extend the current licensee chain and keep descending.
                    FeatureOrLemma::Feature(Feature::Licensee(c)) => {
                        let mut hist = hist.clone();
                        hist.push(c);
                        stack.push((child, hist));
                    }
                    //Reached a category: record the licensee chain leading to it.
                    FeatureOrLemma::Feature(Feature::Category(c)) => {
                        histories.entry(c).or_default().push(hist.clone());
                    }
                    _ => (),
                }
            }
        }

        let mut movers = vec![];
        let mut used_licensees = HashSet::default();
        for (category, mut licensees) in histories {
            //Pop licensees from the back for as long as *every* entry of this
            //category agrees on the same next licensee.
            while let Ok(Some(licensee)) = licensees
                .iter_mut()
                .map(std::vec::Vec::pop)
                .all_equal_value()
            {
                if used_licensees.contains(licensee) {
                    //A licensee obligatory for more than one category isn't
                    //tied to a *specific* category — drop it again.
                    movers.retain(|x: &ObligatoryMover<Category>| &x.licensee != licensee);
                } else {
                    used_licensees.insert(licensee);
                    movers.push(ObligatoryMover {
                        licensee: licensee.clone(),
                        category: category.clone(),
                    });
                }
            }
        }

        movers
    }
}
654
655impl<T: Eq, Category: Eq> Lexicon<T, Category> {
656    pub(crate) fn get(
657        &self,
658        nx: NodeIndex,
659    ) -> Option<(&FeatureOrLemma<T, Category>, LogProb<f64>)> {
660        if let Some(x) = self.graph.node_weight(nx) {
661            let p = self
662                .graph
663                .edges_directed(nx, petgraph::Direction::Incoming)
664                .next()
665                .unwrap()
666                .weight();
667            Some((x, *p))
668        } else {
669            None
670        }
671    }
672
    ///Iterates over all the categories that can serve as the root of a parse.
    ///Crucially, this excludes any category that must necessarily be moved
    ///(only category nodes that are direct children of the root are yielded).
    pub fn root_categories(&self) -> impl Iterator<Item = &Category> {
        self.children_of(self.root).filter_map(|x| {
            if let FeatureOrLemma::Feature(Feature::Category(c)) =
                self.graph.node_weight(x).unwrap()
            {
                Some(c)
            } else {
                None
            }
        })
    }
686
    //Adds a lexical entry to the lexicon graph, sharing prefixes with existing
    //entries. Returns `None` when the identical entry is already present,
    //otherwise the `LexemeId` of the newly inserted lemma node.
    pub(crate) fn add_lexical_entry(
        &mut self,
        lexical_entry: LexicalEntry<T, Category>,
    ) -> Option<LexemeId> {
        let LexicalEntry { lemma, features } = lexical_entry;

        //Build the node sequence: lemma first, then features, where the first
        //selector becomes a complement (the point where the head is merged).
        let mut new_nodes = vec![FeatureOrLemma::Lemma(lemma)];
        for (i, f) in features.into_iter().enumerate() {
            let f = if i == 0 && f.is_selector() {
                let Feature::Selector(c, d) = f else {
                    panic!("We checked it was a selector!")
                };
                FeatureOrLemma::Complement(c, d)
            } else {
                FeatureOrLemma::Feature(f)
            };
            new_nodes.push(f);
        }

        //Walk down from the root (popping from the back, i.e. last feature
        //first), reusing matching child nodes and creating the rest.
        let mut pos = self.root;
        while let Some(f) = new_nodes.pop() {
            if let Some(p) = self.children_of(pos).find(|x| self.find(*x) == Some(&f)) {
                pos = p;
            } else {
                let new_pos = self.graph.add_node(f);
                self.graph.add_edge(pos, new_pos, LogProb::prob_of_one());
                //Re-normalise the parent's outgoing probabilities after insertion.
                fix_weights_per_node(&mut self.graph, pos);
                pos = new_pos;
            }
        }
        if self.leaves.contains(&LexemeId(pos)) {
            None
        } else {
            self.leaves.push(LexemeId(pos));
            Some(LexemeId(pos))
        }
    }
724
725    ///The number of children of a node
726    pub(crate) fn n_children(&self, nx: NodeIndex) -> usize {
727        self.graph
728            .edges_directed(nx, petgraph::Direction::Outgoing)
729            .count()
730    }
731
732    ///The children of a node
733    pub(crate) fn children_of(&self, nx: NodeIndex) -> impl Iterator<Item = NodeIndex> + '_ {
734        self.graph
735            .edges_directed(nx, petgraph::Direction::Outgoing)
736            .map(|e| e.target())
737    }
738
739    ///Finds a node's feature
740    pub(crate) fn find(&self, nx: NodeIndex) -> Option<&FeatureOrLemma<T, Category>> {
741        self.graph.node_weight(nx)
742    }
743
744    ///Climb up a node over all of its features
745    pub(crate) fn node_to_features(&self, nx: NodeIndex) -> Climber<'_, T, Category> {
746        Climber { lex: self, pos: nx }
747    }
748
749    ///Climb up a lexeme over all of its features
750    #[must_use]
751    pub fn leaf_to_features(&self, lexeme: LexemeId) -> Option<Climber<'_, T, Category>> {
752        if !matches!(
753            self.graph.node_weight(lexeme.0),
754            Some(FeatureOrLemma::Lemma(_))
755        ) {
756            return None;
757        }
758
759        Some(Climber {
760            lex: self,
761            pos: lexeme.0,
762        })
763    }
764
765    ///Gets the lemma of a lexeme.
766    #[must_use]
767    pub fn leaf_to_lemma(&self, lexeme_id: LexemeId) -> Option<&Option<T>> {
768        match self.graph.node_weight(lexeme_id.0) {
769            Some(x) => {
770                if let FeatureOrLemma::Lemma(l) = x {
771                    Some(l)
772                } else {
773                    None
774                }
775            }
776            None => None,
777        }
778    }
779
780    ///Get the category of a lexeme.
781    #[must_use]
782    pub fn category(&self, lexeme_id: LexemeId) -> Option<&Category> {
783        let mut pos = lexeme_id.0;
784        while !matches!(
785            self.graph[pos],
786            FeatureOrLemma::Feature(Feature::Category(_))
787        ) {
788            let new_pos = self.parent_of(pos)?;
789            pos = new_pos;
790        }
791        let FeatureOrLemma::Feature(Feature::Category(cat)) = &self.graph[pos] else {
792            return None;
793        };
794        Some(cat)
795    }
796
    ///Get the parent of a node.
    //Uses the first incoming edge; NOTE(review): assumes at most one incoming
    //edge per node (tree-shaped graph) — confirm.
    pub(crate) fn parent_of(&self, nx: NodeIndex) -> Option<NodeIndex> {
        self.graph
            .edges_directed(nx, petgraph::Direction::Incoming)
            .next()
            .map(|e| e.source())
    }
804}
805
806impl<T: Eq, Category: Eq> Lexicon<T, Category> {
    ///Checks if `nx` is a complement (affix features also count as complements here).
    ///
    ///# Panics
    ///Panics if `nx` is not a node of the graph.
    #[must_use]
    pub fn is_complement(&self, nx: NodeIndex) -> bool {
        matches!(
            self.graph.node_weight(nx).unwrap(),
            FeatureOrLemma::Complement(_, _) | FeatureOrLemma::Feature(Feature::Affix(_, _))
        )
    }
815
    ///The number of nodes in a lexicon.
    #[must_use]
    pub fn n_nodes(&self) -> usize {
        self.graph.node_count()
    }

    ///Returns the leaves of a grammar (one [`LexemeId`] per lexical entry, in
    ///insertion order).
    #[must_use]
    pub fn leaves(&self) -> &[LexemeId] {
        &self.leaves
    }
827
828    ///Get all lemmas of a grammar
829    pub fn lemmas(&self) -> impl Iterator<Item = &Option<T>> {
830        self.leaves.iter().filter_map(|x| match &self.graph[x.0] {
831            FeatureOrLemma::Lemma(x) => Some(x),
832            _ => None,
833        })
834    }
835
836    ///Get the feature of a node, if it has one.
837    pub(crate) fn get_feature_category(&self, nx: NodeIndex) -> Option<&Category> {
838        self.graph.node_weight(nx).and_then(|x| match x {
839            FeatureOrLemma::Root => None,
840            FeatureOrLemma::Lemma(_) => None,
841            FeatureOrLemma::Feature(feature) => match feature {
842                Feature::Category(c)
843                | Feature::Selector(c, _)
844                | Feature::Licensor(c)
845                | Feature::Licensee(c)
846                | Feature::Affix(c, _) => Some(c),
847            },
848            FeatureOrLemma::Complement(c, _) => Some(c),
849        })
850    }
851}
852
853impl<T: Eq + std::fmt::Debug + Clone, Category: Eq + std::fmt::Debug + Clone> Lexicon<T, Category> {
    ///Returns all leaves with their sibling nodes (e.g. lexemes that are identical except for
    ///their lemma)
    #[must_use]
    pub fn sibling_leaves(&self) -> Vec<Vec<LexemeId>> {
        let mut leaves = self.leaves.clone();
        let mut result = vec![];
        //Repeatedly take a leaf, gather every lemma child of its parent (its
        //sibling group), and remove the whole group from the worklist so each
        //group appears exactly once.
        while let Some(leaf) = leaves.pop() {
            let siblings: Vec<_> = self
                .children_of(self.parent_of(leaf.0).unwrap())
                .filter_map(|a| {
                    if matches!(self.graph.node_weight(a).unwrap(), FeatureOrLemma::Lemma(_)) {
                        Some(LexemeId(a))
                    } else {
                        None
                    }
                })
                .collect();
            leaves.retain(|x| !siblings.contains(x));
            result.push(siblings);
        }
        result
    }
876
877    ///Gets each lexical entry along with its ID.
878    pub fn lexemes_and_ids(
879        &self,
880    ) -> Result<impl Iterator<Item = (LexemeId, LexicalEntry<T, Category>)>, LexiconError> {
881        Ok(self.leaves().iter().copied().zip(self.lexemes()?))
882    }
883
    ///Turns a lexicon into a `Vec<LexicalEntry<T, Category>>` which can be useful for printing or
    ///investigating individual lexical entries.
    ///
    ///# Errors
    ///Returns [`LexiconError::MissingLexeme`] if a recorded leaf does not
    ///point at a lemma node.
    pub fn lexemes(&self) -> Result<Vec<LexicalEntry<T, Category>>, LexiconError> {
        let mut v = vec![];

        //NOTE: Must guarantee to iterate in this order.
        for leaf in &self.leaves {
            if let FeatureOrLemma::Lemma(lemma) = &self.graph[leaf.0] {
                let mut features = vec![];
                let mut nx = leaf.0;
                //Walk from the lemma up to (but excluding) the root, collecting
                //features; complements are reported as ordinary selectors.
                while let Some(parent) = self.parent_of(nx) {
                    if parent == self.root {
                        break;
                    } else if let FeatureOrLemma::Feature(f) = &self.graph[parent] {
                        features.push(f.clone());
                    } else if let FeatureOrLemma::Complement(c, d) = &self.graph[parent] {
                        features.push(Feature::Selector(c.clone(), *d));
                    }
                    nx = parent;
                }

                v.push(LexicalEntry {
                    lemma: lemma.clone(),
                    features,
                });
            } else {
                return Err(LexiconError::MissingLexeme(*leaf));
            }
        }
        Ok(v)
    }
915
916    ///Given a leaf node, return its [`LexicalEntry`]
917    pub fn get_lexical_entry(
918        &self,
919        lexeme_id: LexemeId,
920    ) -> Result<LexicalEntry<T, Category>, LexiconError> {
921        let Some(lemma) = self.graph.node_weight(lexeme_id.0) else {
922            return Err(LexiconError::MissingLexeme(lexeme_id));
923        };
924        let lemma = match lemma {
925            FeatureOrLemma::Lemma(lemma) => lemma,
926            FeatureOrLemma::Feature(_)
927            | FeatureOrLemma::Root
928            | FeatureOrLemma::Complement(_, _) => {
929                return Err(LexiconError::MissingLexeme(lexeme_id));
930            }
931        }
932        .clone();
933
934        let features = self.leaf_to_features(lexeme_id).unwrap().collect();
935        Ok(LexicalEntry { lemma, features })
936    }
937
938    ///Create a new grammar from a [`Vec`] of [`LexicalEntry`]
939    #[must_use]
940    pub fn new(items: Vec<LexicalEntry<T, Category>>, collapse_lemmas: bool) -> Self {
941        let n_items = items.len();
942        Self::new_with_weights(
943            items,
944            std::iter::repeat_n(1.0, n_items).collect(),
945            collapse_lemmas,
946        )
947    }
948
949    ///Create a new grammar from a [`Vec`] of [`LexicalEntry`] where lexical entries are weighted
950    ///in probability
951    pub fn new_with_weights(
952        items: Vec<LexicalEntry<T, Category>>,
953        weights: Vec<f64>,
954        collapse_lemmas: bool,
955    ) -> Self {
956        let mut graph = StableDiGraph::new();
957        let root_index = graph.add_node(FeatureOrLemma::Root);
958        let mut leaves = vec![];
959
960        for (lexeme, weight) in items.into_iter().zip(weights.into_iter()) {
961            let lexeme: Vec<FeatureOrLemma<T, Category>> = lexeme.into();
962            let mut node_index = root_index;
963
964            for feature in lexeme.into_iter().rev() {
965                //We go down the feature list and add nodes and edges corresponding to features
966                //If they exist already, we just follow the path until we have to start adding.
967                node_index = if (!feature.is_lemma() || collapse_lemmas)
968                    && let Some((nx, edge_index)) = graph
969                        .edges_directed(node_index, petgraph::Direction::Outgoing)
970                        .find(|x| graph[x.target()] == feature)
971                        .map(|x| (x.target(), x.id()))
972                {
973                    graph[edge_index] += weight;
974                    nx
975                } else {
976                    let new_node_index = graph.add_node(feature);
977                    graph.add_edge(node_index, new_node_index, weight);
978                    new_node_index
979                };
980
981                if let FeatureOrLemma::Lemma(_) = graph[node_index] {
982                    leaves.push(LexemeId(node_index));
983                }
984            }
985        }
986        let graph = renormalise_weights(graph);
987        Lexicon {
988            graph,
989            leaves,
990            root: root_index,
991        }
992    }
993
994    ///Get the node corresponding to a category
995    pub(crate) fn find_category(
996        &self,
997        category: &Category,
998    ) -> Result<NodeIndex, ParsingError<Category>> {
999        match self
1000            .graph
1001            .neighbors_directed(self.root, petgraph::Direction::Outgoing)
1002            .find(|i| match &self.graph[*i] {
1003                FeatureOrLemma::Feature(Feature::Category(c)) => c == category,
1004                _ => false,
1005            }) {
1006            Some(x) => Ok(x),
1007            None => Err(ParsingError::NoLicensorOrCategory(
1008                LicenseeOrCategory::Category(category.clone()),
1009            )),
1010        }
1011    }
1012
1013    ///Get the node corresponding to a licensee
1014    pub(crate) fn find_licensee(
1015        &self,
1016        category: &Category,
1017    ) -> Result<NodeIndex, ParsingError<Category>> {
1018        match self
1019            .graph
1020            .neighbors_directed(self.root, petgraph::Direction::Outgoing)
1021            .find(|i| match &self.graph[*i] {
1022                FeatureOrLemma::Feature(Feature::Licensee(c)) => c == category,
1023                _ => false,
1024            }) {
1025            Some(x) => Ok(x),
1026            None => Err(ParsingError::NoLicensorOrCategory(
1027                LicenseeOrCategory::Licensee(category.clone()),
1028            )),
1029        }
1030    }
1031
1032    ///Iterate over all categories of a grammar whether as selectors or categories. This goes over
1033    ///the entire lexicon so it will repeat elements multiple times.
1034    pub fn categories(&self) -> impl Iterator<Item = &Category> {
1035        self.graph.node_references().filter_map(|(_, x)| match x {
1036            FeatureOrLemma::Feature(
1037                Feature::Category(x) | Feature::Selector(x, _) | Feature::Affix(x, _),
1038            )
1039            | FeatureOrLemma::Complement(x, _) => Some(x),
1040            _ => None,
1041        })
1042    }
1043
1044    ///Iterate over all licensors of a grammar
1045    pub fn licensor_types(&self) -> impl Iterator<Item = &Category> {
1046        self.graph.node_references().filter_map(|(_, x)| match x {
1047            FeatureOrLemma::Feature(Feature::Licensor(x) | Feature::Licensee(x)) => Some(x),
1048            _ => None,
1049        })
1050    }
1051}
1052
1053impl<'src> Lexicon<&'src str, &'src str> {
1054    ///Parse a grammar and return a [`Lexicon<&str, &str>`]
1055    pub fn from_string(s: &'src str) -> Result<Self, LexiconParsingError<'src>> {
1056        grammar_parser()
1057            .padded()
1058            .then_ignore(end())
1059            .parse(s)
1060            .into_result()
1061            .map_err(std::convert::Into::into)
1062    }
1063}
1064
1065impl<T: Eq, C: Eq> Lexicon<T, C> {
1066    ///Converts a `Lexicon<T,C>` to a `Lexicon<T2, C2>` according to two functions which remap
1067    ///them.
1068    pub fn remap_lexicon<'lex, T2: Eq, C2: Eq>(
1069        &'lex self,
1070        lemma_map: impl Fn(&'lex T) -> T2,
1071        category_map: impl Fn(&'lex C) -> C2,
1072    ) -> Lexicon<T2, C2> {
1073        let Lexicon {
1074            graph,
1075            root,
1076            leaves,
1077        } = self;
1078
1079        //Per StableGraph documentation, the node indices are still valid on the mapped data
1080        //structure.
1081        let graph = graph.map(
1082            |_, x| match x {
1083                FeatureOrLemma::Root => FeatureOrLemma::Root,
1084                FeatureOrLemma::Lemma(Some(s)) => FeatureOrLemma::Lemma(Some(lemma_map(s))),
1085                FeatureOrLemma::Lemma(None) => FeatureOrLemma::Lemma(None),
1086                FeatureOrLemma::Feature(Feature::Category(c)) => {
1087                    FeatureOrLemma::Feature(Feature::Category(category_map(c)))
1088                }
1089                FeatureOrLemma::Feature(Feature::Licensor(c)) => {
1090                    FeatureOrLemma::Feature(Feature::Licensor(category_map(c)))
1091                }
1092                FeatureOrLemma::Feature(Feature::Licensee(c)) => {
1093                    FeatureOrLemma::Feature(Feature::Licensee(category_map(c)))
1094                }
1095                FeatureOrLemma::Feature(Feature::Affix(c, d)) => {
1096                    FeatureOrLemma::Feature(Feature::Affix(category_map(c), *d))
1097                }
1098                FeatureOrLemma::Feature(Feature::Selector(c, d)) => {
1099                    FeatureOrLemma::Feature(Feature::Selector(category_map(c), *d))
1100                }
1101                FeatureOrLemma::Complement(c, direction) => {
1102                    FeatureOrLemma::Complement(category_map(c), *direction)
1103                }
1104            },
1105            |_, e| *e,
1106        );
1107        Lexicon {
1108            graph,
1109            root: *root,
1110            leaves: leaves.clone(),
1111        }
1112    }
1113}
1114
1115impl<'a> Lexicon<&'a str, &'a str> {
1116    ///Converts from `Lexicon<&str, &str>` to `Lexicon<String, String>`
1117    #[must_use]
1118    pub fn to_owned_values(&self) -> Lexicon<String, String> {
1119        let Lexicon {
1120            graph,
1121            root,
1122            leaves,
1123        } = self;
1124
1125        let graph = graph.map(
1126            |_, x| match *x {
1127                FeatureOrLemma::Root => FeatureOrLemma::Root,
1128                FeatureOrLemma::Lemma(s) => {
1129                    FeatureOrLemma::Lemma(s.map(std::borrow::ToOwned::to_owned))
1130                }
1131                FeatureOrLemma::Feature(Feature::Category(c)) => {
1132                    FeatureOrLemma::Feature(Feature::Category(c.to_owned()))
1133                }
1134                FeatureOrLemma::Feature(Feature::Affix(c, d)) => {
1135                    FeatureOrLemma::Feature(Feature::Affix(c.to_owned(), d))
1136                }
1137                FeatureOrLemma::Feature(Feature::Licensor(c)) => {
1138                    FeatureOrLemma::Feature(Feature::Licensor(c.to_owned()))
1139                }
1140                FeatureOrLemma::Feature(Feature::Licensee(c)) => {
1141                    FeatureOrLemma::Feature(Feature::Licensee(c.to_owned()))
1142                }
1143                FeatureOrLemma::Feature(Feature::Selector(c, d)) => {
1144                    FeatureOrLemma::Feature(Feature::Selector(c.to_owned(), d))
1145                }
1146                FeatureOrLemma::Complement(c, direction) => {
1147                    FeatureOrLemma::Complement(c.to_owned(), direction)
1148                }
1149            },
1150            |_, e| *e,
1151        );
1152        Lexicon {
1153            graph,
1154            root: *root,
1155            leaves: leaves.clone(),
1156        }
1157    }
1158}
1159
///Problem parsing a grammar; returns a vector of all [`Rich`] errors from Chumsky
///
///The wrapped `Vec` is public so callers can inspect the individual parser errors;
///[`Display`] joins them with newlines.
#[derive(Error, Debug, Clone)]
pub struct LexiconParsingError<'a>(pub Vec<Rich<'a, char>>);
1163
1164impl Display for LexiconParsingError<'_> {
1165    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1166        writeln!(
1167            f,
1168            "{}",
1169            self.0
1170                .iter()
1171                .map(std::string::ToString::to_string)
1172                .collect::<Vec<_>>()
1173                .join("\n")
1174        )
1175    }
1176}
1177
1178impl<'a> From<Vec<Rich<'a, char>>> for LexiconParsingError<'a> {
1179    fn from(value: Vec<Rich<'a, char>>) -> Self {
1180        LexiconParsingError(value)
1181    }
1182}
1183
1184impl LexicalEntry<&str, &str> {
1185    ///Parses a single lexical entry and returns it as a [`LexicalEntry`].
1186    pub fn parse(s: &str) -> Result<LexicalEntry<&str, &str>, LexiconParsingError<'_>> {
1187        entry_parser::<extra::Err<Rich<char>>>()
1188            .parse(s)
1189            .into_result()
1190            .map_err(std::convert::Into::into)
1191    }
1192}
1193
1194impl<T, C> std::fmt::Display for FeatureOrLemma<T, C>
1195where
1196    T: Eq + Display,
1197    C: Eq + Display,
1198{
1199    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1200        match self {
1201            FeatureOrLemma::Root => write!(f, "root"),
1202            FeatureOrLemma::Lemma(lemma) => {
1203                if let Some(l) = lemma {
1204                    write!(f, "{l}")
1205                } else {
1206                    write!(f, "ε")
1207                }
1208            }
1209            FeatureOrLemma::Feature(feature) => write!(f, "{feature}"),
1210            FeatureOrLemma::Complement(c, d) => write!(f, "{}", Feature::Selector(c, *d)),
1211        }
1212    }
1213}
1214
1215impl<T, C> Display for Lexicon<T, C>
1216where
1217    T: Eq + Display + std::fmt::Debug + Clone,
1218    C: Eq + Display + std::fmt::Debug + Clone,
1219{
1220    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1221        write!(
1222            f,
1223            "{}",
1224            self.lexemes()
1225                .unwrap()
1226                .iter()
1227                .map(std::string::ToString::to_string)
1228                .join("\n")
1229        )
1230    }
1231}
1232
1233#[cfg(feature = "semantics")]
1234mod semantics;
1235
1236#[cfg(feature = "semantics")]
1237pub use semantics::SemanticLexicon;
1238
1239pub mod mdl;
1240
1241#[cfg(feature = "sampling")]
1242pub mod mutations;
1243
#[cfg(test)]
mod tests {

    use std::collections::HashSet;

    use anyhow::Result;

    use super::*;

    //`categories` should yield selectors/categories and `licensor_types` should yield
    //licensors/licensees from a single parsed entry.
    #[test]
    fn categories() -> Result<()> {
        let lex = Lexicon::from_string("::V= +Z C -W")?;

        let mut cats = lex.categories().collect::<Vec<_>>();
        cats.sort();
        assert_eq!(vec![&"C", &"V"], cats);
        let mut lice = lex.licensor_types().collect::<Vec<_>>();
        lice.sort();
        assert_eq!(vec![&"W", &"Z"], lice);
        Ok(())
    }

    type SimpleLexicalEntry<'a> = LexicalEntry<&'a str, &'a str>;
    //Parsing of single entries, including empty lemmas (both `::` and `ε::` forms)
    //and head-movement affixes (`<=`).
    #[test]
    fn parsing() {
        assert_eq!(
            SimpleLexicalEntry::parse("John::d").unwrap(),
            SimpleLexicalEntry {
                lemma: Some("John"),
                features: vec![Feature::Category("d")]
            }
        );
        assert_eq!(
            SimpleLexicalEntry::parse("eats::d= =d V").unwrap(),
            SimpleLexicalEntry {
                lemma: Some("eats"),
                features: vec![
                    Feature::Selector("d", Direction::Right),
                    Feature::Selector("d", Direction::Left),
                    Feature::Category("V")
                ]
            }
        );
        assert_eq!(
            SimpleLexicalEntry::parse("::d= =d V").unwrap(),
            SimpleLexicalEntry::new(
                None,
                vec![
                    Feature::Selector("d", Direction::Right),
                    Feature::Selector("d", Direction::Left),
                    Feature::Category("V")
                ]
            )
        );
        assert_eq!(
            SimpleLexicalEntry::parse("ε::d= =d V").unwrap(),
            SimpleLexicalEntry {
                lemma: None,
                features: vec![
                    Feature::Selector("d", Direction::Right),
                    Feature::Selector("d", Direction::Left),
                    Feature::Category("V")
                ]
            }
        );
        assert_eq!(
            SimpleLexicalEntry::parse("ε::d<= V").unwrap(),
            SimpleLexicalEntry {
                lemma: None,
                features: vec![
                    Feature::Affix("d", Direction::Right),
                    Feature::Category("V")
                ]
            }
        );
    }

    //Every leaf recovered from the parsed lexicon must match one of the entries
    //parsed directly from the grammar text.
    #[test]
    fn get_lexical_entry() -> anyhow::Result<()> {
        let entries: HashSet<_> = STABLER2011
            .split('\n')
            .map(SimpleLexicalEntry::parse)
            .collect::<Result<_, LexiconParsingError>>()?;
        let lex = Lexicon::from_string(STABLER2011)?;
        for nx in &lex.leaves {
            let lexical_entry = lex.get_lexical_entry(*nx)?;
            assert!(entries.contains(&lexical_entry));
        }
        Ok(())
    }
    //The category of an entry is its plain `Category` feature, wherever it sits
    //in the feature list.
    #[test]
    fn get_category() {
        assert_eq!(
            *SimpleLexicalEntry::parse("eats::d= =d V")
                .unwrap()
                .category(),
            "V"
        );
        assert_eq!(
            *SimpleLexicalEntry::parse("eats::d= V -d")
                .unwrap()
                .category(),
            "V"
        );
        assert_eq!(
            *SimpleLexicalEntry::parse("eats::C -d").unwrap().category(),
            "C"
        );
        assert_eq!(
            *SimpleLexicalEntry::parse("eats::+w Z -d")
                .unwrap()
                .category(),
            "Z"
        );
    }

    //Conversion to `Vec<FeatureOrLemma>` reverses the entry: lemma first, then the
    //innermost selector as a `Complement`, then the remaining features.
    #[test]
    fn convert_to_vec() {
        let x: Vec<FeatureOrLemma<_, _>> =
            SimpleLexicalEntry::parse("eats::d= =d V").unwrap().into();
        assert_eq!(
            x,
            vec![
                FeatureOrLemma::Lemma(Some("eats")),
                FeatureOrLemma::Complement("d", Direction::Right),
                FeatureOrLemma::Feature(Feature::Selector("d", Direction::Left)),
                FeatureOrLemma::Feature(Feature::Category("V")),
            ]
        );
    }

    use crate::grammars::{COPY_LANGUAGE, STABLER2011};
    use petgraph::dot::Dot;

    //Leaf iteration order and per-leaf categories for the Stabler (2011) grammar.
    #[test]
    fn category() -> anyhow::Result<()> {
        let lex = Lexicon::from_string(STABLER2011)?;
        let leaves = lex
            .leaves()
            .iter()
            .map(|&x| (*lex.leaf_to_lemma(x).unwrap(), *lex.category(x).unwrap()))
            .collect::<Vec<_>>();
        assert_eq!(
            leaves,
            vec![
                (None, "C"),
                (None, "C"),
                (Some("knows"), "V"),
                (Some("says"), "V"),
                (Some("prefers"), "V"),
                (Some("drinks"), "V"),
                (Some("king"), "N"),
                (Some("wine"), "N"),
                (Some("beer"), "N"),
                (Some("queen"), "N"),
                (Some("the"), "D"),
                (Some("which"), "D")
            ]
        );
        Ok(())
    }

    //Sibling groups collect lexemes that share a full feature string.
    #[test]
    fn siblings() -> anyhow::Result<()> {
        let lex = Lexicon::from_string(STABLER2011)?;
        let siblings = lex.sibling_leaves();
        let siblings = siblings
            .into_iter()
            .map(|x| {
                x.into_iter()
                    .map(|x| *lex.leaf_to_lemma(x).unwrap())
                    .collect::<Vec<_>>()
            })
            .collect::<Vec<_>>();
        assert_eq!(
            siblings,
            vec![
                vec![Some("which")],
                vec![Some("the")],
                vec![Some("queen"), Some("beer"), Some("wine"), Some("king")],
                vec![Some("drinks"), Some("prefers")],
                vec![Some("says"), Some("knows")],
                vec![None],
                vec![None]
            ]
        );
        Ok(())
    }

    //End-to-end check of trie construction and weight renormalisation: the exact Dot
    //rendering (node sharing and log-probability edge weights) is pinned down for two
    //reference grammars.
    #[test]
    fn initialize_lexicon() -> anyhow::Result<()> {
        let strings: Vec<&str> = STABLER2011.split('\n').collect();

        let lex = Lexicon::from_string(STABLER2011)?;
        for lex in lex.lexemes()? {
            assert!(
                strings.contains(&format!("{}", lex).as_str())
                    || strings.contains(&format!("{}", lex).replace('ε', "").as_str())
            )
        }
        let lex_2 = Lexicon::new(lex.lexemes().unwrap(), false);
        assert_eq!(lex, lex_2);
        assert_eq!(
            format!("{}", Dot::new(&lex.graph)),
            "digraph {
    0 [ label = \"root\" ]
    1 [ label = \"C\" ]
    2 [ label = \"V=\" ]
    3 [ label = \"ε\" ]
    4 [ label = \"+W\" ]
    5 [ label = \"V=\" ]
    6 [ label = \"ε\" ]
    7 [ label = \"V\" ]
    8 [ label = \"=D\" ]
    9 [ label = \"C=\" ]
    10 [ label = \"knows\" ]
    11 [ label = \"says\" ]
    12 [ label = \"D=\" ]
    13 [ label = \"prefers\" ]
    14 [ label = \"drinks\" ]
    15 [ label = \"N\" ]
    16 [ label = \"king\" ]
    17 [ label = \"wine\" ]
    18 [ label = \"beer\" ]
    19 [ label = \"queen\" ]
    20 [ label = \"D\" ]
    21 [ label = \"N=\" ]
    22 [ label = \"the\" ]
    23 [ label = \"-W\" ]
    24 [ label = \"D\" ]
    25 [ label = \"N=\" ]
    26 [ label = \"which\" ]
    0 -> 1 [ label = \"-2.8042006992676702\" ]
    1 -> 2 [ label = \"-0.6931471805599453\" ]
    2 -> 3 [ label = \"0\" ]
    1 -> 4 [ label = \"-0.6931471805599453\" ]
    4 -> 5 [ label = \"0\" ]
    5 -> 6 [ label = \"0\" ]
    0 -> 7 [ label = \"-0.8042006992676702\" ]
    7 -> 8 [ label = \"0\" ]
    8 -> 9 [ label = \"-0.6931471805599454\" ]
    9 -> 10 [ label = \"-0.6931471805599453\" ]
    9 -> 11 [ label = \"-0.6931471805599453\" ]
    8 -> 12 [ label = \"-0.6931471805599454\" ]
    12 -> 13 [ label = \"-0.6931471805599453\" ]
    12 -> 14 [ label = \"-0.6931471805599453\" ]
    0 -> 15 [ label = \"-0.8042006992676702\" ]
    15 -> 16 [ label = \"-1.3862943611198906\" ]
    15 -> 17 [ label = \"-1.3862943611198906\" ]
    15 -> 18 [ label = \"-1.3862943611198906\" ]
    15 -> 19 [ label = \"-1.3862943611198906\" ]
    0 -> 20 [ label = \"-3.8042006992676702\" ]
    20 -> 21 [ label = \"0\" ]
    21 -> 22 [ label = \"0\" ]
    0 -> 23 [ label = \"-3.8042006992676702\" ]
    23 -> 24 [ label = \"0\" ]
    24 -> 25 [ label = \"0\" ]
    25 -> 26 [ label = \"0\" ]
}
"
        );
        let n_categories = lex.categories().collect::<HashSet<_>>().len();
        assert_eq!(4, n_categories);
        let n_licensors = lex.licensor_types().collect::<HashSet<_>>().len();
        assert_eq!(1, n_licensors);
        let mut lemmas: Vec<_> = lex.lemmas().collect();
        lemmas.sort();
        assert_eq!(
            lemmas,
            vec![
                &None,
                &None,
                &Some("beer"),
                &Some("drinks"),
                &Some("king"),
                &Some("knows"),
                &Some("prefers"),
                &Some("queen"),
                &Some("says"),
                &Some("the"),
                &Some("which"),
                &Some("wine")
            ]
        );

        let lex = Lexicon::from_string(COPY_LANGUAGE)?;
        assert_ne!(lex, lex_2);
        assert_eq!(
            "digraph {
    0 [ label = \"root\" ]
    1 [ label = \"T\" ]
    2 [ label = \"+l\" ]
    3 [ label = \"+r\" ]
    4 [ label = \"=T\" ]
    5 [ label = \"ε\" ]
    6 [ label = \"-l\" ]
    7 [ label = \"-r\" ]
    8 [ label = \"T\" ]
    9 [ label = \"ε\" ]
    10 [ label = \"T\" ]
    11 [ label = \"+l\" ]
    12 [ label = \"=A\" ]
    13 [ label = \"a\" ]
    14 [ label = \"-r\" ]
    15 [ label = \"A\" ]
    16 [ label = \"+r\" ]
    17 [ label = \"=T\" ]
    18 [ label = \"a\" ]
    19 [ label = \"=B\" ]
    20 [ label = \"b\" ]
    21 [ label = \"B\" ]
    22 [ label = \"+r\" ]
    23 [ label = \"=T\" ]
    24 [ label = \"b\" ]
    0 -> 1 [ label = \"-2.4076059644443806\" ]
    1 -> 2 [ label = \"0\" ]
    2 -> 3 [ label = \"0\" ]
    3 -> 4 [ label = \"0\" ]
    4 -> 5 [ label = \"0\" ]
    0 -> 6 [ label = \"-0.4076059644443806\" ]
    6 -> 7 [ label = \"-1.3132616875182228\" ]
    7 -> 8 [ label = \"0\" ]
    8 -> 9 [ label = \"0\" ]
    6 -> 10 [ label = \"-0.3132616875182228\" ]
    10 -> 11 [ label = \"0\" ]
    11 -> 12 [ label = \"-0.6931471805599453\" ]
    12 -> 13 [ label = \"0\" ]
    0 -> 14 [ label = \"-1.4076059644443804\" ]
    14 -> 15 [ label = \"-0.6931471805599453\" ]
    15 -> 16 [ label = \"0\" ]
    16 -> 17 [ label = \"0\" ]
    17 -> 18 [ label = \"0\" ]
    11 -> 19 [ label = \"-0.6931471805599453\" ]
    19 -> 20 [ label = \"0\" ]
    14 -> 21 [ label = \"-0.6931471805599453\" ]
    21 -> 22 [ label = \"0\" ]
    22 -> 23 [ label = \"0\" ]
    23 -> 24 [ label = \"0\" ]
}
",
            format!("{}", Dot::new(&lex.graph))
        );
        let n_categories = lex.categories().collect::<HashSet<_>>().len();
        assert_eq!(3, n_categories);
        let n_licensors = lex.licensor_types().collect::<HashSet<_>>().len();
        assert_eq!(2, n_licensors);
        Ok(())
    }

    //Smoke test: borrowed-to-owned conversion succeeds on a real grammar.
    #[test]
    fn conversion() -> anyhow::Result<()> {
        let lex = Lexicon::from_string(STABLER2011)?;
        let _ = lex.to_owned_values();
        Ok(())
    }
    //Remapping to `String` and back to `&str` must round-trip to an equal lexicon.
    #[test]
    fn conversion2() -> anyhow::Result<()> {
        let lex = Lexicon::from_string(STABLER2011)?;
        let lex2 = lex.remap_lexicon(|x| x.to_string(), |c| c.to_string());
        let lex3 = lex2.remap_lexicon(|x| x.as_str(), |c| c.as_str());
        assert_eq!(lex, lex3);

        let x = ParsingError::NoLicensorOrCategory(LicenseeOrCategory::Category("s"));
        let _y: ParsingError<String> = x.inner_into();

        Ok(())
    }
}