1#[cfg(feature = "pretty")]
4use serde::Serialize;
5
6use crate::Direction;
7use crate::parsing::PossibleTree;
8use chumsky::{extra::ParserExtra, label::LabelError, text::TextExpected, util::MaybeRef};
9use chumsky::{
10 prelude::*,
11 text::{inline_whitespace, newline},
12};
13use itertools::Itertools;
14
15use logprob::{LogProb, Softmax};
16
17use logprob::LogSumExp;
18
19use petgraph::Direction::Outgoing;
20use petgraph::dot::Dot;
21use petgraph::prelude::StableDiGraph;
22use petgraph::{
23 graph::NodeIndex,
24 visit::{EdgeRef, IntoNodeReferences},
25};
26use std::result::Result;
27use std::{
28 fmt::{Debug, Display},
29 hash::Hash,
30};
31use thiserror::Error;
32
/// A single feature of a Minimalist Grammar lexical entry.
///
/// The concrete notation for each variant is defined by the `Display` impl
/// below (e.g. `+x` for a licensor, `x=`/`=x` for selectors).
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Feature<Category: Eq> {
    /// The entry's category, written as a bare name (e.g. `V`).
    Category(Category),
    /// Selects an argument of the given category on the given side (`x=` / `=x`).
    Selector(Category, Direction),
    /// Affixal (head-movement) selector (`x<=` / `=>x`).
    Affix(Category, Direction),
    /// Movement trigger (`+x`).
    Licensor(Category),
    /// Movement target (`-x`).
    Licensee(Category),
}
47
impl<C: Eq> Feature<C> {
    /// Borrows the category carried by any feature variant.
    pub fn inner(&self) -> &C {
        match self {
            Feature::Category(c)
            | Feature::Selector(c, _)
            | Feature::Affix(c, _)
            | Feature::Licensor(c)
            | Feature::Licensee(c) => c,
        }
    }

    /// Consumes the feature and returns its category.
    pub fn into_inner(self) -> C {
        match self {
            Feature::Category(c)
            | Feature::Selector(c, _)
            | Feature::Affix(c, _)
            | Feature::Licensor(c)
            | Feature::Licensee(c) => c,
        }
    }

    /// `true` only for `Feature::Selector`; note that `Affix` does not count
    /// as a selector here.
    pub fn is_selector(&self) -> bool {
        matches!(self, Self::Selector(_, _))
    }
}
76
/// Either a licensee or a category feature, used in parsing-error reports to
/// say which kind of feature could not be found.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
#[allow(missing_docs)]
pub enum LicenseeOrCategory<C> {
    Licensee(C),
    Category(C),
}
84
85impl<C: Debug> Display for LicenseeOrCategory<C> {
86 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
87 match self {
88 LicenseeOrCategory::Licensee(c) => write!(f, "-{c:?}"),
89 LicenseeOrCategory::Category(c) => write!(f, "{c:?}"),
90 }
91 }
92}
93
/// Errors raised when inspecting or reconstructing entries from a lexicon graph.
#[derive(Error, Clone, Copy, Debug)]
pub enum LexiconError {
    /// The node index is not present in the lexicon graph.
    #[error("There is no node ({0:?})")]
    MissingNode(NodeIndex),

    /// The id does not refer to a lemma (leaf) node.
    #[error("{0:?} is not a lexeme")]
    MissingLexeme(LexemeId),

    /// Walking parent links from the node never reaches the root.
    #[error("Following the parents of node, ({0:?}), doesn't lead to root")]
    DoesntGoToRoot(NodeIndex),

    /// Walking parent links passes through the root or another lemma first.
    #[error("Following the parents of node, ({0:?}), we pass through the root or another lemma")]
    InvalidOrder(NodeIndex),
}
113
/// Errors raised while parsing against a lexicon.
#[derive(Error, Clone, Copy, Debug)]
pub enum ParsingError<C>
where
    C: Debug,
{
    /// No node under the root carries the requested category or licensee.
    #[error("There is nothing with feature ({0})")]
    NoLicensorOrCategory(LicenseeOrCategory<C>),
}
124
125impl<A: Debug> ParsingError<A> {
126 pub fn inner_into<B: From<A> + Debug>(self: ParsingError<A>) -> ParsingError<B> {
136 match self {
137 ParsingError::NoLicensorOrCategory(LicenseeOrCategory::Licensee(c)) => {
138 ParsingError::NoLicensorOrCategory(LicenseeOrCategory::Licensee(c.into()))
139 }
140 ParsingError::NoLicensorOrCategory(LicenseeOrCategory::Category(c)) => {
141 ParsingError::NoLicensorOrCategory(LicenseeOrCategory::Category(c.into()))
142 }
143 }
144 }
145}
146
/// Parses an entire grammar: newline-separated lexical entries, allowing
/// leading/trailing blank lines, combined into a [`Lexicon`] with
/// `collapse_lemmas = true`.
fn grammar_parser<'src>()
-> impl Parser<'src, &'src str, Lexicon<&'src str, &'src str>, extra::Err<Rich<'src, char>>> {
    entry_parser::<extra::Err<Rich<'src, char>>>()
        .separated_by(newline())
        .allow_leading()
        .allow_trailing()
        .collect::<Vec<_>>()
        .map(|x| Lexicon::new(x, true))
        .then_ignore(end())
}
157
158fn entry_parser<'src, E>() -> impl Parser<'src, &'src str, LexicalEntry<&'src str, &'src str>, E>
159where
160 E: ParserExtra<'src, &'src str>,
161 E::Error: LabelError<'src, &'src str, TextExpected<'src, &'src str>>
162 + LabelError<'src, &'src str, MaybeRef<'src, char>>
163 + LabelError<'src, &'src str, &'static str>,
164{
165 let feature_name = any()
166 .and_is(none_of([
167 '\t', '\n', ' ', '+', '-', '=', '>', '<', ':', '\r',
168 ]))
169 .repeated()
170 .at_least(1)
171 .labelled("feature name")
172 .to_slice();
173
174 let affix = choice((
175 feature_name
176 .then_ignore(just("<="))
177 .map(|x| Feature::Affix(x, Direction::Right))
178 .labelled("right affix"),
179 just("=>")
180 .ignore_then(feature_name)
181 .map(|x| Feature::Affix(x, Direction::Left))
182 .labelled("left affix"),
183 ));
184
185 let pre_category_features = choice((
186 feature_name
187 .then_ignore(just("="))
188 .map(|x| Feature::Selector(x, Direction::Right))
189 .labelled("right selector"),
190 just("=")
191 .ignore_then(feature_name)
192 .map(|x| Feature::Selector(x, Direction::Left))
193 .labelled("left selector"),
194 just("+")
195 .ignore_then(feature_name)
196 .map(Feature::Licensor)
197 .labelled("licensor"),
198 ));
199
200 choice((
201 just("ε").to(None),
202 (none_of(['\t', '\n', ' ', '+', '-', '=', ':', '\r'])
203 .repeated()
204 .at_least(1)
205 .to_slice())
206 .or_not(),
207 ))
208 .labelled("lemma")
209 .then_ignore(
210 just("::")
211 .padded_by(inline_whitespace())
212 .labelled("lemma feature seperator"),
213 )
214 .then(
215 affix
216 .or_not()
217 .then_ignore(inline_whitespace().or_not())
218 .then(
219 pre_category_features
220 .separated_by(inline_whitespace())
221 .allow_trailing()
222 .collect::<Vec<_>>()
223 .labelled("pre category features"),
224 )
225 .map(|(affix, mut feats)| {
226 if let Some(affix) = affix {
227 feats.insert(0, affix);
228 }
229 feats
230 }),
231 )
232 .then(
233 feature_name
234 .map(Feature::Category)
235 .labelled("category feature"),
236 )
237 .then(
238 just('-')
239 .ignore_then(feature_name)
240 .map(Feature::Licensee)
241 .labelled("licensee")
242 .separated_by(inline_whitespace())
243 .allow_leading()
244 .allow_trailing()
245 .collect::<Vec<_>>()
246 .labelled("licensees"),
247 )
248 .map(|(((lemma, mut features), category), mut licensees)| {
249 features.push(category);
250 features.append(&mut licensees);
251 LexicalEntry::new(lemma, features)
252 })
253}
254
/// A node label in the lexicon's prefix-tree graph.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) enum FeatureOrLemma<T: Eq, Category: Eq> {
    /// The unique root; every entry's path starts here.
    Root,
    /// A leaf holding the lemma (`None` = phonologically empty / ε).
    Lemma(Option<T>),
    /// An ordinary feature of an entry.
    Feature(Feature<Category>),
    /// An entry's first selector, stored as a distinct variant from other
    /// selectors.
    Complement(Category, Direction),
}
262
impl<T: Eq, C: Eq> FeatureOrLemma<T, C> {
    /// `true` when this node is a lemma leaf.
    fn is_lemma(&self) -> bool {
        matches!(self, FeatureOrLemma::Lemma(_))
    }
}
268
#[cfg(feature = "sampling")]
impl<T: Eq, C: Eq + Clone> FeatureOrLemma<T, C> {
    /// Rewrites a `Feature::Selector` node into the equivalent `Complement`
    /// node in place; any other variant is left unchanged.
    fn to_complement_mut(&mut self) {
        if let FeatureOrLemma::Feature(Feature::Selector(c, d)) = self {
            *self = FeatureOrLemma::Complement(c.clone(), *d);
        }
    }
}
277
278impl<T: Eq, Category: Eq> From<LexicalEntry<T, Category>> for Vec<FeatureOrLemma<T, Category>> {
279 fn from(value: LexicalEntry<T, Category>) -> Self {
280 let LexicalEntry { lemma, features } = value;
281 std::iter::once(lemma)
282 .map(|x| FeatureOrLemma::Lemma(x))
283 .chain(
284 features
285 .into_iter()
286 .enumerate()
287 .map(|(i, feat)| match feat {
288 Feature::Selector(c, d) if i == 0 => FeatureOrLemma::Complement(c, d),
291 _ => FeatureOrLemma::Feature(feat),
292 }),
293 )
294 .collect()
295 }
296}
297
/// A single lexical entry: an optional lemma plus its ordered feature list.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct LexicalEntry<T: Eq, Category: Eq> {
    // `None` encodes a phonologically empty (ε) lemma.
    pub(crate) lemma: Option<T>,
    // Features in lexical order (see `entry_parser` for the ordering).
    pub(crate) features: Vec<Feature<Category>>,
}
304impl<T: Eq, Category: Eq> LexicalEntry<T, Category> {
305 pub fn new(lemma: Option<T>, features: Vec<Feature<Category>>) -> LexicalEntry<T, Category> {
307 LexicalEntry { lemma, features }
308 }
309
310 pub fn lemma(&self) -> &Option<T> {
312 &self.lemma
313 }
314
315 pub fn features(&self) -> &[Feature<Category>] {
317 &self.features
318 }
319
320 pub fn remap<'lex, T2: Eq, C2: Eq>(
322 &'lex self,
323 lemma_map: impl Fn(&'lex T) -> T2,
324 category_map: impl Fn(&'lex Category) -> C2,
325 ) -> LexicalEntry<T2, C2> {
326 let lemma = self.lemma.as_ref().map(lemma_map);
327 let features = self
328 .features
329 .iter()
330 .map(|x| match x {
331 Feature::Category(c) => Feature::Category(category_map(c)),
332 Feature::Selector(c, direction) => Feature::Selector(category_map(c), *direction),
333 Feature::Affix(c, direction) => Feature::Affix(category_map(c), *direction),
334 Feature::Licensor(c) => Feature::Licensor(category_map(c)),
335 Feature::Licensee(c) => Feature::Licensee(category_map(c)),
336 })
337 .collect();
338 LexicalEntry { lemma, features }
339 }
340
341 pub fn category(&self) -> &Category {
343 let mut cat = None;
344 for lex in self.features.iter() {
345 if let Feature::Category(c) = lex {
346 cat = Some(c);
347 break;
348 }
349 }
350 cat.expect("All lexical entries must have one category")
351 }
352}
353
354impl<T: Display + PartialEq + Eq, Category: Display + PartialEq + Eq> Display
355 for LexicalEntry<T, Category>
356{
357 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
358 if let Some(t) = &self.lemma {
359 write!(f, "{t}::")?;
360 } else {
361 write!(f, "ε::")?;
362 }
363 for (i, feature) in self.features.iter().enumerate() {
364 if i > 0 {
365 write!(f, " ")?;
366 }
367 write!(f, "{feature}")?;
368 }
369 Ok(())
370 }
371}
372
373impl<Category: Display + Eq> Display for Feature<Category> {
374 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
375 match self {
376 Feature::Category(x) => write!(f, "{x}"),
377 Feature::Selector(x, Direction::Left) => write!(f, "={x}"),
378 Feature::Selector(x, Direction::Right) => write!(f, "{x}="),
379 Feature::Affix(x, Direction::Left) => write!(f, "=>{x}"),
380 Feature::Affix(x, Direction::Right) => write!(f, "{x}<="),
381 Feature::Licensor(x) => write!(f, "+{x}"),
382 Feature::Licensee(x) => write!(f, "-{x}"),
383 }
384 }
385}
386
/// A weighted lexicon stored as a prefix tree over [`FeatureOrLemma`] nodes.
///
/// Entries with shared trailing features share graph nodes; each edge carries
/// the log-probability of that child given its parent.
#[derive(Debug, Clone)]
pub struct Lexicon<T: Eq, Category: Eq> {
    graph: StableDiGraph<FeatureOrLemma<T, Category>, LogProb<f64>>,
    // The unique `FeatureOrLemma::Root` node.
    root: NodeIndex,
    // One id per stored lexeme, pointing at its lemma (leaf) node.
    leaves: Vec<LexemeId>,
}
423
/// Identifier of a lexeme: the graph index of its lemma (leaf) node.
#[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
pub struct LexemeId(pub(crate) NodeIndex);
427
#[cfg(feature = "pretty")]
impl Serialize for LexemeId {
    /// Serialises as a one-field tuple struct holding the raw node index.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        use serde::ser::SerializeTupleStruct;

        let mut s = serializer.serialize_tuple_struct("LexemeId", 1)?;
        s.serialize_field(&self.0.index())?;
        s.end()
    }
}
441
impl<T: Eq, Category: Eq> PartialEq for Lexicon<T, Category> {
    /// Structural equality: compares the root, the leaf list, and node/edge
    /// weights in iteration order rather than by raw graph indices.
    ///
    /// NOTE(review): this assumes both graphs were built in the same
    /// insertion order; isomorphic graphs built in different orders may
    /// compare unequal — confirm callers rely on matching build order.
    fn eq(&self, other: &Self) -> bool {
        self.root == other.root
            && self.leaves == other.leaves
            && self.graph.node_weights().eq(other.graph.node_weights())
            && self.graph.edge_weights().eq(other.graph.edge_weights())
    }
}
impl<T: Eq, Category: Eq> Eq for Lexicon<T, Category> {}
451impl<T, Category> Lexicon<T, Category>
452where
453 T: Eq + std::fmt::Debug + Clone + Display,
454 Category: Eq + std::fmt::Debug + Clone + Display,
455{
456 pub fn graphviz(&self) -> String {
458 let dot = Dot::new(&self.graph);
459 format!("{dot}")
460 }
461}
462
/// Converts raw accumulated edge weights into per-node log-probability
/// distributions.
///
/// For each node: a single outgoing edge gets weight `0.0` (probability 1 in
/// log space); multiple outgoing edges are softmax-normalised over their raw
/// weights; nodes with no outgoing edges are left untouched.
fn renormalise_weights<T: Eq + Clone, C: Eq + Clone>(
    mut graph: StableDiGraph<FeatureOrLemma<T, C>, f64>,
) -> StableDiGraph<FeatureOrLemma<T, C>, LogProb<f64>> {
    // Snapshot node indices so edges can be mutated while iterating.
    for node_index in graph.node_indices().collect_vec() {
        let edges: Vec<_> = graph
            .edges_directed(node_index, petgraph::Direction::Outgoing)
            .map(|e| (*e.weight(), e.id()))
            .collect();

        match edges.len().cmp(&1) {
            std::cmp::Ordering::Equal => {
                // Exactly one child: probability 1, i.e. log-prob 0.
                for (_, edge) in edges {
                    graph[edge] = 0.0;
                }
            }
            std::cmp::Ordering::Greater => {
                // Several children: softmax the raw weights; `into_inner`
                // extracts the log-space value from each `LogProb`.
                let dist = edges
                    .iter()
                    .map(|(w, _edge)| *w)
                    .softmax()
                    .unwrap()
                    .map(|x| x.into_inner());

                for (new_weight, (_weight, edge)) in dist.zip(edges.iter()) {
                    graph[*edge] = new_weight;
                }
            }
            // No outgoing edges (lemma leaves): nothing to normalise.
            _ => (),
        }
    }

    // All edge weights are now valid log-probabilities, so the conversion
    // should not fail — assumes input weights were finite.
    graph.map(|_, n| n.clone(), |_, e| LogProb::new(*e).unwrap())
}
498
impl<T: Eq + std::fmt::Debug + Clone, Category: Eq + std::fmt::Debug + Clone> Lexicon<T, Category> {
    /// Collects the lexemes that can head a phrase selected by the affix at
    /// `nx`, recursing through nested affixes.
    ///
    /// `depth` bounds the recursion: beyond 10 levels an empty tree is
    /// returned, which prevents unbounded recursion on cyclic grammars.
    ///
    /// # Panics
    /// Panics if `nx` is not a `Feature::Affix` node.
    pub(crate) fn possible_heads(
        &self,
        nx: NodeIndex,
        depth: usize,
    ) -> Result<PossibleTree, ParsingError<Category>> {
        if depth > 10 {
            return Ok(PossibleTree::empty());
        }
        let Some(FeatureOrLemma::Feature(Feature::Affix(category, direction))) =
            self.graph.node_weight(nx)
        else {
            panic!("Node must be an affix!")
        };

        let mut children = vec![];
        let mut lemmas = vec![];
        // Start the search from the category node the affix selects.
        let x: NodeIndex = self.find_category(category)?;

        let mut stack = vec![x];

        // Depth-first walk: recurse into nested affixes, collect lemma
        // leaves, and keep descending through every other node kind.
        while let Some(nx) = stack.pop() {
            match self.graph.node_weight(nx).unwrap() {
                FeatureOrLemma::Feature(Feature::Affix(..)) => {
                    children.push(self.possible_heads(nx, depth + 1)?)
                }
                FeatureOrLemma::Lemma(_) => lemmas.push(LexemeId(nx)),
                _ => stack.extend(self.children_of(nx)),
            }
        }
        children.push(PossibleTree::just_heads(lemmas));

        // Lemma nodes that are direct children of the affix node itself.
        let heads: Vec<LexemeId> = self
            .children_of(nx)
            .filter_map(|child| {
                if matches!(
                    self.graph.node_weight(child).unwrap(),
                    FeatureOrLemma::Lemma(_)
                ) {
                    Some(LexemeId(child))
                } else {
                    None
                }
            })
            .collect();
        Ok(PossibleTree::just_heads(heads).merge(children, *direction))
    }
}
547
/// Iterator that walks from a node towards the root of a [`Lexicon`],
/// yielding the feature at each node it passes through.
pub struct Climber<'a, T: Eq, C: Eq> {
    lex: &'a Lexicon<T, C>,
    // Current node; advanced to its parent on every `next` call.
    pos: NodeIndex,
}
553
554impl<'a, T: Eq, C: Eq + Clone> Iterator for Climber<'a, T, C> {
555 type Item = Feature<C>;
556
557 fn next(&mut self) -> Option<Self::Item> {
558 self.pos = self.lex.parent_of(self.pos)?;
559
560 match self.lex.graph.node_weight(self.pos) {
561 Some(x) => match x {
562 FeatureOrLemma::Root => None,
563 FeatureOrLemma::Lemma(_) => None,
564 FeatureOrLemma::Feature(feature) => Some(feature.clone()),
565 FeatureOrLemma::Complement(c, direction) => {
566 Some(Feature::Selector(c.clone(), *direction))
567 }
568 },
569 None => None,
570 }
571 }
572}
573
/// Re-normalises the outgoing edge log-probabilities of `node_index` so they
/// sum to one; edges that already form a distribution are left untouched.
pub(crate) fn fix_weights_per_node<T: Eq, C: Eq>(
    graph: &mut StableDiGraph<FeatureOrLemma<T, C>, LogProb<f64>>,
    node_index: NodeIndex,
) {
    // Log of the total outgoing probability mass.
    let sum = graph
        .edges_directed(node_index, Outgoing)
        .map(|x| x.weight())
        .log_sum_exp_float_no_alloc();

    if sum != 0.0 {
        // Collect edge ids first: the iterator borrows the graph immutably.
        let edges: Vec<_> = graph
            .edges_directed(node_index, Outgoing)
            .map(|e| e.id())
            .collect();
        for e in edges {
            // Dividing by the total is a subtraction in log space.
            graph[e] = LogProb::new(graph[e].into_inner() - sum).unwrap();
        }
    }
}
593
594pub(crate) fn fix_weights<T: Eq + Clone, C: Eq + Clone>(
595 graph: &mut StableDiGraph<FeatureOrLemma<T, C>, LogProb<f64>>,
596) {
597 for node_index in graph.node_indices().collect_vec() {
599 fix_weights_per_node(graph, node_index);
600 }
601}
602
impl<T: Eq, Category: Eq> Lexicon<T, Category> {
    /// Inserts a lexical entry into the prefix tree, sharing nodes with
    /// existing entries where their feature paths coincide.
    ///
    /// Returns `Some(LexemeId)` for a newly added lexeme, or `None` when the
    /// exact entry was already present.
    pub(crate) fn add_lexical_entry(
        &mut self,
        lexical_entry: LexicalEntry<T, Category>,
    ) -> Option<LexemeId> {
        let LexicalEntry { lemma, features } = lexical_entry;

        // Build the node sequence lemma-first; an entry-initial selector
        // becomes a `Complement` node.
        let mut new_nodes = vec![FeatureOrLemma::Lemma(lemma)];
        for (i, f) in features.into_iter().enumerate() {
            let f = if i == 0 && f.is_selector() {
                let Feature::Selector(c, d) = f else {
                    panic!("We checked it was a selector!")
                };
                FeatureOrLemma::Complement(c, d)
            } else {
                FeatureOrLemma::Feature(f)
            };
            new_nodes.push(f);
        }

        // Walk down from the root; `pop` consumes the vec in reverse, so the
        // path is inserted feature-last-first with the lemma at the bottom.
        // New edges start at probability 1 and the parent's distribution is
        // re-normalised immediately.
        let mut pos = self.root;
        while let Some(f) = new_nodes.pop() {
            if let Some(p) = self.children_of(pos).find(|x| self.find(*x) == Some(&f)) {
                pos = p;
            } else {
                let new_pos = self.graph.add_node(f);
                self.graph.add_edge(pos, new_pos, LogProb::prob_of_one());
                fix_weights_per_node(&mut self.graph, pos);
                pos = new_pos;
            }
        }
        if self.leaves.contains(&LexemeId(pos)) {
            None
        } else {
            self.leaves.push(LexemeId(pos));
            Some(LexemeId(pos))
        }
    }

    /// Number of children (outgoing edges) of `nx`.
    pub(crate) fn n_children(&self, nx: NodeIndex) -> usize {
        self.graph
            .edges_directed(nx, petgraph::Direction::Outgoing)
            .count()
    }

    /// Iterates over the child nodes of `nx`.
    pub(crate) fn children_of(&self, nx: NodeIndex) -> impl Iterator<Item = NodeIndex> + '_ {
        self.graph
            .edges_directed(nx, petgraph::Direction::Outgoing)
            .map(|e| e.target())
    }

    /// The label of `nx`, if the node exists.
    pub(crate) fn find(&self, nx: NodeIndex) -> Option<&FeatureOrLemma<T, Category>> {
        self.graph.node_weight(nx)
    }

    /// A [`Climber`] starting at `nx` (no check that `nx` is a lemma).
    pub(crate) fn node_to_features<'a>(&'a self, nx: NodeIndex) -> Climber<'a, T, Category> {
        Climber { lex: self, pos: nx }
    }

    /// A [`Climber`] over the features of `lexeme`, or `None` if the id does
    /// not point at a lemma node.
    pub fn leaf_to_features<'a>(&'a self, lexeme: LexemeId) -> Option<Climber<'a, T, Category>> {
        if !matches!(
            self.graph.node_weight(lexeme.0),
            Some(FeatureOrLemma::Lemma(_))
        ) {
            return None;
        }

        Some(Climber {
            lex: self,
            pos: lexeme.0,
        })
    }

    /// The lemma stored at `lexeme_id`, or `None` if the id is missing or
    /// not a lemma node. (The inner `Option` is the ε/non-ε distinction.)
    pub fn leaf_to_lemma(&self, lexeme_id: LexemeId) -> Option<&Option<T>> {
        match self.graph.node_weight(lexeme_id.0) {
            Some(x) => {
                if let FeatureOrLemma::Lemma(l) = x {
                    Some(l)
                } else {
                    None
                }
            }
            None => None,
        }
    }

    /// Climbs parent links from `lexeme_id` until the category node of its
    /// entry is found; `None` if the walk runs out of parents first.
    pub fn category(&self, lexeme_id: LexemeId) -> Option<&Category> {
        let mut pos = lexeme_id.0;
        while !matches!(
            self.graph[pos],
            FeatureOrLemma::Feature(Feature::Category(_))
        ) {
            let new_pos = self.parent_of(pos)?;
            pos = new_pos
        }
        let FeatureOrLemma::Feature(Feature::Category(cat)) = &self.graph[pos] else {
            return None;
        };
        Some(cat)
    }

    /// The (unique) parent of `nx`; `None` for the root.
    // Takes the first incoming edge — the prefix tree gives each node at
    // most one parent.
    pub(crate) fn parent_of(&self, nx: NodeIndex) -> Option<NodeIndex> {
        self.graph
            .edges_directed(nx, petgraph::Direction::Incoming)
            .next()
            .map(|e| e.source())
    }
}
719
impl<T: Eq + std::fmt::Debug + Clone, Category: Eq + std::fmt::Debug + Clone> Lexicon<T, Category> {
    /// `true` if `nx` is a complement-selector or affix node.
    ///
    /// # Panics
    /// Panics if `nx` is not in the graph.
    pub fn is_complement(&self, nx: NodeIndex) -> bool {
        matches!(
            self.graph.node_weight(nx).unwrap(),
            FeatureOrLemma::Complement(_, _) | FeatureOrLemma::Feature(Feature::Affix(_, _))
        )
    }

    /// Total number of nodes in the graph (including the root).
    pub fn n_nodes(&self) -> usize {
        self.graph.node_count()
    }

    /// All lexeme ids.
    pub fn leaves(&self) -> &[LexemeId] {
        &self.leaves
    }

    /// Groups lexemes that share a parent node, i.e. entries with identical
    /// feature paths.
    pub fn sibling_leaves(&self) -> Vec<Vec<LexemeId>> {
        let mut leaves = self.leaves.clone();
        let mut result = vec![];
        while let Some(leaf) = leaves.pop() {
            let siblings: Vec<_> = self
                .children_of(self.parent_of(leaf.0).unwrap())
                .filter_map(|a| {
                    if matches!(self.graph.node_weight(a).unwrap(), FeatureOrLemma::Lemma(_)) {
                        Some(LexemeId(a))
                    } else {
                        None
                    }
                })
                .collect();
            // Remove the whole group from the worklist so each sibling set is
            // emitted exactly once.
            leaves.retain(|x| !siblings.contains(x));
            result.push(siblings);
        }
        result
    }

    /// Pairs every lexeme id with its reconstructed entry.
    pub fn lexemes_and_ids(
        &self,
    ) -> Result<impl Iterator<Item = (LexemeId, LexicalEntry<T, Category>)>, LexiconError> {
        Ok(self.leaves().iter().copied().zip(self.lexemes()?))
    }

    /// Reconstructs every lexical entry by climbing from each leaf towards
    /// the root, collecting features along the way.
    ///
    /// # Errors
    /// [`LexiconError::MissingLexeme`] if a recorded leaf is not a lemma node.
    pub fn lexemes(&self) -> Result<Vec<LexicalEntry<T, Category>>, LexiconError> {
        let mut v = vec![];

        for leaf in self.leaves.iter() {
            if let FeatureOrLemma::Lemma(lemma) = &self.graph[leaf.0] {
                let mut features = vec![];
                let mut nx = leaf.0;
                while let Some(parent) = self.parent_of(nx) {
                    if parent == self.root {
                        break;
                    } else if let FeatureOrLemma::Feature(f) = &self.graph[parent] {
                        features.push(f.clone());
                    } else if let FeatureOrLemma::Complement(c, d) = &self.graph[parent] {
                        // Complement nodes read back as ordinary selectors.
                        features.push(Feature::Selector(c.clone(), *d));
                    }
                    nx = parent;
                }

                v.push(LexicalEntry {
                    lemma: lemma.as_ref().cloned(),
                    features,
                })
            } else {
                return Err(LexiconError::MissingLexeme(*leaf));
            }
        }
        Ok(v)
    }

    /// Reconstructs the single entry identified by `lexeme_id`.
    ///
    /// # Errors
    /// [`LexiconError::MissingLexeme`] if the id is absent from the graph or
    /// does not point at a lemma node.
    pub fn get_lexical_entry(
        &self,
        lexeme_id: LexemeId,
    ) -> Result<LexicalEntry<T, Category>, LexiconError> {
        let Some(lemma) = self.graph.node_weight(lexeme_id.0) else {
            return Err(LexiconError::MissingLexeme(lexeme_id));
        };
        let lemma = match lemma {
            FeatureOrLemma::Lemma(lemma) => lemma,
            FeatureOrLemma::Feature(_)
            | FeatureOrLemma::Root
            | FeatureOrLemma::Complement(_, _) => {
                return Err(LexiconError::MissingLexeme(lexeme_id));
            }
        }
        .clone();

        // Unwrap is safe: the node was just verified to be a lemma.
        let features = self.leaf_to_features(lexeme_id).unwrap().collect();
        Ok(LexicalEntry { features, lemma })
    }

    /// Iterates over the lemmas of all leaves (`None` = ε entries).
    pub fn lemmas(&self) -> impl Iterator<Item = &Option<T>> {
        self.leaves.iter().filter_map(|x| match &self.graph[x.0] {
            FeatureOrLemma::Lemma(x) => Some(x),
            _ => None,
        })
    }

    /// Builds a lexicon giving every entry the uniform raw weight `1.0`.
    pub fn new(items: Vec<LexicalEntry<T, Category>>, collapse_lemmas: bool) -> Self {
        let n_items = items.len();
        Self::new_with_weights(
            items,
            std::iter::repeat_n(1.0, n_items).collect(),
            collapse_lemmas,
        )
    }

    /// Builds the prefix-tree graph from `items` and per-entry raw `weights`.
    ///
    /// Entries sharing path prefixes share nodes and accumulate weight on the
    /// shared edges. `collapse_lemmas` controls whether identical lemma
    /// leaves are merged as well. Raw weights are then converted to
    /// log-probabilities by [`renormalise_weights`].
    pub fn new_with_weights(
        items: Vec<LexicalEntry<T, Category>>,
        weights: Vec<f64>,
        collapse_lemmas: bool,
    ) -> Self {
        let mut graph = StableDiGraph::new();
        let root_index = graph.add_node(FeatureOrLemma::Root);
        let mut leaves = vec![];

        for (lexeme, weight) in items.into_iter().zip(weights.into_iter()) {
            let lexeme: Vec<FeatureOrLemma<T, Category>> = lexeme.into();
            let mut node_index = root_index;

            // Insert in reverse entry order (category first, lemma last),
            // reusing an existing matching child when allowed and bumping its
            // edge weight; otherwise a fresh node/edge is created.
            for feature in lexeme.into_iter().rev() {
                node_index = if (!feature.is_lemma() || collapse_lemmas)
                    && let Some((nx, edge_index)) = graph
                        .edges_directed(node_index, petgraph::Direction::Outgoing)
                        .find(|x| graph[x.target()] == feature)
                        .map(|x| (x.target(), x.id()))
                {
                    graph[edge_index] += weight;
                    nx
                } else {
                    let new_node_index = graph.add_node(feature);
                    graph.add_edge(node_index, new_node_index, weight);
                    new_node_index
                };

                if let FeatureOrLemma::Lemma(_) = graph[node_index] {
                    leaves.push(LexemeId(node_index));
                };
            }
        }
        let graph = renormalise_weights(graph);
        Lexicon {
            graph,
            leaves,
            root: root_index,
        }
    }

    /// Finds the root's child carrying `Feature::Category(category)`.
    pub(crate) fn find_category(
        &self,
        category: &Category,
    ) -> Result<NodeIndex, ParsingError<Category>> {
        match self
            .graph
            .neighbors_directed(self.root, petgraph::Direction::Outgoing)
            .find(|i| match &self.graph[*i] {
                FeatureOrLemma::Feature(Feature::Category(c)) => c == category,
                _ => false,
            }) {
            Some(x) => Ok(x),
            None => Err(ParsingError::NoLicensorOrCategory(
                LicenseeOrCategory::Category(category.clone()),
            )),
        }
    }

    /// Finds the root's child carrying `Feature::Licensee(category)`.
    pub(crate) fn find_licensee(
        &self,
        category: &Category,
    ) -> Result<NodeIndex, ParsingError<Category>> {
        match self
            .graph
            .neighbors_directed(self.root, petgraph::Direction::Outgoing)
            .find(|i| match &self.graph[*i] {
                FeatureOrLemma::Feature(Feature::Licensee(c)) => c == category,
                _ => false,
            }) {
            Some(x) => Ok(x),
            None => Err(ParsingError::NoLicensorOrCategory(
                LicenseeOrCategory::Licensee(category.clone()),
            )),
        }
    }

    /// Returns the node's label together with the log-probability of the
    /// edge from its parent.
    ///
    /// # Panics
    /// Panics if `nx` exists but has no incoming edge (i.e. the root).
    pub(crate) fn get(
        &self,
        nx: NodeIndex,
    ) -> Option<(&FeatureOrLemma<T, Category>, LogProb<f64>)> {
        if let Some(x) = self.graph.node_weight(nx) {
            let p = self
                .graph
                .edges_directed(nx, petgraph::Direction::Incoming)
                .next()
                .unwrap()
                .weight();
            Some((x, *p))
        } else {
            None
        }
    }

    /// The category carried by the node's feature, if any (`None` for the
    /// root, lemma nodes, and missing nodes).
    pub(crate) fn get_feature_category(&self, nx: NodeIndex) -> Option<&Category> {
        self.graph.node_weight(nx).and_then(|x| match x {
            FeatureOrLemma::Root => None,
            FeatureOrLemma::Lemma(_) => None,
            FeatureOrLemma::Feature(feature) => match feature {
                Feature::Category(c)
                | Feature::Selector(c, _)
                | Feature::Licensor(c)
                | Feature::Licensee(c)
                | Feature::Affix(c, _) => Some(c),
            },
            FeatureOrLemma::Complement(c, _) => Some(c),
        })
    }

    /// Iterates over the category of every `Feature::Category` node.
    pub fn categories(&self) -> impl Iterator<Item = &Category> {
        self.graph.node_references().filter_map(|(_, x)| match x {
            FeatureOrLemma::Feature(Feature::Category(x)) => Some(x),
            _ => None,
        })
    }

    /// Iterates over the categories used in licensor and licensee features.
    pub fn licensor_types(&self) -> impl Iterator<Item = &Category> {
        self.graph.node_references().filter_map(|(_, x)| match x {
            FeatureOrLemma::Feature(Feature::Licensor(x))
            | FeatureOrLemma::Feature(Feature::Licensee(x)) => Some(x),
            _ => None,
        })
    }
}
973
974impl<'src> Lexicon<&'src str, &'src str> {
975 pub fn from_string(s: &'src str) -> Result<Self, LexiconParsingError<'src>> {
977 grammar_parser()
978 .padded()
979 .then_ignore(end())
980 .parse(s)
981 .into_result()
982 .map_err(|x| x.into())
983 }
984}
985
impl<T: Eq, C: Eq> Lexicon<T, C> {
    /// Maps every lemma and category in the graph to new types, preserving
    /// the graph structure, edge weights, root, and leaf list.
    pub fn remap_lexicon<'lex, T2: Eq, C2: Eq>(
        &'lex self,
        lemma_map: impl Fn(&'lex T) -> T2,
        category_map: impl Fn(&'lex C) -> C2,
    ) -> Lexicon<T2, C2> {
        let Lexicon {
            graph,
            root,
            leaves,
        } = self;

        // Structure-preserving node map: every variant keeps its kind and
        // direction; only the payload types change.
        let graph = graph.map(
            |_, x| match x {
                FeatureOrLemma::Root => FeatureOrLemma::Root,
                FeatureOrLemma::Lemma(Some(s)) => FeatureOrLemma::Lemma(Some(lemma_map(s))),
                FeatureOrLemma::Lemma(None) => FeatureOrLemma::Lemma(None),
                FeatureOrLemma::Feature(Feature::Category(c)) => {
                    FeatureOrLemma::Feature(Feature::Category(category_map(c)))
                }
                FeatureOrLemma::Feature(Feature::Licensor(c)) => {
                    FeatureOrLemma::Feature(Feature::Licensor(category_map(c)))
                }
                FeatureOrLemma::Feature(Feature::Licensee(c)) => {
                    FeatureOrLemma::Feature(Feature::Licensee(category_map(c)))
                }
                FeatureOrLemma::Feature(Feature::Affix(c, d)) => {
                    FeatureOrLemma::Feature(Feature::Affix(category_map(c), *d))
                }
                FeatureOrLemma::Feature(Feature::Selector(c, d)) => {
                    FeatureOrLemma::Feature(Feature::Selector(category_map(c), *d))
                }
                FeatureOrLemma::Complement(c, direction) => {
                    FeatureOrLemma::Complement(category_map(c), *direction)
                }
            },
            |_, e| *e,
        );
        // `StableDiGraph::map` keeps indices stable, so root and leaf ids
        // remain valid in the new graph.
        Lexicon {
            graph,
            root: *root,
            leaves: leaves.clone(),
        }
    }
}
1035
1036impl<'a> Lexicon<&'a str, &'a str> {
1037 pub fn to_owned_values(&self) -> Lexicon<String, String> {
1039 let Lexicon {
1040 graph,
1041 root,
1042 leaves,
1043 } = self;
1044
1045 let graph = graph.map(
1046 |_, x| match *x {
1047 FeatureOrLemma::Root => FeatureOrLemma::Root,
1048 FeatureOrLemma::Lemma(s) => FeatureOrLemma::Lemma(s.map(|x| x.to_owned())),
1049 FeatureOrLemma::Feature(Feature::Category(c)) => {
1050 FeatureOrLemma::Feature(Feature::Category(c.to_owned()))
1051 }
1052 FeatureOrLemma::Feature(Feature::Affix(c, d)) => {
1053 FeatureOrLemma::Feature(Feature::Affix(c.to_owned(), d))
1054 }
1055 FeatureOrLemma::Feature(Feature::Licensor(c)) => {
1056 FeatureOrLemma::Feature(Feature::Licensor(c.to_owned()))
1057 }
1058 FeatureOrLemma::Feature(Feature::Licensee(c)) => {
1059 FeatureOrLemma::Feature(Feature::Licensee(c.to_owned()))
1060 }
1061 FeatureOrLemma::Feature(Feature::Selector(c, d)) => {
1062 FeatureOrLemma::Feature(Feature::Selector(c.to_owned(), d))
1063 }
1064 FeatureOrLemma::Complement(c, direction) => {
1065 FeatureOrLemma::Complement(c.to_owned(), direction)
1066 }
1067 },
1068 |_, e| *e,
1069 );
1070 Lexicon {
1071 graph,
1072 root: *root,
1073 leaves: leaves.clone(),
1074 }
1075 }
1076}
1077
/// All diagnostics produced while parsing a grammar string.
// `Display` is implemented manually below (no `#[error]` attribute).
#[derive(Error, Debug, Clone)]
pub struct LexiconParsingError<'a>(pub Vec<Rich<'a, char>>);
1081
1082impl<'a> Display for LexiconParsingError<'a> {
1083 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1084 writeln!(
1085 f,
1086 "{}",
1087 self.0
1088 .iter()
1089 .map(|e| e.to_string())
1090 .collect::<Vec<_>>()
1091 .join("\n")
1092 )
1093 }
1094}
1095
1096impl<'a> From<Vec<Rich<'a, char>>> for LexiconParsingError<'a> {
1097 fn from(value: Vec<Rich<'a, char>>) -> Self {
1098 LexiconParsingError(value)
1099 }
1100}
1101
1102impl LexicalEntry<&str, &str> {
1103 pub fn parse<'a>(
1105 s: &'a str,
1106 ) -> Result<LexicalEntry<&'a str, &'a str>, LexiconParsingError<'a>> {
1107 entry_parser::<extra::Err<Rich<char>>>()
1108 .parse(s)
1109 .into_result()
1110 .map_err(|e| e.into())
1111 }
1112}
1113
1114impl<T, C> std::fmt::Display for FeatureOrLemma<T, C>
1115where
1116 T: Eq + Display,
1117 C: Eq + Display,
1118{
1119 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1120 match self {
1121 FeatureOrLemma::Root => write!(f, "root"),
1122 FeatureOrLemma::Lemma(lemma) => {
1123 if let Some(l) = lemma {
1124 write!(f, "{l}")
1125 } else {
1126 write!(f, "ε")
1127 }
1128 }
1129 FeatureOrLemma::Feature(feature) => write!(f, "{feature}"),
1130 FeatureOrLemma::Complement(c, d) => write!(f, "{}", Feature::Selector(c, *d)),
1131 }
1132 }
1133}
1134
1135impl<T, C> Display for Lexicon<T, C>
1136where
1137 T: Eq + Display + std::fmt::Debug + Clone,
1138 C: Eq + Display + std::fmt::Debug + Clone,
1139{
1140 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1141 write!(
1142 f,
1143 "{}",
1144 self.lexemes()
1145 .unwrap()
1146 .iter()
1147 .map(|l| l.to_string())
1148 .join("\n")
1149 )
1150 }
1151}
1152
1153#[cfg(feature = "semantics")]
1154mod semantics;
1155
1156#[cfg(feature = "semantics")]
1157pub use semantics::SemanticLexicon;
1158
1159pub mod mdl;
1160
1161#[cfg(feature = "sampling")]
1162pub mod mutations;
1163
1164#[cfg(test)]
1165mod tests {
1166
1167 use std::collections::HashSet;
1168
1169 use anyhow::Result;
1170
1171 use super::*;
1172
1173 #[test]
1174 fn categories() -> Result<()> {
1175 let lex = Lexicon::from_string("::V= +Z C -W")?;
1176
1177 assert_eq!(vec![&"C"], lex.categories().collect::<Vec<_>>());
1178 let mut lice = lex.licensor_types().collect::<Vec<_>>();
1179 lice.sort();
1180 assert_eq!(vec![&"W", &"Z"], lice);
1181 Ok(())
1182 }
1183
1184 type SimpleLexicalEntry<'a> = LexicalEntry<&'a str, &'a str>;
1185 #[test]
1186 fn parsing() {
1187 assert_eq!(
1188 SimpleLexicalEntry::parse("John::d").unwrap(),
1189 SimpleLexicalEntry {
1190 lemma: Some("John"),
1191 features: vec![Feature::Category("d")]
1192 }
1193 );
1194 assert_eq!(
1195 SimpleLexicalEntry::parse("eats::d= =d V").unwrap(),
1196 SimpleLexicalEntry {
1197 lemma: Some("eats"),
1198 features: vec![
1199 Feature::Selector("d", Direction::Right),
1200 Feature::Selector("d", Direction::Left),
1201 Feature::Category("V")
1202 ]
1203 }
1204 );
1205 assert_eq!(
1206 SimpleLexicalEntry::parse("::d= =d V").unwrap(),
1207 SimpleLexicalEntry::new(
1208 None,
1209 vec![
1210 Feature::Selector("d", Direction::Right),
1211 Feature::Selector("d", Direction::Left),
1212 Feature::Category("V")
1213 ]
1214 )
1215 );
1216 assert_eq!(
1217 SimpleLexicalEntry::parse("ε::d= =d V").unwrap(),
1218 SimpleLexicalEntry {
1219 lemma: None,
1220 features: vec![
1221 Feature::Selector("d", Direction::Right),
1222 Feature::Selector("d", Direction::Left),
1223 Feature::Category("V")
1224 ]
1225 }
1226 );
1227 assert_eq!(
1228 SimpleLexicalEntry::parse("ε::d<= V").unwrap(),
1229 SimpleLexicalEntry {
1230 lemma: None,
1231 features: vec![
1232 Feature::Affix("d", Direction::Right),
1233 Feature::Category("V")
1234 ]
1235 }
1236 );
1237 }
1238
1239 #[test]
1240 fn get_lexical_entry() -> anyhow::Result<()> {
1241 let entries: HashSet<_> = STABLER2011
1242 .split('\n')
1243 .map(SimpleLexicalEntry::parse)
1244 .collect::<Result<_, LexiconParsingError>>()?;
1245 let lex = Lexicon::from_string(STABLER2011)?;
1246 for nx in &lex.leaves {
1247 let lexical_entry = lex.get_lexical_entry(*nx)?;
1248 assert!(entries.contains(&lexical_entry));
1249 }
1250 Ok(())
1251 }
1252 #[test]
1253 fn get_category() {
1254 assert_eq!(
1255 *SimpleLexicalEntry::parse("eats::d= =d V")
1256 .unwrap()
1257 .category(),
1258 "V"
1259 );
1260 assert_eq!(
1261 *SimpleLexicalEntry::parse("eats::d= V -d")
1262 .unwrap()
1263 .category(),
1264 "V"
1265 );
1266 assert_eq!(
1267 *SimpleLexicalEntry::parse("eats::C -d").unwrap().category(),
1268 "C"
1269 );
1270 assert_eq!(
1271 *SimpleLexicalEntry::parse("eats::+w Z -d")
1272 .unwrap()
1273 .category(),
1274 "Z"
1275 );
1276 }
1277
1278 #[test]
1279 fn convert_to_vec() {
1280 let x: Vec<FeatureOrLemma<_, _>> =
1281 SimpleLexicalEntry::parse("eats::d= =d V").unwrap().into();
1282 assert_eq!(
1283 x,
1284 vec![
1285 FeatureOrLemma::Lemma(Some("eats")),
1286 FeatureOrLemma::Complement("d", Direction::Right),
1287 FeatureOrLemma::Feature(Feature::Selector("d", Direction::Left)),
1288 FeatureOrLemma::Feature(Feature::Category("V")),
1289 ]
1290 );
1291 }
1292
1293 use crate::grammars::{COPY_LANGUAGE, STABLER2011};
1294 use petgraph::dot::Dot;
1295
1296 #[test]
1297 fn category() -> anyhow::Result<()> {
1298 let lex = Lexicon::from_string(STABLER2011)?;
1299 let leaves = lex
1300 .leaves()
1301 .iter()
1302 .map(|&x| (*lex.leaf_to_lemma(x).unwrap(), *lex.category(x).unwrap()))
1303 .collect::<Vec<_>>();
1304 assert_eq!(
1305 leaves,
1306 vec![
1307 (None, "C"),
1308 (None, "C"),
1309 (Some("knows"), "V"),
1310 (Some("says"), "V"),
1311 (Some("prefers"), "V"),
1312 (Some("drinks"), "V"),
1313 (Some("king"), "N"),
1314 (Some("wine"), "N"),
1315 (Some("beer"), "N"),
1316 (Some("queen"), "N"),
1317 (Some("the"), "D"),
1318 (Some("which"), "D")
1319 ]
1320 );
1321 Ok(())
1322 }
1323
1324 #[test]
1325 fn siblings() -> anyhow::Result<()> {
1326 let lex = Lexicon::from_string(STABLER2011)?;
1327 let siblings = lex.sibling_leaves();
1328 let siblings = siblings
1329 .into_iter()
1330 .map(|x| {
1331 x.into_iter()
1332 .map(|x| *lex.leaf_to_lemma(x).unwrap())
1333 .collect::<Vec<_>>()
1334 })
1335 .collect::<Vec<_>>();
1336 assert_eq!(
1337 siblings,
1338 vec![
1339 vec![Some("which")],
1340 vec![Some("the")],
1341 vec![Some("queen"), Some("beer"), Some("wine"), Some("king")],
1342 vec![Some("drinks"), Some("prefers")],
1343 vec![Some("says"), Some("knows")],
1344 vec![None],
1345 vec![None]
1346 ]
1347 );
1348 Ok(())
1349 }
1350
    #[test]
    fn initialize_lexicon() -> anyhow::Result<()> {
        // End-to-end check of lexicon construction: Display round-trip,
        // rebuild-equality, and the exact graph layout (nodes plus
        // log-probability edge weights) pinned via its Dot rendering.
        let strings: Vec<&str> = STABLER2011.split('\n').collect();

        let lex = Lexicon::from_string(STABLER2011)?;
        // Each lexeme must pretty-print back to one of the original grammar
        // lines (ε-lemmas may be written out or elided in the source).
        for lex in lex.lexemes()? {
            assert!(
                strings.contains(&format!("{}", lex).as_str())
                    || strings.contains(&format!("{}", lex).replace('ε', "").as_str())
            )
        }
        // Rebuilding a lexicon from its own lexemes must reproduce it exactly.
        let lex_2 = Lexicon::new(lex.lexemes().unwrap(), false);
        assert_eq!(lex, lex_2);
        // Golden Dot snapshot: edge labels are log-probabilities (base e);
        // any change to node ordering or weighting breaks this assertion.
        assert_eq!(
            format!("{}", Dot::new(&lex.graph)),
            "digraph {
    0 [ label = \"root\" ]
    1 [ label = \"C\" ]
    2 [ label = \"V=\" ]
    3 [ label = \"ε\" ]
    4 [ label = \"+W\" ]
    5 [ label = \"V=\" ]
    6 [ label = \"ε\" ]
    7 [ label = \"V\" ]
    8 [ label = \"=D\" ]
    9 [ label = \"C=\" ]
    10 [ label = \"knows\" ]
    11 [ label = \"says\" ]
    12 [ label = \"D=\" ]
    13 [ label = \"prefers\" ]
    14 [ label = \"drinks\" ]
    15 [ label = \"N\" ]
    16 [ label = \"king\" ]
    17 [ label = \"wine\" ]
    18 [ label = \"beer\" ]
    19 [ label = \"queen\" ]
    20 [ label = \"D\" ]
    21 [ label = \"N=\" ]
    22 [ label = \"the\" ]
    23 [ label = \"-W\" ]
    24 [ label = \"D\" ]
    25 [ label = \"N=\" ]
    26 [ label = \"which\" ]
    0 -> 1 [ label = \"-2.8042006992676702\" ]
    1 -> 2 [ label = \"-0.6931471805599453\" ]
    2 -> 3 [ label = \"0\" ]
    1 -> 4 [ label = \"-0.6931471805599453\" ]
    4 -> 5 [ label = \"0\" ]
    5 -> 6 [ label = \"0\" ]
    0 -> 7 [ label = \"-0.8042006992676702\" ]
    7 -> 8 [ label = \"0\" ]
    8 -> 9 [ label = \"-0.6931471805599454\" ]
    9 -> 10 [ label = \"-0.6931471805599453\" ]
    9 -> 11 [ label = \"-0.6931471805599453\" ]
    8 -> 12 [ label = \"-0.6931471805599454\" ]
    12 -> 13 [ label = \"-0.6931471805599453\" ]
    12 -> 14 [ label = \"-0.6931471805599453\" ]
    0 -> 15 [ label = \"-0.8042006992676702\" ]
    15 -> 16 [ label = \"-1.3862943611198906\" ]
    15 -> 17 [ label = \"-1.3862943611198906\" ]
    15 -> 18 [ label = \"-1.3862943611198906\" ]
    15 -> 19 [ label = \"-1.3862943611198906\" ]
    0 -> 20 [ label = \"-3.8042006992676702\" ]
    20 -> 21 [ label = \"0\" ]
    21 -> 22 [ label = \"0\" ]
    0 -> 23 [ label = \"-3.8042006992676702\" ]
    23 -> 24 [ label = \"0\" ]
    24 -> 25 [ label = \"0\" ]
    25 -> 26 [ label = \"0\" ]
}
"
        );
        // The Stabler (2011) grammar has 4 categories and 1 licensor type.
        let n_categories = lex.categories().collect::<HashSet<_>>().len();
        assert_eq!(4, n_categories);
        let n_licensors = lex.licensor_types().collect::<HashSet<_>>().len();
        assert_eq!(1, n_licensors);
        // Two empty (None) lemmas plus ten phonetic lemmas, checked sorted.
        let mut lemmas: Vec<_> = lex.lemmas().collect();
        lemmas.sort();
        assert_eq!(
            lemmas,
            vec![
                &None,
                &None,
                &Some("beer"),
                &Some("drinks"),
                &Some("king"),
                &Some("knows"),
                &Some("prefers"),
                &Some("queen"),
                &Some("says"),
                &Some("the"),
                &Some("which"),
                &Some("wine")
            ]
        );

        // Repeat the graph-snapshot check for the copy-language grammar,
        // which must differ from the Stabler (2011) lexicon built above.
        let lex = Lexicon::from_string(COPY_LANGUAGE)?;
        assert_ne!(lex, lex_2);
        assert_eq!(
            "digraph {
    0 [ label = \"root\" ]
    1 [ label = \"T\" ]
    2 [ label = \"+l\" ]
    3 [ label = \"+r\" ]
    4 [ label = \"=T\" ]
    5 [ label = \"ε\" ]
    6 [ label = \"-l\" ]
    7 [ label = \"-r\" ]
    8 [ label = \"T\" ]
    9 [ label = \"ε\" ]
    10 [ label = \"T\" ]
    11 [ label = \"+l\" ]
    12 [ label = \"=A\" ]
    13 [ label = \"a\" ]
    14 [ label = \"-r\" ]
    15 [ label = \"A\" ]
    16 [ label = \"+r\" ]
    17 [ label = \"=T\" ]
    18 [ label = \"a\" ]
    19 [ label = \"=B\" ]
    20 [ label = \"b\" ]
    21 [ label = \"B\" ]
    22 [ label = \"+r\" ]
    23 [ label = \"=T\" ]
    24 [ label = \"b\" ]
    0 -> 1 [ label = \"-2.4076059644443806\" ]
    1 -> 2 [ label = \"0\" ]
    2 -> 3 [ label = \"0\" ]
    3 -> 4 [ label = \"0\" ]
    4 -> 5 [ label = \"0\" ]
    0 -> 6 [ label = \"-0.4076059644443806\" ]
    6 -> 7 [ label = \"-1.3132616875182228\" ]
    7 -> 8 [ label = \"0\" ]
    8 -> 9 [ label = \"0\" ]
    6 -> 10 [ label = \"-0.3132616875182228\" ]
    10 -> 11 [ label = \"0\" ]
    11 -> 12 [ label = \"-0.6931471805599453\" ]
    12 -> 13 [ label = \"0\" ]
    0 -> 14 [ label = \"-1.4076059644443804\" ]
    14 -> 15 [ label = \"-0.6931471805599453\" ]
    15 -> 16 [ label = \"0\" ]
    16 -> 17 [ label = \"0\" ]
    17 -> 18 [ label = \"0\" ]
    11 -> 19 [ label = \"-0.6931471805599453\" ]
    19 -> 20 [ label = \"0\" ]
    14 -> 21 [ label = \"-0.6931471805599453\" ]
    21 -> 22 [ label = \"0\" ]
    22 -> 23 [ label = \"0\" ]
    23 -> 24 [ label = \"0\" ]
}
",
            format!("{}", Dot::new(&lex.graph))
        );
        // The copy language has 3 categories and 2 licensor types (l, r).
        let n_categories = lex.categories().collect::<HashSet<_>>().len();
        assert_eq!(3, n_categories);
        let n_licensors = lex.licensor_types().collect::<HashSet<_>>().len();
        assert_eq!(2, n_licensors);
        Ok(())
    }
1510
1511 #[test]
1512 fn conversion() -> anyhow::Result<()> {
1513 let lex = Lexicon::from_string(STABLER2011)?;
1514 lex.to_owned_values();
1515 Ok(())
1516 }
1517 #[test]
1518 fn conversion2() -> anyhow::Result<()> {
1519 let lex = Lexicon::from_string(STABLER2011)?;
1520 let lex2 = lex.remap_lexicon(|x| x.to_string(), |c| c.to_string());
1521 let lex3 = lex2.remap_lexicon(|x| x.as_str(), |c| c.as_str());
1522 assert_eq!(lex, lex3);
1523
1524 let x = ParsingError::NoLicensorOrCategory(LicenseeOrCategory::Category("s"));
1525 let _y: ParsingError<String> = x.inner_into();
1526
1527 Ok(())
1528 }
1529}