minimalist_grammar_parser/parsing/
rules.rs

1//! Defines helper functions which allow one to record the structure of a parse.
2use ahash::HashSet;
3use petgraph::graph::NodeIndex;
4use std::{collections::BTreeMap, fmt::Debug, hash::Hash};
5
6#[cfg(feature = "pretty")]
7mod printing;
8#[cfg(feature = "pretty")]
9mod serialization;
10
11#[cfg(feature = "pretty")]
12pub use printing::Derivation;
13#[cfg(feature = "pretty")]
14pub use serialization::{Tree, TreeEdge, TreeNode, TreeWithMovement};
15
16use crate::{
17    Direction, Lexicon,
18    lexicon::{Feature, FeatureOrLemma, LexemeId},
19};
20
21use super::trees::GornIndex;
22
23#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, PartialOrd, Ord)]
24pub(crate) struct RuleIndex(usize);
25impl RuleIndex {
26    pub fn one() -> Self {
27        RuleIndex(1)
28    }
29}
30
31///This struct record the ID of each trace in a derivation.
32#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, PartialOrd, Ord)]
33pub struct TraceId(usize);
34
35impl TraceId {
36    ///Gets the inner value of the trace as a `usize`
37    #[must_use]
38    pub fn index(&self) -> usize {
39        self.0
40    }
41}
42
43impl From<TraceId> for usize {
44    fn from(value: TraceId) -> Self {
45        value.0
46    }
47}
48
49impl std::fmt::Display for TraceId {
50    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
51        write!(f, "t{}", self.0)
52    }
53}
54
55#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
56pub(crate) enum StolenInfo {
57    Normal,
58    Stolen(RuleIndex, GornIndex),
59    Stealer,
60}
61
62#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
63pub(crate) enum Rule {
64    Start {
65        node: NodeIndex,
66        child: RuleIndex,
67    },
68    UnmoveTrace(TraceId),
69    Scan {
70        lexeme: LexemeId,
71        stolen: StolenInfo,
72    },
73    Unmerge {
74        child: NodeIndex,
75        child_id: RuleIndex,
76        complement_id: RuleIndex,
77        dir: Direction,
78        affix: bool,
79    },
80    UnmergeFromMover {
81        child: NodeIndex,
82        child_id: RuleIndex,
83        stored_id: RuleIndex,
84        destination_id: RuleIndex,
85        dir: Direction,
86        trace_id: TraceId,
87    },
88    Unmove {
89        child_id: RuleIndex,
90        stored_id: RuleIndex,
91    },
92    UnmoveFromMover {
93        child_id: RuleIndex,
94        stored_id: RuleIndex,
95        destination_id: RuleIndex,
96        trace_id: TraceId,
97    },
98}
99
100///A description of each step in the derivation. The usizes are indices pointing to the other rules
101///in the tree.
102#[derive(Debug, Eq, PartialEq, Ord, PartialOrd, Clone, Copy, Hash)]
103pub enum DerivationStep<T> {
104    ///A lemma is merged here
105    Lemma(Option<T>),
106
107    ///Merge
108    Merge {
109        ///The child (towards the head)
110        child: usize,
111        ///The argument of the merge
112        argument: usize,
113        ///Whether it merged left or right
114        direction: Direction,
115    },
116    ///Merge right and move the head of the complement
117    MergeAffix {
118        ///The child (towards head)
119        child: usize,
120        ///The argument of the merge
121        argument: usize,
122        ///Whether the head of the affix was put to the right or left of the child
123        direction: Direction,
124    },
125    ///Move
126    Move {
127        ///The child of movement
128        child: usize,
129        ///That which has been moved
130        mover: usize,
131    },
132}
133
134impl RulePool {
135    ///This outputs a [`Vec`] of [`DerivationStep`]s which indicates the steps needed to make the
136    ///derivation. The usizes on the enum are indexes that indicates where the tree next points
137    ///
138    ///Warning, when something is moved twice, both move steps point to the origin!
139    pub fn as_derivation<T: Eq + Clone + Debug, C: Eq + Debug>(
140        &self,
141        lex: &Lexicon<T, C>,
142    ) -> Vec<DerivationStep<T>> {
143        let mut stack = vec![(RuleIndex(1), 0)];
144        let mut rules = vec![None];
145        let mut movers: BTreeMap<RuleIndex, Vec<usize>> = BTreeMap::new();
146
147        while let Some((x, i)) = stack.pop() {
148            match self.get(x) {
149                Rule::Start { .. } => unimplemented!("Should be skipped!"),
150                Rule::UnmoveTrace(_) => (),
151                Rule::Scan { lexeme, .. } => {
152                    let lemma = lex.leaf_to_lemma(*lexeme).unwrap();
153                    *rules.get_mut(i).unwrap() = Some(DerivationStep::Lemma(lemma.clone()));
154                }
155                Rule::Unmerge {
156                    child_id,
157                    complement_id,
158                    dir,
159                    affix,
160                    child: nx,
161                } => {
162                    let child = rules.len();
163                    let argument = rules.len() + 1;
164                    rules.extend([None, None]);
165                    stack.extend([(*child_id, child), (*complement_id, argument)]);
166                    *rules.get_mut(i).unwrap() = Some(if *affix {
167                        let FeatureOrLemma::Feature(Feature::Affix(_, dir)) =
168                            lex.get(*nx).unwrap().0
169                        else {
170                            panic!("Can't affix a non-affix")
171                        };
172                        DerivationStep::MergeAffix {
173                            child,
174                            argument,
175                            direction: *dir,
176                        }
177                    } else {
178                        DerivationStep::Merge {
179                            child,
180                            argument,
181                            direction: *dir,
182                        }
183                    });
184                }
185                Rule::UnmergeFromMover {
186                    child_id,
187                    stored_id,
188                    dir,
189                    destination_id,
190                    ..
191                } => {
192                    let child = rules.len();
193                    let argument = rules.len() + 1;
194                    rules.extend([None, None]);
195                    stack.extend([(*child_id, child), (*stored_id, argument)]);
196                    let targets = movers.get(destination_id).unwrap();
197                    for target in targets {
198                        let Some(DerivationStep::Move { mover, .. }) =
199                            rules.get_mut(*target).unwrap()
200                        else {
201                            panic!("Problem as all targets must be Move!")
202                        };
203                        *mover = argument;
204                    }
205                    *rules.get_mut(i).unwrap() = Some(DerivationStep::Merge {
206                        child,
207                        argument,
208                        direction: *dir,
209                    });
210                }
211                Rule::Unmove {
212                    child_id,
213                    stored_id,
214                } => {
215                    let child = rules.len();
216                    movers.insert(*stored_id, vec![i]);
217                    rules.push(None);
218                    stack.push((*child_id, child));
219                    *rules.get_mut(i).unwrap() = Some(DerivationStep::Move { child, mover: 0 });
220                }
221                Rule::UnmoveFromMover {
222                    child_id,
223                    stored_id,
224                    destination_id,
225                    ..
226                } => {
227                    let child = rules.len();
228                    let mut future_moves = movers.remove(destination_id).unwrap();
229                    future_moves.push(i);
230                    movers.insert(*stored_id, future_moves);
231                    rules.push(None);
232                    stack.push((*child_id, child));
233                    *rules.get_mut(i).unwrap() = Some(DerivationStep::Move { child, mover: 0 });
234                }
235            }
236        }
237        rules.into_iter().map(|x| x.unwrap()).collect()
238    }
239}
240impl Rule {
241    #[cfg(any(feature = "semantics", feature = "pretty"))]
242    fn children_directed(&self) -> impl DoubleEndedIterator<Item = RuleIndex> {
243        match self {
244            Rule::Start { child, .. } => [Some(*child), None],
245            Rule::UnmoveTrace(_) | Rule::Scan { .. } => [None, None],
246            Rule::Unmove {
247                child_id: a,
248                stored_id: b,
249            }
250            | Rule::UnmoveFromMover {
251                child_id: a,
252                stored_id: b,
253                ..
254            } => [Some(*b), Some(*a)],
255            Rule::Unmerge {
256                child_id: a,
257                complement_id: b,
258                dir,
259                ..
260            }
261            | Rule::UnmergeFromMover {
262                child_id: a,
263                stored_id: b,
264                dir,
265                ..
266            } => match dir {
267                Direction::Left => [Some(*b), Some(*a)],
268                Direction::Right => [Some(*a), Some(*b)],
269            },
270        }
271        .into_iter()
272        .flatten()
273    }
274
275    #[cfg(feature = "semantics")]
276    fn children(&self) -> impl DoubleEndedIterator<Item = RuleIndex> {
277        match self {
278            Rule::Start { child, .. } => [Some(*child), None],
279            Rule::UnmoveTrace(_) | Rule::Scan { .. } => [None, None],
280            Rule::Unmove {
281                child_id: a,
282                stored_id: b,
283            }
284            | Rule::UnmoveFromMover {
285                child_id: a,
286                stored_id: b,
287                ..
288            }
289            | Rule::Unmerge {
290                child_id: a,
291                complement_id: b,
292                ..
293            }
294            | Rule::UnmergeFromMover {
295                child_id: a,
296                stored_id: b,
297                ..
298            } => [Some(*a), Some(*b)],
299        }
300        .into_iter()
301        .flatten()
302    }
303
304    fn is_scan(&self) -> bool {
305        matches!(self, Rule::Scan { .. })
306    }
307}
308
309#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
310struct PartialIndex(usize);
311
312#[derive(Debug, Clone, Copy, PartialEq, Eq)]
313pub(crate) struct RuleHolder {
314    rule: Rule,
315    index: RuleIndex,
316    parent: Option<PartialIndex>,
317}
318
319#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
320pub(crate) struct PartialRulePool {
321    n_traces: usize,
322    n_nodes: usize,
323    most_recent: PartialIndex,
324}
325
326impl PartialRulePool {
327    ///Check what the next call to [`PartialRulePool::fresh`] will return without modifying
328    pub(crate) fn peek_fresh(&self) -> RuleIndex {
329        RuleIndex(self.n_nodes)
330    }
331
332    pub(crate) fn fresh(&mut self) -> RuleIndex {
333        let id = RuleIndex(self.n_nodes); //Get fresh ID
334        self.n_nodes += 1;
335        id
336    }
337
338    pub(crate) fn fresh_trace(&mut self) -> TraceId {
339        let i = TraceId(self.n_traces);
340        self.n_traces += 1;
341        i
342    }
343
344    pub(crate) fn n_steps(&self) -> usize {
345        self.n_nodes
346    }
347
348    pub(crate) fn push_rule(&mut self, pool: &mut Vec<RuleHolder>, rule: Rule, index: RuleIndex) {
349        pool.push(RuleHolder {
350            rule,
351            index,
352            parent: Some(self.most_recent),
353        });
354        self.most_recent = PartialIndex(pool.len() - 1);
355    }
356
357    pub(crate) fn default_pool(cat: NodeIndex) -> Vec<RuleHolder> {
358        let mut v = Vec::with_capacity(100_000);
359        v.push(RuleHolder {
360            rule: Rule::Start {
361                node: cat,
362                child: RuleIndex(1),
363            },
364            index: RuleIndex(0),
365            parent: None,
366        });
367        v
368    }
369
370    pub(crate) fn into_rule_pool(self, big_pool: &[RuleHolder]) -> RulePool {
371        let mut pool = vec![None; self.n_nodes];
372        let mut i = Some(self.most_recent);
373
374        while i.is_some() {
375            let RuleHolder {
376                rule,
377                index,
378                parent,
379            } = big_pool[i.unwrap().0];
380            match rule {
381                Rule::UnmoveFromMover {
382                    destination_id,
383                    trace_id,
384                    ..
385                }
386                | Rule::UnmergeFromMover {
387                    destination_id,
388                    trace_id,
389                    ..
390                } => {
391                    pool[destination_id.0] = Some(Rule::UnmoveTrace(trace_id));
392                }
393                _ => (),
394            }
395            pool[index.0] = Some(rule);
396            i = parent;
397        }
398
399        RulePool(pool.into_iter().collect::<Option<Vec<_>>>().unwrap())
400    }
401}
402
403impl Default for PartialRulePool {
404    fn default() -> Self {
405        PartialRulePool {
406            n_traces: 0,
407            n_nodes: 2,
408            most_recent: PartialIndex(0),
409        }
410    }
411}
412
413///This struct holds the history of which rules were used to generate a parse and thus can be used
414///to plot trees or look at the syntactic structure of a parse.
415#[derive(Debug, Clone, Eq, PartialEq, Hash)]
416pub struct RulePool(Vec<Rule>);
417
418#[cfg(feature = "semantics")]
419pub mod semantics;
420
421impl RulePool {
422    #[cfg(feature = "pretty")]
423    pub(crate) fn complement_last(
424        &self,
425        x: RuleIndex,
426    ) -> impl DoubleEndedIterator<Item = RuleIndex> {
427        let x = self.get(x);
428        match x {
429            Rule::Start { child, .. } => [Some(*child), None],
430            Rule::UnmoveTrace(_) | Rule::Scan { .. } => [None, None],
431            Rule::Unmove {
432                child_id: a,
433                stored_id: b,
434            }
435            | Rule::Unmerge {
436                child_id: a,
437                complement_id: b,
438                ..
439            }
440            | Rule::UnmergeFromMover {
441                child_id: a,
442                stored_id: b,
443                ..
444            }
445            | Rule::UnmoveFromMover {
446                child_id: a,
447                stored_id: b,
448                ..
449            } => match (self.get(*a).is_scan(), self.get(*b).is_scan()) {
450                (true, false) => [Some(*a), Some(*b)],
451                (false, true) | (false, false) | (true, true) => [Some(*b), Some(*a)],
452            },
453        }
454        .into_iter()
455        .flatten()
456    }
457
458    ///The number of steps in the derivation
459    #[must_use]
460    pub fn n_steps(&self) -> usize {
461        self.0.len()
462    }
463
464    fn get(&self, x: RuleIndex) -> &Rule {
465        &self.0[x.0]
466    }
467
468    fn iter(&self) -> impl Iterator<Item = &Rule> {
469        self.0.iter()
470    }
471
472    ///Gets an iterator of all used leaves.
473    pub fn used_lemmas(&self) -> impl Iterator<Item = LexemeId> {
474        self.0.iter().filter_map(|x| {
475            if let Rule::Scan { lexeme, .. } = x {
476                Some(*lexeme)
477            } else {
478                None
479            }
480        })
481    }
482
483    ///Records the maximum number of moving pieces stored in memory at a single time.
484    #[must_use]
485    pub fn max_memory_load(&self) -> usize {
486        let mut max = 0;
487        let mut memory = HashSet::default();
488        for rule in &self.0 {
489            match rule {
490                Rule::UnmoveTrace(trace_id) => {
491                    memory.insert(*trace_id);
492                    if memory.len() > max {
493                        max = memory.len();
494                    }
495                }
496                Rule::UnmergeFromMover { trace_id, .. }
497                | Rule::UnmoveFromMover { trace_id, .. } => {
498                    memory.remove(trace_id);
499                }
500                _ => (),
501            }
502        }
503        max
504    }
505}
506
507#[cfg(test)]
508mod test {
509    use crate::{
510        Direction, Lexicon, ParsingConfig, PhonContent, grammars, parsing::rules::DerivationStep,
511    };
512
513    #[test]
514    fn memory_load() -> anyhow::Result<()> {
515        let grammar = Lexicon::from_string("a::b= c= +a +e C\nb::b -a\nc::c -e")?;
516
517        let (_, _, rules) = grammar
518            .parse(
519                &[
520                    PhonContent::Normal("c"),
521                    PhonContent::Normal("b"),
522                    PhonContent::Normal("a"),
523                ],
524                "C",
525                &crate::ParsingConfig::default(),
526            )?
527            .next()
528            .unwrap();
529
530        assert_eq!(rules.max_memory_load(), 2);
531        let grammar = Lexicon::from_string("a::b= +a c= +e C\nb::b -a\nc::c -e")?;
532
533        let (_, _, rules) = grammar
534            .parse(
535                &[
536                    PhonContent::Normal("c"),
537                    PhonContent::Normal("b"),
538                    PhonContent::Normal("a"),
539                ],
540                "C",
541                &crate::ParsingConfig::default(),
542            )?
543            .next()
544            .unwrap();
545
546        assert_eq!(rules.max_memory_load(), 1);
547        Ok(())
548    }
549
550    #[test]
551    fn convert_to_derivation() -> anyhow::Result<()> {
552        let lexicon = Lexicon::from_string("a::b= s\nb::c= b\nc::c")?;
553        for (_, _, rules) in lexicon.generate("s", &ParsingConfig::default())? {
554            assert_eq!(
555                rules.as_derivation(&lexicon),
556                vec![
557                    DerivationStep::Merge {
558                        child: 1,
559                        argument: 2,
560                        direction: Direction::Right
561                    },
562                    DerivationStep::Lemma(Some("a")),
563                    DerivationStep::Merge {
564                        child: 3,
565                        argument: 4,
566                        direction: Direction::Right
567                    },
568                    DerivationStep::Lemma(Some("b")),
569                    DerivationStep::Lemma(Some("c"))
570                ]
571            );
572        }
573
574        println!("The king knows which beer the queen drinks");
575        let lexicon = Lexicon::from_string(grammars::STABLER2011)?;
576        for (_, _, rules) in lexicon.parse(
577            &PhonContent::from([
578                "the", "king", "knows", "which", "beer", "the", "queen", "drinks",
579            ]),
580            "C",
581            &ParsingConfig::default(),
582        )? {
583            let d = rules.as_derivation(&lexicon);
584
585            for (i, x) in d.iter().enumerate() {
586                println!("{i}\t{x:?}");
587            }
588
589            assert_eq!(
590                d,
591                vec![
592                    DerivationStep::Merge {
593                        child: 1,
594                        argument: 2,
595                        direction: Direction::Right
596                    },
597                    DerivationStep::Lemma(None),
598                    DerivationStep::Merge {
599                        child: 3,
600                        argument: 4,
601                        direction: Direction::Left
602                    },
603                    DerivationStep::Merge {
604                        child: 7,
605                        argument: 8,
606                        direction: Direction::Right
607                    },
608                    DerivationStep::Merge {
609                        child: 5,
610                        argument: 6,
611                        direction: Direction::Right
612                    },
613                    DerivationStep::Lemma(Some("the")),
614                    DerivationStep::Lemma(Some("king")),
615                    DerivationStep::Lemma(Some("knows")),
616                    DerivationStep::Move {
617                        child: 9,
618                        mover: 17
619                    },
620                    DerivationStep::Merge {
621                        child: 10,
622                        argument: 11,
623                        direction: Direction::Right
624                    },
625                    DerivationStep::Lemma(None),
626                    DerivationStep::Merge {
627                        child: 12,
628                        argument: 13,
629                        direction: Direction::Left
630                    },
631                    DerivationStep::Merge {
632                        child: 16,
633                        argument: 17,
634                        direction: Direction::Right
635                    },
636                    DerivationStep::Merge {
637                        child: 14,
638                        argument: 15,
639                        direction: Direction::Right
640                    },
641                    DerivationStep::Lemma(Some("the")),
642                    DerivationStep::Lemma(Some("queen")),
643                    DerivationStep::Lemma(Some("drinks")),
644                    DerivationStep::Merge {
645                        child: 18,
646                        argument: 19,
647                        direction: Direction::Right
648                    },
649                    DerivationStep::Lemma(Some("which")),
650                    DerivationStep::Lemma(Some("beer"))
651                ]
652            );
653        }
654
655        let lexicon = Lexicon::from_string(grammars::COPY_LANGUAGE)?;
656        println!("ABAB");
657
658        for (_, _, rules) in lexicon.parse(
659            &PhonContent::from(["a", "b", "a", "b"]),
660            "T",
661            &ParsingConfig::default(),
662        )? {
663            let d = rules.as_derivation(&lexicon);
664
665            for (i, x) in d.iter().enumerate() {
666                println!("{i}\t{x:?}");
667            }
668            assert_eq!(
669                d,
670                vec![
671                    DerivationStep::Move { child: 1, mover: 4 },
672                    DerivationStep::Move { child: 2, mover: 7 },
673                    DerivationStep::Merge {
674                        child: 3,
675                        argument: 4,
676                        direction: Direction::Left
677                    },
678                    DerivationStep::Lemma(None),
679                    DerivationStep::Move {
680                        child: 5,
681                        mover: 10
682                    },
683                    DerivationStep::Merge {
684                        child: 6,
685                        argument: 7,
686                        direction: Direction::Left
687                    },
688                    DerivationStep::Lemma(Some("b")),
689                    DerivationStep::Move {
690                        child: 8,
691                        mover: 13
692                    },
693                    DerivationStep::Merge {
694                        child: 9,
695                        argument: 10,
696                        direction: Direction::Left
697                    },
698                    DerivationStep::Lemma(Some("b")),
699                    DerivationStep::Move {
700                        child: 11,
701                        mover: 16
702                    },
703                    DerivationStep::Merge {
704                        child: 12,
705                        argument: 13,
706                        direction: Direction::Left
707                    },
708                    DerivationStep::Lemma(Some("a")),
709                    DerivationStep::Move {
710                        child: 14,
711                        mover: 16
712                    },
713                    DerivationStep::Merge {
714                        child: 15,
715                        argument: 16,
716                        direction: Direction::Left
717                    },
718                    DerivationStep::Lemma(Some("a")),
719                    DerivationStep::Lemma(None)
720                ]
721            );
722        }
723
724        Ok(())
725    }
726}