minimalist_grammar_parser/parsing/
rules.rs

1//! Defines helper functions which allow one to record the structure of a parse.
2use ahash::HashSet;
3use petgraph::graph::NodeIndex;
4use std::{fmt::Debug, hash::Hash};
5
6#[cfg(feature = "pretty")]
7mod printing;
8#[cfg(feature = "pretty")]
9mod serialization;
10
11#[cfg(feature = "pretty")]
12pub use printing::Derivation;
13#[cfg(feature = "pretty")]
14pub use serialization::{Tree, TreeEdge, TreeNode, TreeWithMovement};
15
16use crate::{Direction, lexicon::LexemeId};
17
18use super::trees::GornIndex;
19
20#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, PartialOrd, Ord)]
21pub(crate) struct RuleIndex(usize);
22impl RuleIndex {
23    pub fn one() -> Self {
24        RuleIndex(1)
25    }
26}
27
28///This struct record the ID of each trace in a derivation.
29#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, PartialOrd, Ord)]
30pub struct TraceId(usize);
31
32impl TraceId {
33    ///Gets the inner value of the trace as a `usize`
34    pub fn index(&self) -> usize {
35        self.0
36    }
37}
38
39impl From<TraceId> for usize {
40    fn from(value: TraceId) -> Self {
41        value.0
42    }
43}
44
45impl std::fmt::Display for TraceId {
46    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
47        write!(f, "t{}", self.0)
48    }
49}
50
51#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
52pub(crate) enum StolenInfo {
53    Normal,
54    Stolen(RuleIndex, GornIndex),
55    Stealer,
56}
57
58#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
59pub(crate) enum Rule {
60    Start {
61        node: NodeIndex,
62        child: RuleIndex,
63    },
64    UnmoveTrace(TraceId),
65    Scan {
66        lexeme: LexemeId,
67        stolen: StolenInfo,
68    },
69    Unmerge {
70        child: NodeIndex,
71        child_id: RuleIndex,
72        complement_id: RuleIndex,
73        dir: Direction,
74        affix: bool,
75    },
76    UnmergeFromMover {
77        child: NodeIndex,
78        child_id: RuleIndex,
79        stored_id: RuleIndex,
80        destination_id: RuleIndex,
81        dir: Direction,
82        trace_id: TraceId,
83    },
84    Unmove {
85        child_id: RuleIndex,
86        stored_id: RuleIndex,
87    },
88    UnmoveFromMover {
89        child_id: RuleIndex,
90        stored_id: RuleIndex,
91        destination_id: RuleIndex,
92        trace_id: TraceId,
93    },
94}
95
96impl Rule {
97    #[cfg(any(feature = "semantics", feature = "pretty"))]
98    fn children_directed(&self) -> impl DoubleEndedIterator<Item = RuleIndex> {
99        match self {
100            Rule::Start { child, .. } => [Some(*child), None],
101            Rule::UnmoveTrace(_) | Rule::Scan { .. } => [None, None],
102            Rule::Unmove {
103                child_id: a,
104                stored_id: b,
105            }
106            | Rule::UnmoveFromMover {
107                child_id: a,
108                stored_id: b,
109                ..
110            } => [Some(*b), Some(*a)],
111            Rule::Unmerge {
112                child_id: a,
113                complement_id: b,
114                dir,
115                ..
116            }
117            | Rule::UnmergeFromMover {
118                child_id: a,
119                stored_id: b,
120                dir,
121                ..
122            } => match dir {
123                Direction::Left => [Some(*b), Some(*a)],
124                Direction::Right => [Some(*a), Some(*b)],
125            },
126        }
127        .into_iter()
128        .flatten()
129    }
130
131    #[cfg(feature = "semantics")]
132    fn children(&self) -> impl DoubleEndedIterator<Item = RuleIndex> {
133        match self {
134            Rule::Start { child, .. } => [Some(*child), None],
135            Rule::UnmoveTrace(_) | Rule::Scan { .. } => [None, None],
136            Rule::Unmove {
137                child_id: a,
138                stored_id: b,
139            }
140            | Rule::UnmoveFromMover {
141                child_id: a,
142                stored_id: b,
143                ..
144            }
145            | Rule::Unmerge {
146                child_id: a,
147                complement_id: b,
148                ..
149            }
150            | Rule::UnmergeFromMover {
151                child_id: a,
152                stored_id: b,
153                ..
154            } => [Some(*a), Some(*b)],
155        }
156        .into_iter()
157        .flatten()
158    }
159
160    fn is_scan(&self) -> bool {
161        matches!(self, Rule::Scan { .. })
162    }
163}
164
165#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
166struct PartialIndex(usize);
167
168#[derive(Debug, Clone, Copy, PartialEq, Eq)]
169pub(crate) struct RuleHolder {
170    rule: Rule,
171    index: RuleIndex,
172    parent: Option<PartialIndex>,
173}
174
175#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
176pub(crate) struct PartialRulePool {
177    n_traces: usize,
178    n_nodes: usize,
179    most_recent: PartialIndex,
180}
181
182impl PartialRulePool {
183    pub(crate) fn fresh(&mut self) -> RuleIndex {
184        let id = RuleIndex(self.n_nodes); //Get fresh ID
185        self.n_nodes += 1;
186        id
187    }
188
189    pub(crate) fn fresh_trace(&mut self) -> TraceId {
190        let i = TraceId(self.n_traces);
191        self.n_traces += 1;
192        i
193    }
194
195    pub(crate) fn n_steps(&self) -> usize {
196        self.n_nodes
197    }
198
199    pub(crate) fn push_rule(&mut self, pool: &mut Vec<RuleHolder>, rule: Rule, index: RuleIndex) {
200        pool.push(RuleHolder {
201            rule,
202            index,
203            parent: Some(self.most_recent),
204        });
205        self.most_recent = PartialIndex(pool.len() - 1);
206    }
207
208    pub(crate) fn default_pool(cat: NodeIndex) -> Vec<RuleHolder> {
209        let mut v = Vec::with_capacity(100_000);
210        v.push(RuleHolder {
211            rule: Rule::Start {
212                node: cat,
213                child: RuleIndex(1),
214            },
215            index: RuleIndex(0),
216            parent: None,
217        });
218        v
219    }
220
221    pub(crate) fn into_rule_pool(self, big_pool: &[RuleHolder]) -> RulePool {
222        let mut pool = vec![None; self.n_nodes];
223        let mut i = Some(self.most_recent);
224
225        while i.is_some() {
226            let RuleHolder {
227                rule,
228                index,
229                parent,
230            } = big_pool[i.unwrap().0];
231            match rule {
232                Rule::UnmoveFromMover {
233                    destination_id,
234                    trace_id,
235                    ..
236                }
237                | Rule::UnmergeFromMover {
238                    destination_id,
239                    trace_id,
240                    ..
241                } => {
242                    pool[destination_id.0] = Some(Rule::UnmoveTrace(trace_id));
243                }
244                _ => (),
245            }
246            pool[index.0] = Some(rule);
247            i = parent;
248        }
249
250        RulePool(pool.into_iter().collect::<Option<Vec<_>>>().unwrap())
251    }
252}
253
254impl Default for PartialRulePool {
255    fn default() -> Self {
256        PartialRulePool {
257            n_traces: 0,
258            n_nodes: 2,
259            most_recent: PartialIndex(0),
260        }
261    }
262}
263
264///This struct holds the history of which rules were used to generate a parse and thus can be used
265///to plot trees or look at the syntactic structure of a parse.
266#[derive(Debug, Clone, Eq, PartialEq)]
267pub struct RulePool(Vec<Rule>);
268
269#[cfg(feature = "semantics")]
270pub mod semantics;
271
272impl RulePool {
273    #[cfg(feature = "pretty")]
274    pub(crate) fn complement_last(
275        &self,
276        x: RuleIndex,
277    ) -> impl DoubleEndedIterator<Item = RuleIndex> {
278        let x = self.get(x);
279        match x {
280            Rule::Start { child, .. } => [Some(*child), None],
281            Rule::UnmoveTrace(_) | Rule::Scan { .. } => [None, None],
282            Rule::Unmove {
283                child_id: a,
284                stored_id: b,
285            }
286            | Rule::Unmerge {
287                child_id: a,
288                complement_id: b,
289                ..
290            }
291            | Rule::UnmergeFromMover {
292                child_id: a,
293                stored_id: b,
294                ..
295            }
296            | Rule::UnmoveFromMover {
297                child_id: a,
298                stored_id: b,
299                ..
300            } => match (self.get(*a).is_scan(), self.get(*b).is_scan()) {
301                (true, false) => [Some(*a), Some(*b)],
302                (false, true) => [Some(*b), Some(*a)],
303                (false, false) => [Some(*b), Some(*a)],
304                (true, true) => [Some(*b), Some(*a)],
305            },
306        }
307        .into_iter()
308        .flatten()
309    }
310
311    ///The number of steps in the derivation
312    pub fn n_steps(&self) -> usize {
313        self.0.len()
314    }
315
316    #[cfg(any(feature = "pretty", feature = "semantics"))]
317    fn get(&self, x: RuleIndex) -> &Rule {
318        &self.0[x.0]
319    }
320
321    fn iter(&self) -> impl Iterator<Item = &Rule> {
322        self.0.iter()
323    }
324
325    ///Gets an iterator of all used leaves.
326    pub fn used_lemmas(&self) -> impl Iterator<Item = LexemeId> {
327        self.0.iter().filter_map(|x| {
328            if let Rule::Scan { lexeme, .. } = x {
329                Some(*lexeme)
330            } else {
331                None
332            }
333        })
334    }
335
336    ///Records the maximum number of moving pieces stored in memory at a single time.
337    pub fn max_memory_load(&self) -> usize {
338        let mut max = 0;
339        let mut memory = HashSet::default();
340        for rule in &self.0 {
341            match rule {
342                Rule::UnmoveTrace(trace_id) => {
343                    memory.insert(*trace_id);
344                    if memory.len() > max {
345                        max = memory.len();
346                    }
347                }
348                Rule::UnmergeFromMover { trace_id, .. }
349                | Rule::UnmoveFromMover { trace_id, .. } => {
350                    memory.remove(trace_id);
351                }
352                _ => (),
353            }
354        }
355        max
356    }
357}
358
359#[cfg(test)]
360mod test {
361    use crate::{Lexicon, PhonContent};
362
363    #[test]
364    fn memory_load() -> anyhow::Result<()> {
365        let grammar = Lexicon::from_string("a::b= c= +a +e C\nb::b -a\nc::c -e")?;
366
367        let (_, _, rules) = grammar
368            .parse(
369                &[
370                    PhonContent::Normal("c"),
371                    PhonContent::Normal("b"),
372                    PhonContent::Normal("a"),
373                ],
374                "C",
375                &crate::ParsingConfig::default(),
376            )?
377            .next()
378            .unwrap();
379
380        assert_eq!(rules.max_memory_load(), 2);
381        let grammar = Lexicon::from_string("a::b= +a c= +e C\nb::b -a\nc::c -e")?;
382
383        let (_, _, rules) = grammar
384            .parse(
385                &[
386                    PhonContent::Normal("c"),
387                    PhonContent::Normal("b"),
388                    PhonContent::Normal("a"),
389                ],
390                "C",
391                &crate::ParsingConfig::default(),
392            )?
393            .next()
394            .unwrap();
395
396        assert_eq!(rules.max_memory_load(), 1);
397        Ok(())
398    }
399}