1use ahash::HashSet;
3use petgraph::graph::NodeIndex;
4use std::{collections::BTreeMap, fmt::Debug, hash::Hash};
5
6#[cfg(feature = "pretty")]
7mod printing;
8#[cfg(feature = "pretty")]
9mod serialization;
10
11#[cfg(feature = "pretty")]
12pub use printing::Derivation;
13#[cfg(feature = "pretty")]
14pub use serialization::{Tree, TreeEdge, TreeNode, TreeWithMovement};
15
16use crate::{
17 Direction, Lexicon,
18 lexicon::{Feature, FeatureOrLemma, LexemeId},
19};
20
21use super::trees::GornIndex;
22
23#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, PartialOrd, Ord)]
24pub(crate) struct RuleIndex(usize);
25impl RuleIndex {
26 pub fn one() -> Self {
27 RuleIndex(1)
28 }
29}
30
31#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, PartialOrd, Ord)]
33pub struct TraceId(usize);
34
35impl TraceId {
36 #[must_use]
38 pub fn index(&self) -> usize {
39 self.0
40 }
41}
42
43impl From<TraceId> for usize {
44 fn from(value: TraceId) -> Self {
45 value.0
46 }
47}
48
49impl std::fmt::Display for TraceId {
50 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
51 write!(f, "t{}", self.0)
52 }
53}
54
55#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
56pub(crate) enum StolenInfo {
57 Normal,
58 Stolen(RuleIndex, GornIndex),
59 Stealer,
60}
61
62#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
63pub(crate) enum Rule {
64 Start {
65 node: NodeIndex,
66 child: RuleIndex,
67 },
68 UnmoveTrace(TraceId),
69 Scan {
70 lexeme: LexemeId,
71 stolen: StolenInfo,
72 },
73 Unmerge {
74 child: NodeIndex,
75 child_id: RuleIndex,
76 complement_id: RuleIndex,
77 dir: Direction,
78 affix: bool,
79 },
80 UnmergeFromMover {
81 child: NodeIndex,
82 child_id: RuleIndex,
83 stored_id: RuleIndex,
84 destination_id: RuleIndex,
85 dir: Direction,
86 trace_id: TraceId,
87 },
88 Unmove {
89 child_id: RuleIndex,
90 stored_id: RuleIndex,
91 },
92 UnmoveFromMover {
93 child_id: RuleIndex,
94 stored_id: RuleIndex,
95 destination_id: RuleIndex,
96 trace_id: TraceId,
97 },
98}
99
100#[derive(Debug, Eq, PartialEq, Ord, PartialOrd, Clone, Copy, Hash)]
103pub enum DerivationStep<T> {
104 Lemma(Option<T>),
106
107 Merge {
109 child: usize,
111 argument: usize,
113 direction: Direction,
115 },
116 MergeAffix {
118 child: usize,
120 argument: usize,
122 direction: Direction,
124 },
125 Move {
127 child: usize,
129 mover: usize,
131 },
132}
133
134impl RulePool {
135 pub fn as_derivation<T: Eq + Clone + Debug, C: Eq + Debug>(
140 &self,
141 lex: &Lexicon<T, C>,
142 ) -> Vec<DerivationStep<T>> {
143 let mut stack = vec![(RuleIndex(1), 0)];
144 let mut rules = vec![None];
145 let mut movers: BTreeMap<RuleIndex, Vec<usize>> = BTreeMap::new();
146
147 while let Some((x, i)) = stack.pop() {
148 match self.get(x) {
149 Rule::Start { .. } => unimplemented!("Should be skipped!"),
150 Rule::UnmoveTrace(_) => (),
151 Rule::Scan { lexeme, .. } => {
152 let lemma = lex.leaf_to_lemma(*lexeme).unwrap();
153 *rules.get_mut(i).unwrap() = Some(DerivationStep::Lemma(lemma.clone()));
154 }
155 Rule::Unmerge {
156 child_id,
157 complement_id,
158 dir,
159 affix,
160 child: nx,
161 } => {
162 let child = rules.len();
163 let argument = rules.len() + 1;
164 rules.extend([None, None]);
165 stack.extend([(*child_id, child), (*complement_id, argument)]);
166 *rules.get_mut(i).unwrap() = Some(if *affix {
167 let FeatureOrLemma::Feature(Feature::Affix(_, dir)) =
168 lex.get(*nx).unwrap().0
169 else {
170 panic!("Can't affix a non-affix")
171 };
172 DerivationStep::MergeAffix {
173 child,
174 argument,
175 direction: *dir,
176 }
177 } else {
178 DerivationStep::Merge {
179 child,
180 argument,
181 direction: *dir,
182 }
183 });
184 }
185 Rule::UnmergeFromMover {
186 child_id,
187 stored_id,
188 dir,
189 destination_id,
190 ..
191 } => {
192 let child = rules.len();
193 let argument = rules.len() + 1;
194 rules.extend([None, None]);
195 stack.extend([(*child_id, child), (*stored_id, argument)]);
196 let targets = movers.get(destination_id).unwrap();
197 for target in targets {
198 let Some(DerivationStep::Move { mover, .. }) =
199 rules.get_mut(*target).unwrap()
200 else {
201 panic!("Problem as all targets must be Move!")
202 };
203 *mover = argument;
204 }
205 *rules.get_mut(i).unwrap() = Some(DerivationStep::Merge {
206 child,
207 argument,
208 direction: *dir,
209 });
210 }
211 Rule::Unmove {
212 child_id,
213 stored_id,
214 } => {
215 let child = rules.len();
216 movers.insert(*stored_id, vec![i]);
217 rules.push(None);
218 stack.push((*child_id, child));
219 *rules.get_mut(i).unwrap() = Some(DerivationStep::Move { child, mover: 0 });
220 }
221 Rule::UnmoveFromMover {
222 child_id,
223 stored_id,
224 destination_id,
225 ..
226 } => {
227 let child = rules.len();
228 let mut future_moves = movers.remove(destination_id).unwrap();
229 future_moves.push(i);
230 movers.insert(*stored_id, future_moves);
231 rules.push(None);
232 stack.push((*child_id, child));
233 *rules.get_mut(i).unwrap() = Some(DerivationStep::Move { child, mover: 0 });
234 }
235 }
236 }
237 rules.into_iter().map(|x| x.unwrap()).collect()
238 }
239}
240impl Rule {
241 #[cfg(any(feature = "semantics", feature = "pretty"))]
242 fn children_directed(&self) -> impl DoubleEndedIterator<Item = RuleIndex> {
243 match self {
244 Rule::Start { child, .. } => [Some(*child), None],
245 Rule::UnmoveTrace(_) | Rule::Scan { .. } => [None, None],
246 Rule::Unmove {
247 child_id: a,
248 stored_id: b,
249 }
250 | Rule::UnmoveFromMover {
251 child_id: a,
252 stored_id: b,
253 ..
254 } => [Some(*b), Some(*a)],
255 Rule::Unmerge {
256 child_id: a,
257 complement_id: b,
258 dir,
259 ..
260 }
261 | Rule::UnmergeFromMover {
262 child_id: a,
263 stored_id: b,
264 dir,
265 ..
266 } => match dir {
267 Direction::Left => [Some(*b), Some(*a)],
268 Direction::Right => [Some(*a), Some(*b)],
269 },
270 }
271 .into_iter()
272 .flatten()
273 }
274
275 #[cfg(feature = "semantics")]
276 fn children(&self) -> impl DoubleEndedIterator<Item = RuleIndex> {
277 match self {
278 Rule::Start { child, .. } => [Some(*child), None],
279 Rule::UnmoveTrace(_) | Rule::Scan { .. } => [None, None],
280 Rule::Unmove {
281 child_id: a,
282 stored_id: b,
283 }
284 | Rule::UnmoveFromMover {
285 child_id: a,
286 stored_id: b,
287 ..
288 }
289 | Rule::Unmerge {
290 child_id: a,
291 complement_id: b,
292 ..
293 }
294 | Rule::UnmergeFromMover {
295 child_id: a,
296 stored_id: b,
297 ..
298 } => [Some(*a), Some(*b)],
299 }
300 .into_iter()
301 .flatten()
302 }
303
304 fn is_scan(&self) -> bool {
305 matches!(self, Rule::Scan { .. })
306 }
307}
308
309#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
310struct PartialIndex(usize);
311
312#[derive(Debug, Clone, Copy, PartialEq, Eq)]
313pub(crate) struct RuleHolder {
314 rule: Rule,
315 index: RuleIndex,
316 parent: Option<PartialIndex>,
317}
318
319#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
320pub(crate) struct PartialRulePool {
321 n_traces: usize,
322 n_nodes: usize,
323 most_recent: PartialIndex,
324}
325
326impl PartialRulePool {
327 pub(crate) fn peek_fresh(&self) -> RuleIndex {
329 RuleIndex(self.n_nodes)
330 }
331
332 pub(crate) fn fresh(&mut self) -> RuleIndex {
333 let id = RuleIndex(self.n_nodes); self.n_nodes += 1;
335 id
336 }
337
338 pub(crate) fn fresh_trace(&mut self) -> TraceId {
339 let i = TraceId(self.n_traces);
340 self.n_traces += 1;
341 i
342 }
343
344 pub(crate) fn n_steps(&self) -> usize {
345 self.n_nodes
346 }
347
348 pub(crate) fn push_rule(&mut self, pool: &mut Vec<RuleHolder>, rule: Rule, index: RuleIndex) {
349 pool.push(RuleHolder {
350 rule,
351 index,
352 parent: Some(self.most_recent),
353 });
354 self.most_recent = PartialIndex(pool.len() - 1);
355 }
356
357 pub(crate) fn default_pool(cat: NodeIndex) -> Vec<RuleHolder> {
358 let mut v = Vec::with_capacity(100_000);
359 v.push(RuleHolder {
360 rule: Rule::Start {
361 node: cat,
362 child: RuleIndex(1),
363 },
364 index: RuleIndex(0),
365 parent: None,
366 });
367 v
368 }
369
370 pub(crate) fn into_rule_pool(self, big_pool: &[RuleHolder]) -> RulePool {
371 let mut pool = vec![None; self.n_nodes];
372 let mut i = Some(self.most_recent);
373
374 while i.is_some() {
375 let RuleHolder {
376 rule,
377 index,
378 parent,
379 } = big_pool[i.unwrap().0];
380 match rule {
381 Rule::UnmoveFromMover {
382 destination_id,
383 trace_id,
384 ..
385 }
386 | Rule::UnmergeFromMover {
387 destination_id,
388 trace_id,
389 ..
390 } => {
391 pool[destination_id.0] = Some(Rule::UnmoveTrace(trace_id));
392 }
393 _ => (),
394 }
395 pool[index.0] = Some(rule);
396 i = parent;
397 }
398
399 RulePool(pool.into_iter().collect::<Option<Vec<_>>>().unwrap())
400 }
401}
402
403impl Default for PartialRulePool {
404 fn default() -> Self {
405 PartialRulePool {
406 n_traces: 0,
407 n_nodes: 2,
408 most_recent: PartialIndex(0),
409 }
410 }
411}
412
413#[derive(Debug, Clone, Eq, PartialEq, Hash)]
416pub struct RulePool(Vec<Rule>);
417
418#[cfg(feature = "semantics")]
419pub mod semantics;
420
421impl RulePool {
422 #[cfg(feature = "pretty")]
423 pub(crate) fn complement_last(
424 &self,
425 x: RuleIndex,
426 ) -> impl DoubleEndedIterator<Item = RuleIndex> {
427 let x = self.get(x);
428 match x {
429 Rule::Start { child, .. } => [Some(*child), None],
430 Rule::UnmoveTrace(_) | Rule::Scan { .. } => [None, None],
431 Rule::Unmove {
432 child_id: a,
433 stored_id: b,
434 }
435 | Rule::Unmerge {
436 child_id: a,
437 complement_id: b,
438 ..
439 }
440 | Rule::UnmergeFromMover {
441 child_id: a,
442 stored_id: b,
443 ..
444 }
445 | Rule::UnmoveFromMover {
446 child_id: a,
447 stored_id: b,
448 ..
449 } => match (self.get(*a).is_scan(), self.get(*b).is_scan()) {
450 (true, false) => [Some(*a), Some(*b)],
451 (false, true) | (false, false) | (true, true) => [Some(*b), Some(*a)],
452 },
453 }
454 .into_iter()
455 .flatten()
456 }
457
458 #[must_use]
460 pub fn n_steps(&self) -> usize {
461 self.0.len()
462 }
463
464 fn get(&self, x: RuleIndex) -> &Rule {
465 &self.0[x.0]
466 }
467
468 fn iter(&self) -> impl Iterator<Item = &Rule> {
469 self.0.iter()
470 }
471
472 pub fn used_lemmas(&self) -> impl Iterator<Item = LexemeId> {
474 self.0.iter().filter_map(|x| {
475 if let Rule::Scan { lexeme, .. } = x {
476 Some(*lexeme)
477 } else {
478 None
479 }
480 })
481 }
482
483 #[must_use]
485 pub fn max_memory_load(&self) -> usize {
486 let mut max = 0;
487 let mut memory = HashSet::default();
488 for rule in &self.0 {
489 match rule {
490 Rule::UnmoveTrace(trace_id) => {
491 memory.insert(*trace_id);
492 if memory.len() > max {
493 max = memory.len();
494 }
495 }
496 Rule::UnmergeFromMover { trace_id, .. }
497 | Rule::UnmoveFromMover { trace_id, .. } => {
498 memory.remove(trace_id);
499 }
500 _ => (),
501 }
502 }
503 max
504 }
505}
506
507#[cfg(test)]
508mod test {
509 use crate::{
510 Direction, Lexicon, ParsingConfig, PhonContent, grammars, parsing::rules::DerivationStep,
511 };
512
513 #[test]
514 fn memory_load() -> anyhow::Result<()> {
515 let grammar = Lexicon::from_string("a::b= c= +a +e C\nb::b -a\nc::c -e")?;
516
517 let (_, _, rules) = grammar
518 .parse(
519 &[
520 PhonContent::Normal("c"),
521 PhonContent::Normal("b"),
522 PhonContent::Normal("a"),
523 ],
524 "C",
525 &crate::ParsingConfig::default(),
526 )?
527 .next()
528 .unwrap();
529
530 assert_eq!(rules.max_memory_load(), 2);
531 let grammar = Lexicon::from_string("a::b= +a c= +e C\nb::b -a\nc::c -e")?;
532
533 let (_, _, rules) = grammar
534 .parse(
535 &[
536 PhonContent::Normal("c"),
537 PhonContent::Normal("b"),
538 PhonContent::Normal("a"),
539 ],
540 "C",
541 &crate::ParsingConfig::default(),
542 )?
543 .next()
544 .unwrap();
545
546 assert_eq!(rules.max_memory_load(), 1);
547 Ok(())
548 }
549
550 #[test]
551 fn convert_to_derivation() -> anyhow::Result<()> {
552 let lexicon = Lexicon::from_string("a::b= s\nb::c= b\nc::c")?;
553 for (_, _, rules) in lexicon.generate("s", &ParsingConfig::default())? {
554 assert_eq!(
555 rules.as_derivation(&lexicon),
556 vec![
557 DerivationStep::Merge {
558 child: 1,
559 argument: 2,
560 direction: Direction::Right
561 },
562 DerivationStep::Lemma(Some("a")),
563 DerivationStep::Merge {
564 child: 3,
565 argument: 4,
566 direction: Direction::Right
567 },
568 DerivationStep::Lemma(Some("b")),
569 DerivationStep::Lemma(Some("c"))
570 ]
571 );
572 }
573
574 println!("The king knows which beer the queen drinks");
575 let lexicon = Lexicon::from_string(grammars::STABLER2011)?;
576 for (_, _, rules) in lexicon.parse(
577 &PhonContent::from([
578 "the", "king", "knows", "which", "beer", "the", "queen", "drinks",
579 ]),
580 "C",
581 &ParsingConfig::default(),
582 )? {
583 let d = rules.as_derivation(&lexicon);
584
585 for (i, x) in d.iter().enumerate() {
586 println!("{i}\t{x:?}");
587 }
588
589 assert_eq!(
590 d,
591 vec![
592 DerivationStep::Merge {
593 child: 1,
594 argument: 2,
595 direction: Direction::Right
596 },
597 DerivationStep::Lemma(None),
598 DerivationStep::Merge {
599 child: 3,
600 argument: 4,
601 direction: Direction::Left
602 },
603 DerivationStep::Merge {
604 child: 7,
605 argument: 8,
606 direction: Direction::Right
607 },
608 DerivationStep::Merge {
609 child: 5,
610 argument: 6,
611 direction: Direction::Right
612 },
613 DerivationStep::Lemma(Some("the")),
614 DerivationStep::Lemma(Some("king")),
615 DerivationStep::Lemma(Some("knows")),
616 DerivationStep::Move {
617 child: 9,
618 mover: 17
619 },
620 DerivationStep::Merge {
621 child: 10,
622 argument: 11,
623 direction: Direction::Right
624 },
625 DerivationStep::Lemma(None),
626 DerivationStep::Merge {
627 child: 12,
628 argument: 13,
629 direction: Direction::Left
630 },
631 DerivationStep::Merge {
632 child: 16,
633 argument: 17,
634 direction: Direction::Right
635 },
636 DerivationStep::Merge {
637 child: 14,
638 argument: 15,
639 direction: Direction::Right
640 },
641 DerivationStep::Lemma(Some("the")),
642 DerivationStep::Lemma(Some("queen")),
643 DerivationStep::Lemma(Some("drinks")),
644 DerivationStep::Merge {
645 child: 18,
646 argument: 19,
647 direction: Direction::Right
648 },
649 DerivationStep::Lemma(Some("which")),
650 DerivationStep::Lemma(Some("beer"))
651 ]
652 );
653 }
654
655 let lexicon = Lexicon::from_string(grammars::COPY_LANGUAGE)?;
656 println!("ABAB");
657
658 for (_, _, rules) in lexicon.parse(
659 &PhonContent::from(["a", "b", "a", "b"]),
660 "T",
661 &ParsingConfig::default(),
662 )? {
663 let d = rules.as_derivation(&lexicon);
664
665 for (i, x) in d.iter().enumerate() {
666 println!("{i}\t{x:?}");
667 }
668 assert_eq!(
669 d,
670 vec![
671 DerivationStep::Move { child: 1, mover: 4 },
672 DerivationStep::Move { child: 2, mover: 7 },
673 DerivationStep::Merge {
674 child: 3,
675 argument: 4,
676 direction: Direction::Left
677 },
678 DerivationStep::Lemma(None),
679 DerivationStep::Move {
680 child: 5,
681 mover: 10
682 },
683 DerivationStep::Merge {
684 child: 6,
685 argument: 7,
686 direction: Direction::Left
687 },
688 DerivationStep::Lemma(Some("b")),
689 DerivationStep::Move {
690 child: 8,
691 mover: 13
692 },
693 DerivationStep::Merge {
694 child: 9,
695 argument: 10,
696 direction: Direction::Left
697 },
698 DerivationStep::Lemma(Some("b")),
699 DerivationStep::Move {
700 child: 11,
701 mover: 16
702 },
703 DerivationStep::Merge {
704 child: 12,
705 argument: 13,
706 direction: Direction::Left
707 },
708 DerivationStep::Lemma(Some("a")),
709 DerivationStep::Move {
710 child: 14,
711 mover: 16
712 },
713 DerivationStep::Merge {
714 child: 15,
715 argument: 16,
716 direction: Direction::Left
717 },
718 DerivationStep::Lemma(Some("a")),
719 DerivationStep::Lemma(None)
720 ]
721 );
722 }
723
724 Ok(())
725 }
726}