use std::borrow::Borrow;
use std::cmp::Reverse;
use std::collections::BinaryHeap;
use std::fmt::Debug;
use std::marker::PhantomData;

use crate::lexicon::{Feature, FeatureOrLemma, LexemeId, Lexicon, ParsingError};
use crate::parsing::rules::StolenInfo;
use crate::parsing::trees::StolenHead;
use crate::{Direction, ParseHeap, ParsingConfig};
use beam::Scanner;
use logprob::LogProb;
use petgraph::graph::NodeIndex;
use thin_vec::{ThinVec, thin_vec};
pub use trees::GornIndex;
use trees::{FutureTree, ParseMoment};

use rules::Rule;
pub use rules::RulePool;
pub(crate) use rules::{PartialRulePool, RuleHolder, RuleIndex};

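/// Bookkeeping for one head-movement cluster: the lexical heads affixed onto a single stealing
/// head, in the order they are handed to `multiscan`. `pos` is the current insertion point,
/// `head` indexes the stealing head's own slot, `tree` holds the stealer's pending `FutureTree`
/// until affixation is finished, and `current_index` records the Gorn index built up from the
/// affixation directions.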
#[derive(Debug, Clone)]
struct AffixedHead {
    heads: Vec<LexemeId>,
    pos: usize,
    head: usize,
    tree: Option<FutureTree>,
    current_index: GornIndex,
}

impl AffixedHead {
    fn insert(&mut self, nx: LexemeId, dir: Direction) {
        match dir {
            Direction::Left => self.heads.insert(self.pos, nx),
            Direction::Right => {
                self.pos += 1;
                self.heads.insert(self.pos, nx);
            }
        }
        // Keep `head` pointing at the same lexeme after the insertion shifts indices.
        if self.pos <= self.head {
            self.head += 1;
        }
        self.current_index.push(dir);
    }

    fn new(nx: LexemeId, tree: FutureTree) -> Self {
        AffixedHead {
            heads: vec![nx],
            pos: 0,
            head: 0,
            tree: Some(tree),
            current_index: GornIndex::default(),
        }
    }
}

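/// One hypothesis on the parser's beam: the scanner state `beam`, a min-heap of pending
/// `ParseMoment`s, any in-progress head-movement clusters (`heads`), the partially built rule
/// derivation, a count of consecutively scanned empty lemmas, and the hypothesis' log-probability.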
#[derive(Debug, Clone)]
pub(crate) struct BeamWrapper<T, B: Scanner<T>> {
    log_prob: LogProb<f64>,
    queue: BinaryHeap<Reverse<ParseMoment>>,
    heads: ThinVec<AffixedHead>,
    rules: PartialRulePool,
    n_consec_empties: usize,
    pub beam: B,
    phantom: PhantomData<T>,
}

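// Beams are ordered by log-probability alone, presumably so the parse heap always expands the
// most probable hypothesis first; equality additionally compares the scanner state and the
// pending queue.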
impl<T: Eq + std::fmt::Debug, B: Scanner<T> + Eq> PartialEq for BeamWrapper<T, B> {
    fn eq(&self, other: &Self) -> bool {
        self.beam == other.beam
            && self.log_prob == other.log_prob
            && self.queue.clone().into_sorted_vec() == other.queue.clone().into_sorted_vec()
    }
}

impl<T: Eq + std::fmt::Debug, B: Scanner<T> + Eq> Eq for BeamWrapper<T, B> {}

impl<T: Eq + std::fmt::Debug, B: Scanner<T> + Eq> PartialOrd for BeamWrapper<T, B> {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl<T: Eq + std::fmt::Debug, B: Scanner<T> + Eq> Ord for BeamWrapper<T, B> {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.log_prob.cmp(&other.log_prob)
    }
}

impl<T: Clone + Eq + std::fmt::Debug, B: Scanner<T> + Eq + Clone> BeamWrapper<T, B> {
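    /// Tries to scan `child_node`'s lemma with the scanner `B`. Depending on the moment's
    /// `StolenHead` state this is an ordinary scan, the attachment of a stolen (affixed) head to
    /// its stealer, or a multiscan of a completed head cluster. On success the `Scan` rule is
    /// recorded and the extended beam is pushed onto `v`.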
    fn scan<Category: Eq + std::fmt::Debug + Clone>(
        mut self,
        v: &mut ParseHeap<T, B>,
        moment: &ParseMoment,
        s: &Option<T>,
        child_node: LexemeId,
        child_prob: LogProb<f64>,
        lexicon: &Lexicon<T, Category>,
    ) {
        // Track how many empty (covert) lemmas have been scanned in a row.
        match s {
            Some(_) => self.n_consec_empties = 0,
            None => self.n_consec_empties += 1,
        }
        let rule = match moment.stolen_head {
            // The stealer's own slot: fill it in and try to scan the assembled head cluster.
            StolenHead::Root(head_id) => {
                let heads = &mut self.heads[head_id];
                heads.heads[heads.head] = child_node;

                let heads = heads
                    .heads
                    .iter()
                    .filter_map(|x| lexicon.leaf_to_lemma(*x).unwrap().as_ref())
                    .collect();

                if self.beam.multiscan(heads) {
                    Some(Rule::Scan {
                        lexeme: child_node,
                        stolen: StolenInfo::Stealer,
                    })
                } else {
                    None
                }
            }
            // A stolen head: attach it to its stealer instead of scanning it on its own.
            StolenHead::Affix {
                rule,
                direction,
                stealer_id,
                is_done,
            } => {
                if is_done {
                    let tree = self.heads[stealer_id].tree.take().unwrap();
                    self.add_tree_to_queue(tree, thin_vec![], StolenHead::Root(stealer_id));
                }
                self.heads[stealer_id].insert(child_node, direction);
                Some(Rule::Scan {
                    lexeme: child_node,
                    stolen: StolenInfo::Stolen(rule, self.heads[stealer_id].current_index),
                })
            }
            // No head movement involved: an ordinary scan.
            StolenHead::None => {
                if self.beam.scan(s) {
                    Some(Rule::Scan {
                        lexeme: child_node,
                        stolen: StolenInfo::Normal,
                    })
                } else {
                    None
                }
            }
        };

        if let Some(rule) = rule {
            self.log_prob += child_prob;
            self.rules.push_rule(v.rules_mut(), rule, moment.tree.id);
            v.push(self);
        }
    }

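    /// Allocates a fresh rule index and pairs it with a `FutureTree` rooted at `node`.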
    fn new_future_tree(&mut self, node: NodeIndex, index: GornIndex) -> (FutureTree, RuleIndex) {
        let id = self.rules.fresh();
        (FutureTree { node, index, id }, id)
    }

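    /// Builds a `FutureTree` for `node` and immediately enqueues it as a `ParseMoment`,
    /// returning the newly allocated rule index.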
    fn push_moment(
        &mut self,
        node: NodeIndex,
        index: GornIndex,
        movers: ThinVec<FutureTree>,
        stolen_head_info: StolenHead,
    ) -> RuleIndex {
        let (tree, id) = self.new_future_tree(node, index);
        self.add_tree_to_queue(tree, movers, stolen_head_info);
        id
    }

    fn add_tree_to_queue(
        &mut self,
        tree: FutureTree,
        movers: ThinVec<FutureTree>,
        stolen_head: StolenHead,
    ) {
        self.queue.push(Reverse(ParseMoment {
            tree,
            stolen_head,
            movers,
        }));
    }

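    /// Builds the initial beam for a parse: a single `ParseMoment` at `category_index` with
    /// probability one, no movers, and no rules applied yet.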
    pub(super) fn new(beam: B, category_index: NodeIndex) -> Self {
        let mut queue = BinaryHeap::<Reverse<ParseMoment>>::with_capacity(5);
        queue.push(Reverse(ParseMoment::new(
            FutureTree {
                node: category_index,
                index: GornIndex::default(),
                id: RuleIndex::one(),
            },
            thin_vec![],
            StolenHead::None,
        )));
        BeamWrapper {
            beam,
            n_consec_empties: 0,
            queue,
            heads: thin_vec![],
            log_prob: LogProb::prob_of_one(),
            rules: PartialRulePool::default(),
            phantom: PhantomData,
        }
    }

    pub fn log_prob(&self) -> LogProb<f64> {
        self.log_prob
    }

    pub fn n_steps(&self) -> usize {
        self.rules.n_steps()
    }

    pub fn n_consecutive_empty(&self) -> usize {
        self.n_consec_empties
    }

    pub fn pop_moment(&mut self) -> Option<ParseMoment> {
        self.queue.pop().map(|Reverse(x)| x)
    }

    pub fn is_empty(&self) -> bool {
        self.queue.is_empty()
    }
}

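/// Copies the slice into a new `ThinVec` and appends `x`.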
fn clone_push<T: Clone>(v: &[T], x: T) -> ThinVec<T> {
    let mut v: ThinVec<T> = ThinVec::from(v);
    v.push(x);
    v
}

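/// Unmerge using an existing mover: tries to satisfy the selector for `cat` with the category
/// feature of one of the moment's movers instead of a fresh complement. Pushes one new beam per
/// matching mover and returns `true` if any were found.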
#[allow(clippy::too_many_arguments)]
#[inline]
fn unmerge_from_mover<
    T: Eq + std::fmt::Debug + Clone,
    Category: Eq + std::fmt::Debug + Clone,
    B: Scanner<T> + Clone + Eq,
>(
    v: &mut ParseHeap<T, B>,
    lexicon: &Lexicon<T, Category>,
    moment: &ParseMoment,
    beam: &BeamWrapper<T, B>,
    cat: &Category,
    child_node: NodeIndex,
    dir: Direction,
    child_prob: LogProb<f64>,
) -> bool {
    let mut new_beam = false;
    for mover in &moment.movers {
        for stored_child_node in lexicon.children_of(mover.node) {
            let (stored, stored_prob) = lexicon.get(stored_child_node).unwrap();
            match stored {
                FeatureOrLemma::Feature(Feature::Category(stored)) if stored == cat => {
                    let mut beam = beam.clone();
                    let movers = moment
                        .movers
                        .iter()
                        .filter(|&v| v != mover)
                        .copied()
                        .collect();

                    // The remaining movers go with the stored subtree when the child is a
                    // complement; otherwise they stay with the child.
                    let (stored_movers, child_movers) = if lexicon.is_complement(child_node) {
                        (movers, thin_vec![])
                    } else {
                        (thin_vec![], movers)
                    };

                    let child_id = beam.push_moment(
                        child_node,
                        moment.tree.index,
                        child_movers,
                        moment.stolen_head,
                    );

                    let stored_id = beam.push_moment(
                        stored_child_node,
                        mover.index,
                        stored_movers,
                        StolenHead::None,
                    );

                    beam.log_prob += stored_prob + child_prob;

                    let trace_id = beam.rules.fresh_trace();

                    beam.rules.push_rule(
                        v.rules_mut(),
                        Rule::UnmergeFromMover {
                            child: child_node,
                            child_id,
                            stored_id,
                            trace_id,
                            destination_id: mover.id,
                            dir,
                        },
                        moment.tree.id,
                    );
                    v.push(beam);
                    new_beam = true;
                }
                _ => (),
            }
        }
    }
    new_beam
}

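/// Ordinary unmerge: splits the moment into the selecting child and a complement of category
/// `cat`, hands the movers to one side according to `is_complement`, and threads the
/// head-movement state (`head_info`) through to the two new moments.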
#[allow(clippy::too_many_arguments)]
#[inline]
fn unmerge<
    T: Eq + std::fmt::Debug + Clone,
    Category: Eq + std::fmt::Debug + Clone,
    B: Scanner<T> + Eq + Clone,
>(
    v: &mut ParseHeap<T, B>,
    lexicon: &Lexicon<T, Category>,
    moment: &ParseMoment,
    mut beam: BeamWrapper<T, B>,
    cat: &Category,
    dir: Direction,
    child_node: NodeIndex,
    child_prob: LogProb<f64>,
    head_info: HeadMovement,
) -> Result<(), ParsingError<Category>> {
    let complement = lexicon.find_category(cat)?;

    let (complement_movers, child_movers) = if lexicon.is_complement(child_node) {
        (moment.movers.clone(), thin_vec![])
    } else {
        (thin_vec![], moment.movers.clone())
    };

    let (child_head, comp_head) = match head_info {
        HeadMovement::HeadMovement { stealer, stolen } => (stealer, stolen),
        HeadMovement::Inherit => (moment.stolen_head, StolenHead::None),
    };

    let (child_tree, child_id) =
        beam.new_future_tree(child_node, moment.tree.index.clone_push(dir.flip()));
    let (complement_tree, complement_id) =
        beam.new_future_tree(complement, moment.tree.index.clone_push(dir));

    match (child_head, comp_head) {
        (StolenHead::None, StolenHead::None) => {
            beam.add_tree_to_queue(child_tree, child_movers, StolenHead::None);
            beam.add_tree_to_queue(complement_tree, complement_movers, StolenHead::None);
        }
        (StolenHead::Root(_), stole @ StolenHead::Affix { .. }) => {
            #[cfg(test)]
            assert!(child_movers.is_empty());

            beam.heads
                .push(AffixedHead::new(LexemeId(child_node), child_tree));
            beam.add_tree_to_queue(complement_tree, complement_movers, stole.with_done());
        }
        (StolenHead::Root(_), StolenHead::Root(_)) => {
            panic!("impossible: both heads are stolen-head roots")
        }
        (stole_child @ StolenHead::Affix { .. }, stolen_comp @ StolenHead::Affix { .. }) => {
            #[cfg(test)]
            assert!(child_movers.is_empty());

            beam.add_tree_to_queue(child_tree, child_movers, stole_child.with_not_done());
            beam.add_tree_to_queue(complement_tree, complement_movers, stolen_comp.with_done());
        }
        (stole @ StolenHead::Affix { .. }, StolenHead::None) => {
            #[cfg(test)]
            assert!(child_movers.is_empty());
            beam.add_tree_to_queue(child_tree, child_movers, stole.with_done());
            beam.add_tree_to_queue(complement_tree, complement_movers, StolenHead::None);
        }
        (_, _) => panic!("Should be impossible"),
    }

    beam.log_prob += child_prob;
    beam.rules.push_rule(
        v.rules_mut(),
        Rule::Unmerge {
            child: child_node,
            child_id,
            complement_id,
            dir,
            affix: head_info != HeadMovement::Inherit,
        },
        moment.tree.id,
    );
    v.push(beam);
    Ok(())
}

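/// Unmove using an existing mover: tries to discharge the licensor for `cat` with a licensee
/// found inside one of the current movers, replacing that mover by the licensee's subtree.
/// Pushes one new beam per match and returns `true` if any were found.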
#[allow(clippy::too_many_arguments)]
#[inline]
fn unmove_from_mover<
    T: Eq + std::fmt::Debug + Clone,
    Category: Eq + std::fmt::Debug + Clone,
    B: Scanner<T> + Clone + Eq,
>(
    v: &mut ParseHeap<T, B>,
    lexicon: &Lexicon<T, Category>,
    moment: &ParseMoment,
    beam: &BeamWrapper<T, B>,
    cat: &Category,
    child_node: NodeIndex,
    child_prob: LogProb<f64>,
    already_mover_of_this_cat: bool,
) -> bool {
    let mut new_beam_found = false;

    for mover in moment
        .movers
        .iter()
        .filter(|x| lexicon.get_feature_category(x.node) == Some(cat) || !already_mover_of_this_cat)
    {
        for stored_child_node in lexicon.children_of(mover.node) {
            let (stored, stored_prob) = lexicon.get(stored_child_node).unwrap();
            match stored {
                FeatureOrLemma::Feature(Feature::Licensee(s)) if cat == s => {
                    let mut beam = beam.clone();
                    let (stored_tree, stored_id) =
                        beam.new_future_tree(stored_child_node, mover.index);

                    let movers = moment
                        .movers
                        .iter()
                        .filter(|&v| v != mover)
                        .copied()
                        .chain(std::iter::once(stored_tree))
                        .collect();

                    let child_id =
                        beam.push_moment(child_node, moment.tree.index, movers, moment.stolen_head);
                    beam.log_prob += stored_prob + child_prob;

                    let trace_id = beam.rules.fresh_trace();
                    beam.rules.push_rule(
                        v.rules_mut(),
                        Rule::UnmoveFromMover {
                            child_id,
                            stored_id,
                            trace_id,
                            destination_id: mover.id,
                        },
                        moment.tree.id,
                    );
                    v.push(beam);
                    new_beam_found = true;
                }
                _ => (),
            }
        }
    }
    new_beam_found
}

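/// Ordinary unmove: introduces a fresh mover carrying the licensee for `cat` (looked up with
/// `find_licensee`) and re-queues the child with that mover added to its mover list.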
#[allow(clippy::too_many_arguments)]
#[inline]
fn unmove<
    T: Eq + std::fmt::Debug + Clone,
    Category: Eq + std::fmt::Debug + Clone,
    B: Scanner<T> + Eq + Clone,
>(
    v: &mut ParseHeap<T, B>,
    lexicon: &Lexicon<T, Category>,
    moment: &ParseMoment,
    mut beam: BeamWrapper<T, B>,
    cat: &Category,
    child_node: NodeIndex,
    child_prob: LogProb<f64>,
) -> Result<(), ParsingError<Category>> {
    let stored = lexicon.find_licensee(cat)?;
    let (stored, stored_id) =
        beam.new_future_tree(stored, moment.tree.index.clone_push(Direction::Left));

    let child_id = beam.push_moment(
        child_node,
        moment.tree.index.clone_push(Direction::Right),
        clone_push(&moment.movers, stored),
        moment.stolen_head,
    );

    beam.log_prob += child_prob;
    beam.rules.push_rule(
        v.rules_mut(),
        Rule::Unmove {
            child_id,
            stored_id,
        },
        moment.tree.id,
    );
    v.push(beam);
    Ok(())
}

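/// How head-movement state is passed into `unmerge`: either a stealer/stolen pair produced by an
/// affix feature, or `Inherit`, which simply forwards the moment's existing `StolenHead`.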
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
enum HeadMovement {
    HeadMovement {
        stealer: StolenHead,
        stolen: StolenHead,
    },
    Inherit,
}

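/// Sets up head-movement bookkeeping for an affix feature: if the moment already carries an
/// `Affix` head the chain is extended, otherwise the next slot in `beam.heads` is reserved as
/// the new stealer and a matching stolen head is created for direction `dir`.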
#[inline]
fn set_beam_head<T: Eq + std::fmt::Debug + Clone, B: Scanner<T> + Eq + Clone>(
    moment: &ParseMoment,
    dir: Direction,
    beam: &mut BeamWrapper<T, B>,
) -> HeadMovement {
    match moment.stolen_head {
        StolenHead::Root(_) => panic!(
            "Only possible if there are multiple head movements to one lemma which is not yet supported"
        ),
        StolenHead::Affix {
            stealer_id,
            rule,
            direction: old_dir,
            is_done,
        } => HeadMovement::HeadMovement {
            stealer: StolenHead::Affix {
                stealer_id,
                rule,
                direction: old_dir,
                is_done: false,
            },
            stolen: StolenHead::Affix {
                stealer_id,
                direction: dir,
                rule,
                is_done,
            },
        },
        StolenHead::None => {
            let id = beam.heads.len();
            HeadMovement::HeadMovement {
                stealer: StolenHead::Root(id),
                stolen: StolenHead::new_stolen(beam.rules.peek_fresh(), dir, id),
            }
        }
    }
}

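/// Expands one `ParseMoment` against every child of its node in the lexicon, pushing each
/// resulting hypothesis onto `extender`: lemmas are scanned, selectors trigger unmerge
/// (optionally from a mover, with `move_prob`/`dont_move_prob` splitting the probability mass),
/// licensors trigger unmove, and affix features start or extend head movement.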
pub(crate) fn expand<
    T: Eq + std::fmt::Debug + Clone,
    Category: Eq + std::fmt::Debug + Clone,
    B: Scanner<T> + Eq + Clone,
    L: Borrow<Lexicon<T, Category>>,
>(
    extender: &mut ParseHeap<T, B>,
    moment: ParseMoment,
    beam: BeamWrapper<T, B>,
    lexicon: L,
    config: &ParsingConfig,
) {
    let lexicon = lexicon.borrow();
    let n_children = lexicon.n_children(moment.tree.node);
    let new_beams = itertools::repeat_n(beam, n_children);

    new_beams
        .zip(lexicon.children_of(moment.tree.node))
        .for_each(
            |(mut beam, child_node)| match lexicon.get(child_node).unwrap() {
                (FeatureOrLemma::Lemma(s), p) if moment.should_be_scanned() => {
                    beam.scan(extender, &moment, s, LexemeId(child_node), p, lexicon);
                }
                (
                    FeatureOrLemma::Complement(cat, dir)
                    | FeatureOrLemma::Feature(Feature::Selector(cat, dir)),
                    mut p,
                ) => {
                    // Split the probability mass between merging from a mover and merging a
                    // fresh complement whenever both analyses are possible.
                    if unmerge_from_mover(
                        extender,
                        lexicon,
                        &moment,
                        &beam,
                        cat,
                        child_node,
                        *dir,
                        if lexicon.find_category(cat).is_ok() {
                            p + config.move_prob
                        } else {
                            p
                        },
                    ) {
                        p += config.dont_move_prob;
                    }
                    let _ = unmerge(
                        extender,
                        lexicon,
                        &moment,
                        beam,
                        cat,
                        *dir,
                        child_node,
                        p,
                        HeadMovement::Inherit,
                    );
                }
                (FeatureOrLemma::Feature(Feature::Licensor(cat)), mut p) => {
                    let already_mover_of_this_cat = moment.movers.iter().any(|x| {
                        lexicon
                            .get_feature_category(x.node)
                            .is_some_and(|x| x == cat)
                    });
                    if unmove_from_mover(
                        extender,
                        lexicon,
                        &moment,
                        &beam,
                        cat,
                        child_node,
                        if lexicon.find_licensee(cat).is_ok() {
                            p + config.move_prob
                        } else {
                            p
                        },
                        already_mover_of_this_cat,
                    ) {
                        p += config.dont_move_prob;
                    }
                    if !already_mover_of_this_cat {
                        let _ = unmove(extender, lexicon, &moment, beam, cat, child_node, p);
                    }
                }
                (FeatureOrLemma::Feature(Feature::Affix(cat, dir)), p) => {
                    let head_info = set_beam_head(&moment, *dir, &mut beam);
                    let _ = unmerge(
                        extender,
                        lexicon,
                        &moment,
                        beam,
                        cat,
                        Direction::Right,
                        child_node,
                        p,
                        head_info,
                    );
                }
                (
                    FeatureOrLemma::Lemma(_)
                    | FeatureOrLemma::Feature(Feature::Category(_) | Feature::Licensee(_)),
                    _,
                ) => (),
                (FeatureOrLemma::Root, _) => unimplemented!("Impossible to parse the root node"),
            },
        );
}

pub mod beam;
pub mod rules;
mod trees;
pub use trees::MAX_STEPS;

#[cfg(test)]
mod tests;