// minimalist_grammar_parser — crate root (lib.rs)

//!This crate defines a number of structs and methods to parse and generate Minimalist Grammars
//!(MGs) from Stabler (1997). Specifically, it implements a variant of the MG parsing algorithm
//!adapted from Stabler (2011) and Stabler (2013).
//!
//!
//!# Examples of usage
//!The following code generates the first 4 sentences of the $a^nb^n$ language.
//!```
//!use minimalist_grammar_parser::Lexicon;
//!use minimalist_grammar_parser::ParsingConfig;
//!# use minimalist_grammar_parser::lexicon::LexiconParsingError;
//!let lexicon = Lexicon::from_string("a::s= b= s\nb::b\n::s")?;
//!let v = lexicon
//!    .generate("s", &ParsingConfig::default())?
//!    .take(4)
//!    .map(|(_prob, s, _rules)| {
//!        s.into_iter()
//!            .map(|word| word.try_inner().unwrap())
//!            .collect::<Vec<_>>()
//!            .join("")
//!    })
//!    .collect::<Vec<_>>();
//!assert_eq!(v, vec!["", "ab", "aabb", "aaabbb"]);
//!# Ok::<(), anyhow::Error>(())
//!```
//!## References
//!
//! - Stabler, E. (1997). Derivational minimalism. In C. Retoré (Ed.), Logical Aspects of Computational Linguistics (pp. 68–95). Springer. <https://doi.org/10.1007/BFb0052152>
//! - Stabler, E. (2011). Top-Down Recognizers for MCFGs and MGs. In F. Keller & D. Reitter (Eds.), Proceedings of the 2nd Workshop on Cognitive Modeling and Computational Linguistics (pp. 39–48). Association for Computational Linguistics. <https://aclanthology.org/W11-0605>
//! - Stabler, E. (2013). Two Models of Minimalist, Incremental Syntactic Analysis. Topics in Cognitive Science, 5(3), 611–633. <https://doi.org/10.1111/tops.12031>
//!
//!
33#![warn(missing_docs)]
34
35use std::borrow::Borrow;
36use std::fmt::{Debug, Display};
37use std::hash::Hash;
38use std::marker::PhantomData;
39
40#[cfg(not(target_arch = "wasm32"))]
41use std::time::{Duration, Instant};
42
43pub use lexicon::{Lexicon, ParsingError};
44
45use logprob::LogProb;
46use min_max_heap::MinMaxHeap;
47use parsing::RuleHolder;
48
49pub use parsing::RulePool;
50use parsing::beam::{FuzzyScan, GeneratorScan, ParseScan, Scanner};
51use parsing::{BeamWrapper, PartialRulePool, expand};
52use petgraph::graph::NodeIndex;
53
54#[cfg(feature = "sampling")]
55use rand::Rng;
56#[cfg(feature = "sampling")]
57use rand_distr::Distribution;
58#[cfg(feature = "sampling")]
59use rand_distr::weighted::WeightedIndex;
60
61use serde::{Deserialize, Serialize, Serializer};
62use thiserror::Error;
63
///The basic input type of the library, used by generation and parsing alike.
///
///A sentence is represented as a `Vec<PhonContent<T>>` (see [`Lexicon::parse`] and
///[`Lexicon::generate`]), where each element is either a plain word or a word assembled from
///several heads by head movement.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub enum PhonContent<T> {
    ///Normal words
    Normal(T),
    ///Words that are built out of combining heads with head movement
    Affixed(Vec<T>),
}
72
///Whether a word has a given pronunciation or not.
///
///Serialized exactly like an [`Option<T>`]: [`Pronounciation::Unpronounced`] round-trips as
///`None` and [`Pronounciation::Pronounced`] as `Some` (see the `#[serde(from = "Option<T>")]`
///attribute here and the manual [`Serialize`] impl below).
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Deserialize)]
#[serde(from = "Option<T>")]
pub enum Pronounciation<T> {
    ///An empty (silent) pronunciation
    Unpronounced,
    ///The pronunciation
    Pronounced(T),
}
82
83impl<T: Serialize> Serialize for Pronounciation<T> {
84    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
85        let opt: Option<&T> = match self {
86            Pronounciation::Pronounced(t) => Some(t),
87            Pronounciation::Unpronounced => None,
88        };
89        opt.serialize(serializer)
90    }
91}
92
93impl<T> Pronounciation<T> {
94    ///Convert from [`&Pronounciation<T>`] to [`Pronounciation<&T>`].
95    pub const fn as_ref(&self) -> Pronounciation<&T> {
96        match *self {
97            Pronounciation::Pronounced(ref x) => Pronounciation::Pronounced(x),
98            Pronounciation::Unpronounced => Pronounciation::Unpronounced,
99        }
100    }
101
102    ///Convert from [`Pronounciation<T>`] to [`Pronounciation<U>`] with a closure.
103    pub fn map<U, F>(self, f: F) -> Pronounciation<U>
104    where
105        F: FnOnce(T) -> U,
106    {
107        match self {
108            Pronounciation::Pronounced(x) => Pronounciation::Pronounced(f(x)),
109            Pronounciation::Unpronounced => Pronounciation::Unpronounced,
110        }
111    }
112}
113
114impl<T> From<Pronounciation<T>> for Option<T> {
115    fn from(value: Pronounciation<T>) -> Self {
116        match value {
117            Pronounciation::Pronounced(t) => Some(t),
118            Pronounciation::Unpronounced => None,
119        }
120    }
121}
122
123impl<T> From<Option<T>> for Pronounciation<T> {
124    fn from(value: Option<T>) -> Self {
125        match value {
126            Some(t) => Pronounciation::Pronounced(t),
127            None => Pronounciation::Unpronounced,
128        }
129    }
130}
131
132impl<T: Display> Display for Pronounciation<T> {
133    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
134        match self {
135            Pronounciation::Pronounced(x) => write!(f, "{x}"),
136            Pronounciation::Unpronounced => write!(f, "ε"),
137        }
138    }
139}
140
141#[derive(Debug, Copy, Clone, PartialEq, Eq, Error)]
142///Error caused by flattening a sentence that has affixes in it.
143pub struct FlattenError {}
144impl Display for FlattenError {
145    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
146        write!(f, "This Input is not a Normal variant")
147    }
148}
149
150impl<T> PhonContent<T> {
151    ///Try to flatten the output assuming that it is [`PhonContent::Normal`]
152    pub fn try_inner(self) -> Result<T, FlattenError> {
153        match self {
154            PhonContent::Normal(x) => Ok(x),
155            PhonContent::Affixed(_) => Err(FlattenError {}),
156        }
157    }
158
159    ///Create new input assuming there are no affixes.
160    pub fn new(x: Vec<T>) -> Vec<PhonContent<T>> {
161        x.into_iter().map(PhonContent::Normal).collect()
162    }
163
164    ///Create new input assuming there are no affixes
165    pub fn from<const N: usize>(x: [T; N]) -> [PhonContent<T>; N] {
166        x.map(PhonContent::Normal)
167    }
168
169    ///Try to flatten the output assuming that all members of the vector are
170    ///[`PhonContent::Normal`]
171    pub fn try_flatten(x: Vec<PhonContent<T>>) -> Result<Vec<T>, FlattenError> {
172        x.into_iter().map(PhonContent::try_inner).collect()
173    }
174}
175impl PhonContent<&str> {
176    ///Try to flatten the output and join all affixes without spaces
177    #[must_use]
178    pub fn flatten(x: Vec<PhonContent<&str>>) -> Vec<String> {
179        let mut v = vec![];
180        for content in x {
181            match content {
182                PhonContent::Normal(val) => v.push(val.to_string()),
183                PhonContent::Affixed(items) => v.push(items.join("")),
184            }
185        }
186        v
187    }
188}
189
190impl<T: Display> Display for PhonContent<T> {
191    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
192        match self {
193            PhonContent::Normal(x) => write!(f, "{x}")?,
194            PhonContent::Affixed(items) => {
195                let mut first = true;
196                for x in items {
197                    if !first {
198                        write!(f, "-")?;
199                    }
200                    first = false;
201
202                    write!(f, "{x}")?;
203                }
204            }
205        }
206        Ok(())
207    }
208}
209
///Enum to record the direction of a merge/move operation (whether the phonological value goes to
///the right or left). Defaults to [`Direction::Left`].
#[allow(missing_docs)]
#[derive(
    Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Serialize, Deserialize,
)]
pub enum Direction {
    #[default]
    Left,
    Right,
}
221
222impl Direction {
223    ///Swaps direction so that left is right and vice-versa
224    #[must_use]
225    pub fn flip(&self) -> Self {
226        match self {
227            Direction::Left => Direction::Right,
228            Direction::Right => Direction::Left,
229        }
230    }
231}
232
233impl From<Direction> for bool {
234    fn from(value: Direction) -> Self {
235        match value {
236            Direction::Left => false,
237            Direction::Right => true,
238        }
239    }
240}
241
242impl From<bool> for Direction {
243    fn from(value: bool) -> Self {
244        if value {
245            Direction::Right
246        } else {
247            Direction::Left
248        }
249    }
250}
251
///This struct defines the configuration used when parsing a [`Lexicon`].
///
///It has the following options:
/// - `min_log_prob`: The lowest log-probability string that the parser will consider. (Default = -256)
/// - `move_prob`: The probability of moving rather than merging when both are available (Default = log(0.5))
/// - `max_steps`: The maximum number of derivational steps before crashing (Default = 128)
/// - `max_beams`: The maximum number of competing parses available in the parse heap at a single time (Default = 256)
/// - `max_consecutive_empty`: The maximum number of consecutive empty heads in a derivation. Disabled by default
/// - `max_time`: The maximum amount of *time* before the parse crashes (not available on wasm32). Disabled by default
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct ParsingConfig {
    //`None` means "no probability floor".
    min_log_prob: Option<LogProb<f64>>,
    move_prob: LogProb<f64>,
    //Cached complement of `move_prob`; the setters keep the two in sync.
    dont_move_prob: LogProb<f64>,
    max_steps: Option<usize>,
    max_beams: Option<usize>,
    max_consecutive_empty: Option<usize>,

    #[cfg(not(target_arch = "wasm32"))]
    max_time: Option<Duration>,
}
272
273impl ParsingConfig {
274    ///Create a new [`ParsingConfig`] with no limits on parsing and default move probability. Be careful to ensure when parsing
275    ///or generating with this config to avoid infinite loops (at the very least use
276    ///[`ParsingConfig::with_max_time`]).
277    #[must_use]
278    pub fn empty() -> ParsingConfig {
279        let move_prob = LogProb::from_raw_prob(0.5).unwrap();
280        let dont_move_prob = move_prob.opposite_prob();
281
282        ParsingConfig {
283            min_log_prob: None,
284            move_prob,
285            dont_move_prob,
286            max_consecutive_empty: None,
287            max_steps: None,
288            max_beams: None,
289            #[cfg(not(target_arch = "wasm32"))]
290            max_time: None,
291        }
292    }
293
294    ///Create a new [`ParsingConfig`] with the following parameters
295    #[must_use]
296    pub fn new(
297        min_log_prob: LogProb<f64>,
298        move_prob: LogProb<f64>,
299        max_steps: usize,
300        max_beams: usize,
301    ) -> ParsingConfig {
302        let max_steps = usize::min(parsing::MAX_STEPS, max_steps);
303        let merge_prob = move_prob.opposite_prob();
304        ParsingConfig {
305            min_log_prob: Some(min_log_prob),
306            move_prob,
307            dont_move_prob: merge_prob,
308            max_consecutive_empty: None,
309            max_steps: Some(max_steps),
310            max_beams: Some(max_beams),
311            #[cfg(not(target_arch = "wasm32"))]
312            max_time: None,
313        }
314    }
315
316    ///Set the maximum time before timing out a parse (not available on `wasm32`).
317    #[cfg(not(target_arch = "wasm32"))]
318    #[must_use]
319    pub fn with_max_time(mut self, duration: Duration) -> Self {
320        self.max_time = Some(duration);
321        self
322    }
323
324    ///Set the maximum number of repeated empty heads.
325    #[must_use]
326    pub fn with_max_consecutive_empty(mut self, n: usize) -> Self {
327        self.max_consecutive_empty = Some(n);
328        self
329    }
330
331    ///Set the minimum log probability for a parse.
332    #[must_use]
333    pub fn with_min_log_prob(mut self, min_log_prob: LogProb<f64>) -> Self {
334        self.min_log_prob = Some(min_log_prob);
335        self
336    }
337
338    ///Set the maximum number of derivational steps for a parse.
339    #[must_use]
340    pub fn with_max_steps(mut self, max_steps: usize) -> Self {
341        self.max_steps = Some(max_steps);
342        self
343    }
344
345    ///Set the maximum number of competing parses at a single time.
346    #[must_use]
347    pub fn with_max_beams(mut self, max_beams: usize) -> Self {
348        self.max_beams = Some(max_beams);
349        self
350    }
351
352    ///Set the probability of moving as opposed to merging.
353    #[must_use]
354    pub fn with_move_prob(mut self, move_prob: LogProb<f64>) -> Self {
355        self.move_prob = move_prob;
356        self.dont_move_prob = self.move_prob.opposite_prob();
357        self
358    }
359}
360
361impl Default for ParsingConfig {
362    fn default() -> Self {
363        ParsingConfig::new(
364            LogProb::new(-256.0).unwrap(),
365            LogProb::from_raw_prob(0.5).unwrap(),
366            128,
367            256,
368        )
369    }
370}
371
372#[derive(Debug, Copy, Clone, Eq, PartialEq)]
373struct BeamKey(LogProb<f64>, usize);
374
375impl PartialOrd for BeamKey {
376    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
377        Some(self.cmp(other))
378    }
379}
380
381impl Ord for BeamKey {
382    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
383        self.0.cmp(&other.0)
384    }
385}
386
//Priority queue of in-progress parses ("beams") shared by all the iterator types below.
//
//`parse_heap` stores only lightweight `BeamKey`s; the beams themselves live in `beam_arena`,
//addressed by `BeamKey.1`. An arena slot becomes `None` when its beam is popped or evicted.
#[derive(Debug, Clone)]
struct ParseHeap<T, B: Scanner<T>> {
    parse_heap: MinMaxHeap<BeamKey>,
    phantom: PhantomData<T>,
    config: ParsingConfig,
    //Arena of derivation rules shared by all beams (seeded by `PartialRulePool::default_pool`).
    rule_arena: Vec<RuleHolder>,
    //`Some(beam)` while a key in `parse_heap` references the slot; `None` once removed.
    beam_arena: Vec<Option<BeamWrapper<T, B>>>,
    #[cfg(feature = "sampling")]
    //Beams buffered while `random_order` is set, until `process_randoms` distributes them.
    random_buffer: Vec<BeamWrapper<T, B>>,
    #[cfg(feature = "sampling")]
    //Randomly sampled beam to pop next; takes precedence over the heap in `pop`.
    head: Option<BeamWrapper<T, B>>,
    #[cfg(feature = "sampling")]
    //When true, `push` diverts new beams into `random_buffer` instead of the heap.
    random_order: bool,
}
401
402impl<T: Eq + std::fmt::Debug + Clone, B: Scanner<T> + Eq + Clone> ParseHeap<T, B> {
403    ///Retain only elements in the closure, which may be mutated.
404    fn retain_map<F: FnMut(BeamWrapper<T, B>) -> Option<BeamWrapper<T, B>>>(&mut self, mut f: F) {
405        let mut heap = MinMaxHeap::new();
406        std::mem::swap(&mut heap, &mut self.parse_heap);
407        self.parse_heap.extend(heap.into_iter().filter(|x| {
408            let v = self.beam_arena.get_mut(x.1).unwrap().take().unwrap();
409            if let Some(v) = f(v) {
410                self.beam_arena[x.1] = Some(v);
411                true
412            } else {
413                false
414            }
415        }));
416    }
417
418    #[cfg(feature = "sampling")]
419    fn pop(&mut self) -> Option<BeamWrapper<T, B>> {
420        self.head.take().or_else(|| {
421            self.parse_heap
422                .pop_max()
423                .map(|x| self.beam_arena[x.1].take().unwrap())
424        })
425    }
426    #[cfg(not(feature = "sampling"))]
427    fn pop(&mut self) -> Option<BeamWrapper<T, B>> {
428        self.parse_heap
429            .pop_max()
430            .map(|x| self.beam_arena[x.1].take().unwrap())
431    }
432
433    fn can_push(&self, v: &BeamWrapper<T, B>) -> bool {
434        let is_probable_enough = self.config.min_log_prob.is_none_or(|p| v.log_prob() > p);
435        let is_short_enough = self
436            .config
437            .max_steps
438            .is_none_or(|max_steps| v.n_steps() < max_steps);
439        let is_not_fake_structure = self
440            .config
441            .max_consecutive_empty
442            .is_none_or(|n_empty| v.n_consecutive_empty() <= n_empty);
443        is_short_enough && is_probable_enough && is_not_fake_structure
444    }
445
446    #[cfg(feature = "sampling")]
447    fn process_randoms(&mut self, rng: &mut impl Rng) {
448        let weights = self
449            .random_buffer
450            .iter()
451            .map(|x| -x.log_prob().into_inner())
452            .collect::<Vec<_>>();
453        if !weights.is_empty() {
454            let head_id = if weights.len() > 1 {
455                let weights = WeightedIndex::new(weights).unwrap();
456                weights.sample(rng)
457            } else {
458                0
459            };
460
461            //maybe could be optimized by making `add_to_heap` not depend on self.
462            let buffer = std::mem::take(&mut self.random_buffer);
463            for (i, beam) in buffer.into_iter().enumerate() {
464                if i == head_id {
465                    self.head = Some(beam);
466                } else {
467                    self.add_to_heap(beam);
468                }
469            }
470        }
471    }
472
473    fn add_to_heap(&mut self, v: BeamWrapper<T, B>) {
474        let key = BeamKey(v.log_prob(), self.beam_arena.len());
475        if let Some(max_beams) = self.config.max_beams
476            && self.parse_heap.len() > max_beams
477        {
478            let x = self.parse_heap.push_pop_min(key);
479            if x.1 != key.1 {
480                //only delete if its not the same thing
481                self.beam_arena[x.1] = None;
482            }
483        } else {
484            self.parse_heap.push(key);
485        }
486        self.beam_arena.push(Some(v));
487    }
488
489    fn push(&mut self, v: BeamWrapper<T, B>) {
490        if self.can_push(&v) {
491            #[cfg(feature = "sampling")]
492            if self.random_order {
493                self.random_buffer.push(v);
494            } else {
495                self.add_to_heap(v);
496            }
497
498            #[cfg(not(feature = "sampling"))]
499            self.add_to_heap(v);
500        }
501    }
502
503    fn new(start: BeamWrapper<T, B>, config: &ParsingConfig, cat: NodeIndex) -> Self {
504        let mut parse_heap = MinMaxHeap::with_capacity(config.max_beams.unwrap_or(50));
505        let key = BeamKey(start.log_prob(), 0);
506        parse_heap.push(key);
507        let beam_arena = vec![Some(start)];
508        ParseHeap {
509            parse_heap,
510            beam_arena,
511            phantom: PhantomData,
512            config: *config,
513            #[cfg(feature = "sampling")]
514            random_order: false,
515            #[cfg(feature = "sampling")]
516            random_buffer: vec![],
517            #[cfg(feature = "sampling")]
518            head: None,
519            rule_arena: PartialRulePool::default_pool(cat),
520        }
521    }
522
523    fn rules_mut(&mut self) -> &mut Vec<RuleHolder> {
524        &mut self.rule_arena
525    }
526}
527
//Item yielded by parsers: log probability, the matched input slice, and its derivation rules.
type ParserOutput<'a, T> = (LogProb<f64>, &'a [PhonContent<T>], RulePool);
//Item yielded by generators: log probability, the generated (owned) sentence, and its rules.
type GeneratorOutput<T> = (LogProb<f64>, Vec<PhonContent<T>>, RulePool);
530
///An iterator constructed by [`Lexicon::fuzzy_parse`].
///
///Yields sentences generated by the grammar that are close — but not necessarily identical — to
///the provided guide sentences, together with their log probability and derivation rules.
pub struct FuzzyParser<
    'a,
    'b,
    T: Eq + std::fmt::Debug + Clone,
    Category: Eq + Clone + std::fmt::Debug,
> {
    lexicon: &'a Lexicon<T, Category>,
    parse_heap: ParseHeap<T, FuzzyScan<'b, T>>,
    config: &'a ParsingConfig,
}
542
543impl<T, Category> Iterator for FuzzyParser<'_, '_, T, Category>
544where
545    T: Eq + std::fmt::Debug + Clone,
546    Category: Eq + Clone + std::fmt::Debug + Hash,
547{
548    type Item = GeneratorOutput<T>;
549
550    fn next(&mut self) -> Option<Self::Item> {
551        while let Some(mut beam) = self.parse_heap.pop() {
552            if let Some(moment) = beam.pop_moment() {
553                expand(
554                    &mut self.parse_heap,
555                    &moment,
556                    beam,
557                    self.lexicon,
558                    self.config,
559                );
560            } else if let Some(x) = FuzzyScan::yield_good_parse(beam, &self.parse_heap.rule_arena) {
561                return Some(x);
562            }
563        }
564
565        None
566    }
567}
568
///An iterator constructed by [`Lexicon::parse`] and [`Lexicon::parse_multiple`].
///
///Yields each successful parse of the input sentence(s) as it is found.
pub struct Parser<'a, 'b, T: Eq + std::fmt::Debug + Clone, Category: Eq + Clone + std::fmt::Debug> {
    lexicon: &'a Lexicon<T, Category>,
    parse_heap: ParseHeap<T, ParseScan<'b, T>>,

    #[cfg(not(target_arch = "wasm32"))]
    //Set on the first call to `next`; used to enforce `ParsingConfig::max_time`.
    start_time: Option<Instant>,
    config: &'a ParsingConfig,
    //Extra parses completed in the same step as the last yielded one; drained first.
    buffer: Vec<ParserOutput<'b, T>>,
}
579
580impl<'b, T, Category> Iterator for Parser<'_, 'b, T, Category>
581where
582    T: Eq + std::fmt::Debug + Clone,
583    Category: Eq + Clone + std::fmt::Debug,
584{
585    type Item = ParserOutput<'b, T>;
586
587    fn next(&mut self) -> Option<Self::Item> {
588        #[cfg(not(target_arch = "wasm32"))]
589        if self.start_time.is_none() {
590            self.start_time = Some(Instant::now());
591        }
592
593        if self.buffer.is_empty() {
594            while let Some(mut beam) = self.parse_heap.pop() {
595                #[cfg(not(target_arch = "wasm32"))]
596                if let Some(max_time) = self.config.max_time
597                    && max_time < self.start_time.unwrap().elapsed()
598                {
599                    return None;
600                }
601
602                if let Some(moment) = beam.pop_moment() {
603                    expand(
604                        &mut self.parse_heap,
605                        &moment,
606                        beam,
607                        self.lexicon,
608                        self.config,
609                    );
610                } else if let Some((mut good_parses, p, rules)) =
611                    ParseScan::yield_good_parse(beam, &self.parse_heap.rule_arena)
612                    && let Some(next_sentence) = good_parses.next()
613                {
614                    self.buffer
615                        .extend(good_parses.map(|x| (p, x, rules.clone())));
616                    let next = Some((p, next_sentence, rules));
617                    return next;
618                }
619            }
620        } else {
621            return self.buffer.pop();
622        }
623
624        None
625    }
626}
627
///An iterator constructed by [`Lexicon::random_parse`] and [`Lexicon::random_parse_multiple`].
///
///Like [`Parser`], but beams are expanded in a randomly sampled order so that only one randomly
///chosen parse per sentence is produced.
#[cfg(feature = "sampling")]
pub struct RandomParser<
    'a,
    'b,
    T: Eq + std::fmt::Debug + Clone,
    Category: Eq + Clone + std::fmt::Debug,
    R: Rng,
> {
    lexicon: &'a Lexicon<T, Category>,
    parse_heap: ParseHeap<T, ParseScan<'b, T>>,

    #[cfg(not(target_arch = "wasm32"))]
    //Set on the first call to `next`; used to enforce `ParsingConfig::max_time`.
    start_time: Option<Instant>,
    config: &'a ParsingConfig,
    //Extra parses completed in the same step as the last yielded one; drained first.
    buffer: Vec<ParserOutput<'b, T>>,
    rng: &'a mut R,
}
646
#[cfg(feature = "sampling")]
impl<'b, T, Category, R> Iterator for RandomParser<'_, 'b, T, Category, R>
where
    T: Eq + std::fmt::Debug + Clone,
    Category: Eq + Clone + std::fmt::Debug,
    R: Rng,
{
    type Item = ParserOutput<'b, T>;

    fn next(&mut self) -> Option<Self::Item> {
        //Start the clock on the first call so `max_time` measures parsing time only.
        #[cfg(not(target_arch = "wasm32"))]
        if self.start_time.is_none() {
            self.start_time = Some(Instant::now());
        }

        if self.buffer.is_empty() {
            while let Some(mut beam) = self.parse_heap.pop() {
                #[cfg(not(target_arch = "wasm32"))]
                if let Some(max_time) = self.config.max_time
                    && max_time < self.start_time.unwrap().elapsed()
                {
                    return None;
                }

                if let Some(moment) = beam.pop_moment() {
                    expand(
                        &mut self.parse_heap,
                        &moment,
                        beam,
                        self.lexicon,
                        self.config,
                    );
                    //Randomly sample which of the freshly pushed beams is explored next —
                    //this is what makes the parse random rather than best-first.
                    self.parse_heap.process_randoms(self.rng);
                } else if let Some((mut good_parses, p, rules)) =
                    ParseScan::yield_good_parse(beam, &self.parse_heap.rule_arena)
                    && let Some(next_sentence) = good_parses.next()
                {
                    //Get rid of parses that have already been yielded
                    self.parse_heap.retain_map(|mut x| {
                        x.beam.sentence.retain(|(s, _)| s != &next_sentence);
                        if x.beam.sentence.is_empty() {
                            None
                        } else {
                            Some(x)
                        }
                    });
                    self.buffer
                        .extend(good_parses.map(|x| (p, x, rules.clone())));
                    let next = Some((p, next_sentence, rules));
                    return next;
                }
            }
        } else {
            return self.buffer.pop();
        }

        None
    }
}
706
impl<T, Category> Lexicon<T, Category>
where
    T: Eq + std::fmt::Debug + Clone,
    Category: Eq + Clone + std::fmt::Debug,
{
    ///Generates the strings in a grammar that are findable according to the parsing config.
    ///
    ///Returns an iterator, [`Generator`], which has items of type `(`[`LogProb<f64>`]`,
    ///Vec<PhonContent<T>>, `[`RulePool`]`)` where the middle item is the generated string and the
    ///[`RulePool`] has the structure of each generated string.
    ///
    ///# Errors
    ///Returns a [`ParsingError`] if `category` is not in the lexicon.
    pub fn generate(
        &self,
        category: Category,
        config: &ParsingConfig,
    ) -> Result<Generator<&Self, T, Category>, ParsingError<Category>> {
        let cat = self.find_category(&category)?;
        let beam = BeamWrapper::new(GeneratorScan { sentence: vec![] }, cat);
        let parse_heap = ParseHeap::new(beam, config, cat);
        Ok(Generator {
            lexicon: self,
            config: *config,
            parse_heap,
            phantom: PhantomData,
        })
    }

    ///Like [`Lexicon::generate`] but consumes the lexicon.
    ///
    ///# Errors
    ///Returns a [`ParsingError`] if `category` is not in the lexicon.
    pub fn into_generate(
        self,
        category: Category,
        config: &ParsingConfig,
    ) -> Result<Generator<Self, T, Category>, ParsingError<Category>> {
        let cat = self.find_category(&category)?;
        let beam = BeamWrapper::new(GeneratorScan { sentence: vec![] }, cat);
        let parse_heap = ParseHeap::new(beam, config, cat);
        Ok(Generator {
            lexicon: self,
            config: *config,
            parse_heap,
            phantom: PhantomData,
        })
    }
    ///Parses a sentence under the assumption that it has the category `category`, but will
    ///randomly return only *one* possible parse.
    ///
    ///Returns [`None`] if no parse can be found.
    ///
    ///# Errors
    ///Returns a [`ParsingError`] if `category` is not in the lexicon.
    #[cfg(feature = "sampling")]
    pub fn random_parse<'a, 'b, R: Rng>(
        &'a self,
        s: &'b [PhonContent<T>],
        category: Category,
        config: &'a ParsingConfig,
        rng: &'a mut R,
    ) -> Result<Option<ParserOutput<'b, T>>, ParsingError<Category>> {
        let cat = self.find_category(&category)?;

        let beam = BeamWrapper::new(
            ParseScan {
                sentence: vec![(s, 0)],
            },
            cat,
        );
        let mut parse_heap = ParseHeap::new(beam, config, cat);
        parse_heap.random_order = true;
        Ok(RandomParser {
            lexicon: self,
            config,
            #[cfg(not(target_arch = "wasm32"))]
            start_time: None,
            buffer: vec![],
            parse_heap,
            rng,
        }
        .next())
    }

    ///Parses sentences under the assumption that they have the category `category`, but will
    ///randomly return only *one* possible parse for each sentence.
    ///
    ///Returns an iterator, [`RandomParser`], which has items of type `(`[`LogProb<f64>`]`,
    ///&'a [PhonContent<T>], `[`RulePool`]`)`
    ///
    ///# Errors
    ///Returns a [`ParsingError`] if `category` is not in the lexicon.
    #[cfg(feature = "sampling")]
    pub fn random_parse_multiple<'a, 'b, U, R: Rng>(
        &'a self,
        sentences: &'b [U],
        category: Category,
        config: &'a ParsingConfig,
        rng: &'a mut R,
    ) -> Result<RandomParser<'a, 'b, T, Category, R>, ParsingError<Category>>
    where
        U: AsRef<[PhonContent<T>]>,
    {
        let cat = self.find_category(&category)?;

        let beam = BeamWrapper::new(
            ParseScan {
                sentence: sentences.iter().map(|x| (x.as_ref(), 0)).collect(),
            },
            cat,
        );
        let mut parse_heap = ParseHeap::new(beam, config, cat);
        parse_heap.random_order = true;
        Ok(RandomParser {
            lexicon: self,
            config,
            #[cfg(not(target_arch = "wasm32"))]
            start_time: None,
            buffer: vec![],
            parse_heap,
            rng,
        })
    }

    ///Parses a sentence under the assumption that it has the category `category`.
    ///
    ///Returns an iterator, [`Parser`], which has items of type `(`[`LogProb<f64>`]`,
    ///&'a [PhonContent<T>], `[`RulePool`]`)`
    ///
    ///# Errors
    ///Returns a [`ParsingError`] if `category` is not in the lexicon.
    pub fn parse<'a, 'b>(
        &'a self,
        s: &'b [PhonContent<T>],
        category: Category,
        config: &'a ParsingConfig,
    ) -> Result<Parser<'a, 'b, T, Category>, ParsingError<Category>> {
        let cat = self.find_category(&category)?;

        let beam = BeamWrapper::new(
            ParseScan {
                sentence: vec![(s, 0)],
            },
            cat,
        );
        let parse_heap = ParseHeap::new(beam, config, cat);
        Ok(Parser {
            lexicon: self,
            config,
            #[cfg(not(target_arch = "wasm32"))]
            start_time: None,
            buffer: vec![],
            parse_heap,
        })
    }

    ///Functions like [`Lexicon::parse`] but parses multiple sentences at once.
    ///
    ///# Errors
    ///Returns a [`ParsingError`] if `category` is not in the lexicon.
    pub fn parse_multiple<'a, 'b, U>(
        &'a self,
        sentences: &'b [U],
        category: Category,
        config: &'a ParsingConfig,
    ) -> Result<Parser<'a, 'b, T, Category>, ParsingError<Category>>
    where
        U: AsRef<[PhonContent<T>]>,
    {
        let cat = self.find_category(&category)?;
        let beams = BeamWrapper::new(
            ParseScan {
                sentence: sentences.iter().map(|x| (x.as_ref(), 0)).collect(),
            },
            cat,
        );
        let parse_heap = ParseHeap::new(beams, config, cat);
        Ok(Parser {
            lexicon: self,
            buffer: vec![],
            #[cfg(not(target_arch = "wasm32"))]
            start_time: None,
            config,
            parse_heap,
        })
    }

    ///Functions like [`Lexicon::parse`] or [`Lexicon::generate`] but will return parses that are
    ///close to the provided sentences, but are not necessarily the same.
    ///
    ///# Errors
    ///Returns a [`ParsingError`] if `category` is not in the lexicon.
    pub fn fuzzy_parse<'a, 'b, U>(
        &'a self,
        sentences: &'b [U],
        category: Category,
        config: &'a ParsingConfig,
    ) -> Result<FuzzyParser<'a, 'b, T, Category>, ParsingError<Category>>
    where
        U: AsRef<[PhonContent<T>]>,
    {
        let cat = self.find_category(&category)?;

        let beams = BeamWrapper::new(
            FuzzyScan {
                sentence_guides: sentences.iter().map(|x| (x.as_ref(), 0)).collect(),
                generated_sentences: vec![],
            },
            cat,
        );

        let parse_heap = ParseHeap::new(beams, config, cat);

        Ok(FuzzyParser {
            lexicon: self,
            config,
            parse_heap,
        })
    }
}
908
#[derive(Debug, Clone)]
///An iterator constructed by [`Lexicon::generate`] and [`Lexicon::into_generate`].
///
///Yields sentences generated by the grammar together with their log probability and derivation
///rules. `L` is either `&Lexicon<T, Category>` or an owned `Lexicon<T, Category>` (anything that
///implements `Borrow<Lexicon<T, Category>>`).
pub struct Generator<L, T: Eq + std::fmt::Debug + Clone, Category: Eq + Clone + std::fmt::Debug> {
    lexicon: L,
    phantom: PhantomData<Category>,
    parse_heap: ParseHeap<T, GeneratorScan<T>>,
    config: ParsingConfig,
}
917
918impl<L, T, Category> Iterator for Generator<L, T, Category>
919where
920    L: Borrow<Lexicon<T, Category>>,
921    T: Eq + std::fmt::Debug + Clone,
922    Category: Eq + Clone + std::fmt::Debug + Hash,
923{
924    type Item = GeneratorOutput<T>;
925
926    fn next(&mut self) -> Option<Self::Item> {
927        while let Some(mut beam) = self.parse_heap.pop() {
928            if let Some(moment) = beam.pop_moment() {
929                expand(
930                    &mut self.parse_heap,
931                    &moment,
932                    beam,
933                    self.lexicon.borrow(),
934                    &self.config,
935                );
936            } else if let Some(x) =
937                GeneratorScan::yield_good_parse(beam, &self.parse_heap.rule_arena)
938            {
939                return Some(x);
940            }
941        }
942        None
943    }
944}
945
//Submodules: `lexicon` provides [`Lexicon`]/[`ParsingError`] (re-exported above); `parsing`
//provides [`RulePool`] and the beam machinery used in this file; `grammars` holds
//ready-made example grammars.
pub mod grammars;
pub mod lexicon;
pub mod parsing;

#[cfg(test)]
mod tests;