1#![warn(missing_docs)]
34
35use std::borrow::Borrow;
36use std::fmt::{Debug, Display};
37use std::hash::Hash;
38use std::marker::PhantomData;
39
40#[cfg(not(target_arch = "wasm32"))]
41use std::time::{Duration, Instant};
42
43pub use lexicon::{Lexicon, ParsingError};
44
45use logprob::LogProb;
46use min_max_heap::MinMaxHeap;
47use parsing::RuleHolder;
48
49pub use parsing::RulePool;
50use parsing::beam::{FuzzyScan, GeneratorScan, ParseScan, Scanner};
51use parsing::{BeamWrapper, PartialRulePool, expand};
52use petgraph::graph::NodeIndex;
53
54use serde::{Deserialize, Serialize};
55use thiserror::Error;
56
57#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
59pub enum PhonContent<T> {
60 Normal(T),
62 Affixed(Vec<T>),
64}
65
66#[derive(Debug, Copy, Clone, PartialEq, Eq, Error)]
67pub struct FlattenError {}
69impl Display for FlattenError {
70 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
71 write!(f, "This Input is not a Normal variant")
72 }
73}
74
75impl<T> PhonContent<T> {
76 pub fn try_inner(self) -> Result<T, FlattenError> {
78 match self {
79 PhonContent::Normal(x) => Ok(x),
80 PhonContent::Affixed(_) => Err(FlattenError {}),
81 }
82 }
83
84 pub fn new(x: Vec<T>) -> Vec<PhonContent<T>> {
86 x.into_iter().map(PhonContent::Normal).collect()
87 }
88
89 pub fn from<const N: usize>(x: [T; N]) -> [PhonContent<T>; N] {
91 x.map(PhonContent::Normal)
92 }
93
94 pub fn try_flatten(x: Vec<PhonContent<T>>) -> Result<Vec<T>, FlattenError> {
97 x.into_iter().map(|x| x.try_inner()).collect()
98 }
99}
100impl PhonContent<&str> {
101 pub fn flatten(x: Vec<PhonContent<&str>>) -> Vec<String> {
103 let mut v = vec![];
104 for content in x.into_iter() {
105 match content {
106 PhonContent::Normal(val) => v.push(val.to_string()),
107 PhonContent::Affixed(items) => v.push(items.join("")),
108 }
109 }
110 v
111 }
112}
113
114#[allow(missing_docs)]
117#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
118pub enum Direction {
119 #[default]
120 Left,
121 Right,
122}
123
124impl Direction {
125 pub fn flip(&self) -> Self {
127 match self {
128 Direction::Left => Direction::Right,
129 Direction::Right => Direction::Left,
130 }
131 }
132}
133
134impl From<Direction> for bool {
135 fn from(value: Direction) -> Self {
136 match value {
137 Direction::Left => false,
138 Direction::Right => true,
139 }
140 }
141}
142
143impl From<bool> for Direction {
144 fn from(value: bool) -> Self {
145 match value {
146 false => Direction::Left,
147 true => Direction::Right,
148 }
149 }
150}
151
152#[derive(Debug, Copy, Clone, PartialEq, Eq)]
161pub struct ParsingConfig {
162 min_log_prob: Option<LogProb<f64>>,
163 move_prob: LogProb<f64>,
164 dont_move_prob: LogProb<f64>,
165 max_steps: Option<usize>,
166 max_beams: Option<usize>,
167 max_consecutive_empty: Option<usize>,
168
169 #[cfg(not(target_arch = "wasm32"))]
170 max_time: Option<Duration>,
171}
172
173impl ParsingConfig {
174 pub fn empty() -> ParsingConfig {
178 let move_prob = LogProb::from_raw_prob(0.5).unwrap();
179 let dont_move_prob = move_prob.opposite_prob();
180
181 ParsingConfig {
182 min_log_prob: None,
183 move_prob,
184 dont_move_prob,
185 max_consecutive_empty: None,
186 max_steps: None,
187 max_beams: None,
188
189 #[cfg(not(target_arch = "wasm32"))]
190 max_time: None,
191 }
192 }
193
194 pub fn new(
196 min_log_prob: LogProb<f64>,
197 move_prob: LogProb<f64>,
198 max_steps: usize,
199 max_beams: usize,
200 ) -> ParsingConfig {
201 let max_steps = usize::min(parsing::MAX_STEPS, max_steps);
202 let merge_prob = move_prob.opposite_prob();
203 ParsingConfig {
204 min_log_prob: Some(min_log_prob),
205 move_prob,
206 dont_move_prob: merge_prob,
207 max_consecutive_empty: None,
208 max_steps: Some(max_steps),
209 max_beams: Some(max_beams),
210 #[cfg(not(target_arch = "wasm32"))]
211 max_time: None,
212 }
213 }
214
215 #[cfg(not(target_arch = "wasm32"))]
217 pub fn with_max_time(mut self, duration: Duration) -> Self {
218 self.max_time = Some(duration);
219 self
220 }
221
222 pub fn with_max_consecutive_empty(mut self, n: usize) -> Self {
224 self.max_consecutive_empty = Some(n);
225 self
226 }
227
228 pub fn with_min_log_prob(mut self, min_log_prob: LogProb<f64>) -> Self {
230 self.min_log_prob = Some(min_log_prob);
231 self
232 }
233
234 pub fn with_max_steps(mut self, max_steps: usize) -> Self {
236 self.max_steps = Some(max_steps);
237 self
238 }
239
240 pub fn with_max_beams(mut self, max_beams: usize) -> Self {
242 self.max_beams = Some(max_beams);
243 self
244 }
245
246 pub fn with_move_prob(mut self, move_prob: LogProb<f64>) -> Self {
248 self.move_prob = move_prob;
249 self.dont_move_prob = self.move_prob.opposite_prob();
250 self
251 }
252}
253
254impl Default for ParsingConfig {
255 fn default() -> Self {
256 ParsingConfig::new(
257 LogProb::new(-256.0).unwrap(),
258 LogProb::from_raw_prob(0.5).unwrap(),
259 128,
260 256,
261 )
262 }
263}
264
265#[derive(Debug, Clone)]
266struct ParseHeap<T, B: Scanner<T>> {
267 parse_heap: MinMaxHeap<BeamWrapper<T, B>>,
268 phantom: PhantomData<T>,
269 config: ParsingConfig,
270 rule_arena: Vec<RuleHolder>,
271}
272
273impl<T: Eq + std::fmt::Debug + Clone, B: Scanner<T> + Eq + Clone> ParseHeap<T, B> {
274 fn pop(&mut self) -> Option<BeamWrapper<T, B>> {
275 self.parse_heap.pop_max()
276 }
277
278 fn can_push(&self, v: &BeamWrapper<T, B>) -> bool {
279 let is_probable_enough = self
280 .config
281 .min_log_prob
282 .map(|p| v.log_prob() > p)
283 .unwrap_or(true);
284 let is_short_enough = self
285 .config
286 .max_steps
287 .map(|max_steps| v.n_steps() < max_steps)
288 .unwrap_or(true);
289 let is_not_fake_structure = self
290 .config
291 .max_consecutive_empty
292 .map(|n_empty| v.n_consecutive_empty() <= n_empty)
293 .unwrap_or(true);
294 is_short_enough && is_probable_enough && is_not_fake_structure
295 }
296
297 fn push(&mut self, v: BeamWrapper<T, B>) {
298 if self.can_push(&v) {
299 if let Some(max_beams) = self.config.max_beams
300 && self.parse_heap.len() > max_beams
301 {
302 self.parse_heap.push_pop_min(v);
303 } else {
304 self.parse_heap.push(v);
305 }
306 }
307 }
308
309 fn new(start: BeamWrapper<T, B>, config: &ParsingConfig, cat: NodeIndex) -> Self {
310 let mut parse_heap = MinMaxHeap::with_capacity(config.max_beams.unwrap_or(50));
311 parse_heap.push(start);
312 ParseHeap {
313 parse_heap,
314 phantom: PhantomData,
315 config: *config,
316 rule_arena: PartialRulePool::default_pool(cat),
317 }
318 }
319
320 fn rules_mut(&mut self) -> &mut Vec<RuleHolder> {
321 &mut self.rule_arena
322 }
323}
324
325type ParserOutput<'a, T> = (LogProb<f64>, &'a [PhonContent<T>], RulePool);
326type GeneratorOutput<T> = (LogProb<f64>, Vec<PhonContent<T>>, RulePool);
327
328pub struct FuzzyParser<
330 'a,
331 'b,
332 T: Eq + std::fmt::Debug + Clone,
333 Category: Eq + Clone + std::fmt::Debug,
334> {
335 lexicon: &'a Lexicon<T, Category>,
336 parse_heap: ParseHeap<T, FuzzyScan<'b, T>>,
337 config: &'a ParsingConfig,
338}
339
340impl<T, Category> Iterator for FuzzyParser<'_, '_, T, Category>
341where
342 T: Eq + std::fmt::Debug + Clone,
343 Category: Eq + Clone + std::fmt::Debug + Hash,
344{
345 type Item = GeneratorOutput<T>;
346
347 fn next(&mut self) -> Option<Self::Item> {
348 while let Some(mut beam) = self.parse_heap.pop() {
349 if let Some(moment) = beam.pop_moment() {
350 expand(
351 &mut self.parse_heap,
352 moment,
353 beam,
354 self.lexicon,
355 self.config,
356 );
357 } else if let Some(x) = FuzzyScan::yield_good_parse(beam, &self.parse_heap.rule_arena) {
358 return Some(x);
359 }
360 }
361
362 None
363 }
364}
365
366pub struct Parser<'a, 'b, T: Eq + std::fmt::Debug + Clone, Category: Eq + Clone + std::fmt::Debug> {
368 lexicon: &'a Lexicon<T, Category>,
369 parse_heap: ParseHeap<T, ParseScan<'b, T>>,
370
371 #[cfg(not(target_arch = "wasm32"))]
372 start_time: Option<Instant>,
373 config: &'a ParsingConfig,
374 buffer: Vec<ParserOutput<'b, T>>,
375}
376
377impl<'a, 'b, T, Category> Iterator for Parser<'a, 'b, T, Category>
378where
379 T: Eq + std::fmt::Debug + Clone,
380 Category: Eq + Clone + std::fmt::Debug,
381{
382 type Item = ParserOutput<'b, T>;
383
384 fn next(&mut self) -> Option<Self::Item> {
385 #[cfg(not(target_arch = "wasm32"))]
386 if self.start_time.is_none() {
387 self.start_time = Some(Instant::now());
388 }
389
390 if self.buffer.is_empty() {
391 while let Some(mut beam) = self.parse_heap.pop() {
392 #[cfg(not(target_arch = "wasm32"))]
393 if let Some(max_time) = self.config.max_time
394 && max_time < self.start_time.unwrap().elapsed()
395 {
396 return None;
397 }
398
399 if let Some(moment) = beam.pop_moment() {
400 expand(
401 &mut self.parse_heap,
402 moment,
403 beam,
404 self.lexicon,
405 self.config,
406 );
407 } else if let Some((mut good_parses, p, rules)) =
408 ParseScan::yield_good_parse(beam, &self.parse_heap.rule_arena)
409 && let Some(next_sentence) = good_parses.next()
410 {
411 self.buffer
412 .extend(good_parses.map(|x| (p, x, rules.clone())));
413 let next = Some((p, next_sentence, rules));
414 return next;
415 }
416 }
417 } else {
418 return self.buffer.pop();
419 }
420
421 None
422 }
423}
424
425impl<T, Category> Lexicon<T, Category>
426where
427 T: Eq + std::fmt::Debug + Clone,
428 Category: Eq + Clone + std::fmt::Debug,
429{
430 pub fn generate(
437 &self,
438 category: Category,
439 config: &ParsingConfig,
440 ) -> Result<Generator<&Self, T, Category>, ParsingError<Category>> {
441 let cat = self.find_category(&category)?;
442 let beam = BeamWrapper::new(GeneratorScan { sentence: vec![] }, cat);
443 let parse_heap = ParseHeap::new(beam, config, cat);
444 Ok(Generator {
445 lexicon: self,
446 config: *config,
447 parse_heap,
448 phantom: PhantomData,
449 })
450 }
451
452 pub fn into_generate(
454 self,
455 category: Category,
456 config: &ParsingConfig,
457 ) -> Result<Generator<Self, T, Category>, ParsingError<Category>> {
458 let cat = self.find_category(&category)?;
459 let beam = BeamWrapper::new(GeneratorScan { sentence: vec![] }, cat);
460 let parse_heap = ParseHeap::new(beam, config, cat);
461 Ok(Generator {
462 lexicon: self,
463 config: *config,
464 parse_heap,
465 phantom: PhantomData,
466 })
467 }
468
469 pub fn parse<'a, 'b>(
474 &'a self,
475 s: &'b [PhonContent<T>],
476 category: Category,
477 config: &'a ParsingConfig,
478 ) -> Result<Parser<'a, 'b, T, Category>, ParsingError<Category>> {
479 let cat = self.find_category(&category)?;
480
481 let beam = BeamWrapper::new(
482 ParseScan {
483 sentence: vec![(s, 0)],
484 },
485 cat,
486 );
487 let parse_heap = ParseHeap::new(beam, config, cat);
488 Ok(Parser {
489 lexicon: self,
490 config,
491 #[cfg(not(target_arch = "wasm32"))]
492 start_time: None,
493 buffer: vec![],
494 parse_heap,
495 })
496 }
497
498 pub fn parse_multiple<'a, 'b, U>(
500 &'a self,
501 sentences: &'b [U],
502 category: Category,
503 config: &'a ParsingConfig,
504 ) -> Result<Parser<'a, 'b, T, Category>, ParsingError<Category>>
505 where
506 U: AsRef<[PhonContent<T>]>,
507 {
508 let cat = self.find_category(&category)?;
509 let beams = BeamWrapper::new(
510 ParseScan {
511 sentence: sentences.iter().map(|x| (x.as_ref(), 0)).collect(),
512 },
513 cat,
514 );
515 let parse_heap = ParseHeap::new(beams, config, cat);
516 Ok(Parser {
517 lexicon: self,
518 buffer: vec![],
519 #[cfg(not(target_arch = "wasm32"))]
520 start_time: None,
521 config,
522 parse_heap,
523 })
524 }
525
526 pub fn fuzzy_parse<'a, 'b, U>(
529 &'a self,
530 sentences: &'b [U],
531 category: Category,
532 config: &'a ParsingConfig,
533 ) -> Result<FuzzyParser<'a, 'b, T, Category>, ParsingError<Category>>
534 where
535 U: AsRef<[PhonContent<T>]>,
536 {
537 let cat = self.find_category(&category)?;
538
539 let beams = BeamWrapper::new(
540 FuzzyScan {
541 sentence_guides: sentences.iter().map(|x| (x.as_ref(), 0)).collect(),
542 generated_sentences: vec![],
543 },
544 cat,
545 );
546
547 let parse_heap = ParseHeap::new(beams, config, cat);
548
549 Ok(FuzzyParser {
550 lexicon: self,
551 config,
552 parse_heap,
553 })
554 }
555}
556
557#[derive(Debug, Clone)]
558pub struct Generator<L, T: Eq + std::fmt::Debug + Clone, Category: Eq + Clone + std::fmt::Debug> {
560 lexicon: L,
561 phantom: PhantomData<Category>,
562 parse_heap: ParseHeap<T, GeneratorScan<T>>,
563 config: ParsingConfig,
564}
565
566impl<L, T, Category> Iterator for Generator<L, T, Category>
567where
568 L: Borrow<Lexicon<T, Category>>,
569 T: Eq + std::fmt::Debug + Clone,
570 Category: Eq + Clone + std::fmt::Debug + Hash,
571{
572 type Item = GeneratorOutput<T>;
573
574 fn next(&mut self) -> Option<Self::Item> {
575 while let Some(mut beam) = self.parse_heap.pop() {
576 if let Some(moment) = beam.pop_moment() {
577 expand(
578 &mut self.parse_heap,
579 moment,
580 beam,
581 self.lexicon.borrow(),
582 &self.config,
583 );
584 } else if let Some(x) =
585 GeneratorScan::yield_good_parse(beam, &self.parse_heap.rule_arena)
586 {
587 return Some(x);
588 }
589 }
590 None
591 }
592}
593
594pub mod grammars;
595pub mod lexicon;
596pub mod parsing;
597
598#[cfg(test)]
599mod tests;