simple_semantics/
lib.rs

//! # Simple semantics
//! This crate defines a simple language of thought with a lambda calculus and a randomization
//! procedure to learn simple Montagovian grammars.
4#![warn(missing_docs)]
5use ahash::HashSet;
6use chumsky::prelude::*;
7use lambda::RootedLambdaPool;
8use language::{Expr, LambdaParseError};
9use std::{collections::BTreeMap, fmt::Display};
10use thiserror::Error;
11
///An entity that can receive theta roles (e.g. a human, a cup, a thought), represented as a
///string slice.
pub type Actor<'a> = &'a str;
///An entity that can assign theta roles (e.g. a running event, a moment when it is raining).
///Events are represented as indices into the [`ThetaRoles`] of a given [`Scenario`].
pub type Event = u8;
17
18#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
19///The union of [`Actor`] and [`Event`]
20pub enum Entity<'a> {
21    ///See [`Actor`]
22    Actor(Actor<'a>),
23    ///See [`Event`]
24    Event(Event),
25}
26
27impl Display for Entity<'_> {
28    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
29        match self {
30            Entity::Actor(a) => write!(f, "a_{a}"),
31            Entity::Event(a) => write!(f, "e_{a}"),
32        }
33    }
34}
35
36///The representation of the theta roles of a given event.
37#[derive(Debug, Clone, Copy, PartialEq, Default, Eq, Hash)]
38pub struct ThetaRoles<'a> {
39    ///The agent of the event.
40    pub agent: Option<Actor<'a>>,
41    ///The patient of the event.
42    pub patient: Option<Actor<'a>>,
43}
44
///The name of a property, used as the key of the property map of a [`Scenario`].
type PropertyLabel<'a> = &'a str;
46
47///The representation of a scenario. A moment consisting of various events, the present actors and
48///any predicates that apply to either.
49#[derive(Debug, Clone, Eq, PartialEq, Hash)]
50pub struct Scenario<'a> {
51    actors: Vec<Actor<'a>>,
52    thematic_relations: Vec<ThetaRoles<'a>>,
53    properties: BTreeMap<PropertyLabel<'a>, Vec<Entity<'a>>>,
54    question: Vec<RootedLambdaPool<'a, Expr<'a>>>,
55}
56
57impl<'a> Scenario<'a> {
58    ///Create a new scenario.
59    pub fn new(
60        actors: Vec<Actor<'a>>,
61        thematic_relations: Vec<ThetaRoles<'a>>,
62        properties: BTreeMap<PropertyLabel<'a>, Vec<Entity<'a>>>,
63    ) -> Scenario<'a> {
64        Scenario {
65            actors,
66            thematic_relations,
67            properties,
68            question: vec![],
69        }
70    }
71
72    ///Get the representation of all events as a slice of [`ThetaRoles`].
73    pub fn thematic_relations(&self) -> &[ThetaRoles<'a>] {
74        &self.thematic_relations
75    }
76
77    ///Get the properties (e.g. what predicates apply) of the entities in a scenario.
78    pub fn properties(&self) -> &BTreeMap<PropertyLabel<'a>, Vec<Entity<'a>>> {
79        &self.properties
80    }
81
82    ///Get a slice of all [`Actor`]s in the [`Scenario`]
83    pub fn actors(&self) -> &[Actor<'_>] {
84        &self.actors
85    }
86
87    ///Get the questions associated with a scenario (which may be empty if there are no questions).
88    ///Questions are representated as [`RootedLambdaPool`] which return a truth value.
89    pub fn questions(&self) -> &[RootedLambdaPool<'a, Expr<'a>>] {
90        &self.question
91    }
92
93    ///Get a mutable reference to the questions of a scenario.
94    ///See [`Scenario::questions`]
95    pub fn question_mut(&mut self) -> &mut Vec<RootedLambdaPool<'a, Expr<'a>>> {
96        &mut self.question
97    }
98
99    fn events(&self) -> impl Iterator<Item = Event> {
100        0..(self.thematic_relations.len() as Event)
101    }
102}
103
104///A struct defining a dataset of different [`Scenario`]s as well as their associated sentences all
105///lemmas in the dataset.
106#[derive(Debug, Default, Clone, Eq, PartialEq)]
107pub struct ScenarioDataset<'a> {
108    scenarios: Vec<Scenario<'a>>,
109    sentences: Vec<Vec<&'a str>>,
110    lemmas: Vec<&'a str>,
111}
112
113///Error for creating a dataset without equal sentences and scenarios.
114#[derive(Debug, Default, Clone, Eq, PartialEq, Error)]
115pub struct DatasetError {}
116
117impl Display for DatasetError {
118    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
119        write!(f, "Scenario and sentences not of equal length!")
120    }
121}
122
123impl<'a> ScenarioDataset<'a> {
124    ///Create a new [`ScenarioDataset`]
125    pub fn new(
126        scenarios: Vec<Scenario<'a>>,
127        sentences: Vec<Vec<&'a str>>,
128        lemmas: HashSet<&'a str>,
129    ) -> Result<Self, DatasetError> {
130        if scenarios.len() != sentences.len() {
131            return Err(DatasetError {});
132        }
133
134        let mut lemmas: Vec<_> = lemmas.into_iter().collect();
135        lemmas.sort();
136        Ok(ScenarioDataset {
137            scenarios,
138            sentences,
139            lemmas,
140        })
141    }
142
143    ///Is the dataset empty?
144    pub fn is_empty(&self) -> bool {
145        self.scenarios.is_empty()
146    }
147
148    ///The number of scenarios in the [`ScenarioDataset`].
149    pub fn len(&self) -> usize {
150        self.scenarios.len()
151    }
152
153    ///Iterate over all scenarios with a mutable reference.
154    pub fn iter_scenarios_mut(&mut self) -> impl Iterator<Item = &mut Scenario<'a>> {
155        self.scenarios.iter_mut()
156    }
157
158    ///Iterate over all scenarios
159    pub fn iter_scenarios(&self) -> impl Iterator<Item = &Scenario<'a>> {
160        self.scenarios.iter()
161    }
162
163    ///Iterate over all scenarios and sentences with a mutable reference.
164    pub fn iter_mut(&mut self) -> impl Iterator<Item = (&mut Scenario<'a>, &mut Vec<&'a str>)> {
165        self.scenarios.iter_mut().zip(self.sentences.iter_mut())
166    }
167
168    ///Iterate over all scenarios and sentences
169    pub fn iter(&self) -> impl Iterator<Item = (&Scenario<'a>, &Vec<&'a str>)> {
170        self.scenarios.iter().zip(self.sentences.iter())
171    }
172
173    ///Get the available lemmas of a dataset.
174    pub fn lemmas(&self) -> &[&'a str] {
175        &self.lemmas
176    }
177
178    ///Parse a list of sentences and scenarios and return the dataset.
179    pub fn parse(s: &'a str) -> Result<Self, LambdaParseError> {
180        let parser = scenario::scenario_parser();
181        let parse = parser.parse(s).into_result();
182        parse?
183    }
184}
185
186pub mod lambda;
187pub mod language;
188mod utils;
189pub use language::{LanguageExpression, LanguageResult, parse_executable};
190mod scenario;