simple_semantics/
lib.rs

1//! # Simple semantics
2//! This crate defines a simple language of thought with a lambda calculus and a randomization
3//! procedure to learn simple montagovian grammars.
4#![warn(missing_docs)]
5use ahash::HashSet;
6use chumsky::prelude::*;
7use lambda::RootedLambdaPool;
8use language::Expr;
9use serde::{Deserialize, Serialize};
10use std::{collections::BTreeMap, fmt::Display};
11use thiserror::Error;
12
///The representation of an entity that can receive theta roles (e.g. a human, a cup, a thought).
///
///An actor is identified by a string slice borrowed for the lifetime `'a`.
pub type Actor<'a> = &'a str;
///The representation of an entity that can assign theta roles (e.g. a running event, when it's raining, etc.)
///They are represented as indices to the relevant [`ThetaRoles`] in a given [`Scenario`].
///
///The index is stored as a `u8`.
pub type Event = u8;
18
#[derive(Debug, Clone, Copy, PartialEq, Eq, Ord, PartialOrd, Hash, Serialize, Deserialize)]
///The union of [`Actor`] and [`Event`]
///
///Its [`Display`] implementation prints actors as `a_<name>` and events as `e_<index>`.
pub enum Entity<'a> {
    ///An entity identified by name. See [`Actor`]
    // `borrow` lets serde keep the name as a slice of the deserializer input.
    #[serde(borrow)]
    Actor(Actor<'a>),
    ///An entity identified by its event index. See [`Event`]
    Event(Event),
}
28
29impl Display for Entity<'_> {
30    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
31        match self {
32            Entity::Actor(a) => write!(f, "a_{a}"),
33            Entity::Event(a) => write!(f, "e_{a}"),
34        }
35    }
36}
37
///The representation of the theta roles of a given event.
///
///Both roles are optional; the derived [`Default`] leaves both of them as `None`.
#[derive(
    Debug, Clone, Copy, PartialEq, Default, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize,
)]
pub struct ThetaRoles<'a> {
    ///The agent of the event, or `None` if the event has no agent.
    #[serde(borrow)]
    pub agent: Option<Actor<'a>>,
    ///The patient of the event, or `None` if the event has no patient.
    pub patient: Option<Actor<'a>>,
}
49
///The name of a property; used as the key of the property map of a [`Scenario`].
type PropertyLabel<'a> = &'a str;
51
///The representation of a scenario. A moment consisting of various events, the present actors and
///any predicates that apply to either.
#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize)]
pub struct Scenario<'a> {
    ///The actors present in the scenario.
    #[serde(borrow)]
    actors: Vec<Actor<'a>>,
    ///The theta roles of each event; an [`Event`] is an index into this vector.
    thematic_relations: Vec<ThetaRoles<'a>>,
    ///Maps each property label to the entities (actors or events) it applies to.
    properties: BTreeMap<PropertyLabel<'a>, Vec<Entity<'a>>>,
    ///The questions attached to the scenario; empty for a scenario built with [`Scenario::new`].
    question: Vec<RootedLambdaPool<'a, Expr<'a>>>,
}
62
63impl<'a> Scenario<'a> {
64    ///Create a new scenario.
65    #[must_use]
66    pub fn new(
67        actors: Vec<Actor<'a>>,
68        thematic_relations: Vec<ThetaRoles<'a>>,
69        properties: BTreeMap<PropertyLabel<'a>, Vec<Entity<'a>>>,
70    ) -> Scenario<'a> {
71        Scenario {
72            actors,
73            thematic_relations,
74            properties,
75            question: vec![],
76        }
77    }
78
79    ///Get the representation of all events as a slice of [`ThetaRoles`].
80    #[must_use]
81    pub fn thematic_relations(&self) -> &[ThetaRoles<'a>] {
82        &self.thematic_relations
83    }
84
85    ///Get the properties (e.g. what predicates apply) of the entities in a scenario.
86    #[must_use]
87    pub fn properties(&self) -> &BTreeMap<PropertyLabel<'a>, Vec<Entity<'a>>> {
88        &self.properties
89    }
90
91    ///Get a slice of all [`Actor`]s in the [`Scenario`]
92    #[must_use]
93    pub fn actors(&self) -> &[Actor<'a>] {
94        &self.actors
95    }
96
97    ///Get the questions associated with a scenario (which may be empty if there are no questions).
98    ///Questions are representated as [`RootedLambdaPool`] which return a truth value.
99    #[must_use]
100    pub fn questions(&self) -> &[RootedLambdaPool<'a, Expr<'a>>] {
101        &self.question
102    }
103
104    ///Get a mutable reference to the questions of a scenario.
105    ///See [`Scenario::questions`]
106    pub fn question_mut(&mut self) -> &mut Vec<RootedLambdaPool<'a, Expr<'a>>> {
107        &mut self.question
108    }
109
110    fn events(&self) -> impl Iterator<Item = Event> {
111        0..Event::try_from(self.thematic_relations.len()).unwrap()
112    }
113}
114
///A struct defining a dataset of different [`Scenario`]s as well as their associated sentences
///and all lemmas in the dataset.
#[derive(Debug, Default, Clone, Eq, PartialEq, Serialize, Deserialize)]
pub struct ScenarioDataset<'a> {
    ///The scenarios of the dataset.
    #[serde(borrow)]
    scenarios: Vec<Scenario<'a>>,
    ///The sentences of the dataset, each a list of words; [`ScenarioDataset::new`] requires one
    ///sentence per scenario.
    sentences: Vec<Vec<&'a str>>,
    ///The words occurring in the sentences; [`ScenarioDataset::new`] stores them deduplicated
    ///and sorted.
    lemmas: Vec<&'a str>,
}
124
///Error returned when creating a dataset whose number of scenarios differs from its number of
///sentences.
#[derive(Debug, Default, Clone, Eq, PartialEq, Error)]
pub struct DatasetError {}
128
129impl Display for DatasetError {
130    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
131        write!(f, "Scenario and sentences not of equal length!")
132    }
133}
134
135impl<'a> ScenarioDataset<'a> {
136    ///Create a new [`ScenarioDataset`]
137    ///
138    ///# Errors
139    ///Returns a dataset error if `scenarios.len() != sentence.len()`
140    pub fn new(
141        scenarios: Vec<Scenario<'a>>,
142        sentences: Vec<Vec<&'a str>>,
143    ) -> Result<Self, DatasetError> {
144        if scenarios.len() != sentences.len() {
145            return Err(DatasetError {});
146        }
147        let lemmas = sentences.iter().flatten().copied().collect::<HashSet<_>>();
148        let mut lemmas: Vec<_> = lemmas.into_iter().collect();
149        lemmas.sort_unstable();
150        Ok(ScenarioDataset {
151            scenarios,
152            sentences,
153            lemmas,
154        })
155    }
156
157    ///Is the dataset empty?
158    #[must_use]
159    pub fn is_empty(&self) -> bool {
160        self.scenarios.is_empty()
161    }
162
163    ///The number of scenarios in the [`ScenarioDataset`].
164    #[must_use]
165    pub fn len(&self) -> usize {
166        self.scenarios.len()
167    }
168
169    ///Iterate over all scenarios with a mutable reference.
170    pub fn iter_scenarios_mut(&mut self) -> impl Iterator<Item = &mut Scenario<'a>> {
171        self.scenarios.iter_mut()
172    }
173
174    ///Iterate over all scenarios
175    pub fn iter_scenarios(&self) -> impl Iterator<Item = &Scenario<'a>> {
176        self.scenarios.iter()
177    }
178
179    ///Iterate over all scenarios and sentences with a mutable reference.
180    pub fn iter_mut(&mut self) -> impl Iterator<Item = (&mut Scenario<'a>, &mut Vec<&'a str>)> {
181        self.scenarios.iter_mut().zip(self.sentences.iter_mut())
182    }
183
184    ///Iterate over all scenarios and sentences
185    pub fn iter(&self) -> impl Iterator<Item = (&Scenario<'a>, &Vec<&'a str>)> {
186        self.scenarios.iter().zip(self.sentences.iter())
187    }
188
189    ///Get the available lemmas of a dataset.
190    #[must_use]
191    pub fn lemmas(&self) -> &[&'a str] {
192        &self.lemmas
193    }
194
195    ///Parse a list of sentences and scenarios and return the dataset.
196    ///
197    ///# Errors
198    ///Returns a [`LambdaParseError`] if the string is malformed and doesn't represent a
199    ///[`ScenarioDataset`]
200    pub fn parse(s: &'a str) -> Result<Self, ScenarioParsingError> {
201        let parser = scenario::scenario_dataset_parser();
202        let parse = parser.parse(s).into_result();
203        parse.map_err(ScenarioParsingError::from)
204    }
205}
206
207impl From<Vec<Rich<'_, char>>> for ScenarioParsingError {
208    fn from(value: Vec<Rich<'_, char>>) -> Self {
209        ScenarioParsingError(value.into_iter().map(|x| x.to_string()).collect())
210    }
211}
212
///An error that arises from parsing a [`Scenario`] or [`ScenarioDataset`]
///
///The wrapped vector holds the error messages as strings; its [`Display`] implementation prints
///them joined by single spaces.
#[derive(Debug, Clone, Eq, PartialEq, Error)]
pub struct ScenarioParsingError(pub Vec<String>);
216impl Display for ScenarioParsingError {
217    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
218        write!(f, "{}", self.0.join(" "))
219    }
220}
221
222pub mod lambda;
223pub mod language;
224mod utils;
225pub use language::{LanguageExpression, LanguageResult, parse_executable};
226mod scenario;
227pub use scenario::{EventType, PossibleEvent, ScenarioIterator};