1#![warn(missing_docs)]
5use ahash::HashSet;
6use chumsky::prelude::*;
7use lambda::RootedLambdaPool;
8use language::Expr;
9use serde::{Deserialize, Serialize};
10use std::{collections::BTreeMap, fmt::Display};
11use thiserror::Error;
12
/// The name of an actor, held as a borrowed string slice.
pub type Actor<'a> = &'a str;
/// The identifier of an event.
///
/// Within this file, event ids are produced as the indices `0..n` of a
/// scenario's thematic relations, so an id doubles as an index into that list.
pub type Event = u8;
18
19#[derive(Debug, Clone, Copy, PartialEq, Eq, Ord, PartialOrd, Hash, Serialize, Deserialize)]
20pub enum Entity<'a> {
22 #[serde(borrow)]
24 Actor(Actor<'a>),
25 Event(Event),
27}
28
29impl Display for Entity<'_> {
30 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
31 match self {
32 Entity::Actor(a) => write!(f, "a_{a}"),
33 Entity::Event(a) => write!(f, "e_{a}"),
34 }
35 }
36}
37
38#[derive(
40 Debug, Clone, Copy, PartialEq, Default, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize,
41)]
42pub struct ThetaRoles<'a> {
43 #[serde(borrow)]
45 pub agent: Option<Actor<'a>>,
46 pub patient: Option<Actor<'a>>,
48}
49
/// The label of a property, held as a borrowed string slice.
type PropertyLabel<'a> = &'a str;
51
52#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize)]
55pub struct Scenario<'a> {
56 #[serde(borrow)]
57 actors: Vec<Actor<'a>>,
58 thematic_relations: Vec<ThetaRoles<'a>>,
59 properties: BTreeMap<PropertyLabel<'a>, Vec<Entity<'a>>>,
60 question: Vec<RootedLambdaPool<'a, Expr<'a>>>,
61}
62
63impl<'a> Scenario<'a> {
64 #[must_use]
66 pub fn new(
67 actors: Vec<Actor<'a>>,
68 thematic_relations: Vec<ThetaRoles<'a>>,
69 properties: BTreeMap<PropertyLabel<'a>, Vec<Entity<'a>>>,
70 ) -> Scenario<'a> {
71 Scenario {
72 actors,
73 thematic_relations,
74 properties,
75 question: vec![],
76 }
77 }
78
79 #[must_use]
81 pub fn thematic_relations(&self) -> &[ThetaRoles<'a>] {
82 &self.thematic_relations
83 }
84
85 #[must_use]
87 pub fn properties(&self) -> &BTreeMap<PropertyLabel<'a>, Vec<Entity<'a>>> {
88 &self.properties
89 }
90
91 #[must_use]
93 pub fn actors(&self) -> &[Actor<'a>] {
94 &self.actors
95 }
96
97 #[must_use]
100 pub fn questions(&self) -> &[RootedLambdaPool<'a, Expr<'a>>] {
101 &self.question
102 }
103
104 pub fn question_mut(&mut self) -> &mut Vec<RootedLambdaPool<'a, Expr<'a>>> {
107 &mut self.question
108 }
109
110 fn events(&self) -> impl Iterator<Item = Event> {
111 0..Event::try_from(self.thematic_relations.len()).unwrap()
112 }
113}
114
115#[derive(Debug, Default, Clone, Eq, PartialEq, Serialize, Deserialize)]
118pub struct ScenarioDataset<'a> {
119 #[serde(borrow)]
120 scenarios: Vec<Scenario<'a>>,
121 sentences: Vec<Vec<&'a str>>,
122 lemmas: Vec<&'a str>,
123}
124
125#[derive(Debug, Default, Clone, Eq, PartialEq, Error)]
127pub struct DatasetError {}
128
129impl Display for DatasetError {
130 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
131 write!(f, "Scenario and sentences not of equal length!")
132 }
133}
134
135impl<'a> ScenarioDataset<'a> {
136 pub fn new(
141 scenarios: Vec<Scenario<'a>>,
142 sentences: Vec<Vec<&'a str>>,
143 ) -> Result<Self, DatasetError> {
144 if scenarios.len() != sentences.len() {
145 return Err(DatasetError {});
146 }
147 let lemmas = sentences.iter().flatten().copied().collect::<HashSet<_>>();
148 let mut lemmas: Vec<_> = lemmas.into_iter().collect();
149 lemmas.sort_unstable();
150 Ok(ScenarioDataset {
151 scenarios,
152 sentences,
153 lemmas,
154 })
155 }
156
157 #[must_use]
159 pub fn is_empty(&self) -> bool {
160 self.scenarios.is_empty()
161 }
162
163 #[must_use]
165 pub fn len(&self) -> usize {
166 self.scenarios.len()
167 }
168
169 pub fn iter_scenarios_mut(&mut self) -> impl Iterator<Item = &mut Scenario<'a>> {
171 self.scenarios.iter_mut()
172 }
173
174 pub fn iter_scenarios(&self) -> impl Iterator<Item = &Scenario<'a>> {
176 self.scenarios.iter()
177 }
178
179 pub fn iter_mut(&mut self) -> impl Iterator<Item = (&mut Scenario<'a>, &mut Vec<&'a str>)> {
181 self.scenarios.iter_mut().zip(self.sentences.iter_mut())
182 }
183
184 pub fn iter(&self) -> impl Iterator<Item = (&Scenario<'a>, &Vec<&'a str>)> {
186 self.scenarios.iter().zip(self.sentences.iter())
187 }
188
189 #[must_use]
191 pub fn lemmas(&self) -> &[&'a str] {
192 &self.lemmas
193 }
194
195 pub fn parse(s: &'a str) -> Result<Self, ScenarioParsingError> {
203 let parser = scenario::scenario_dataset_parser();
204 let parse = parser.parse(s).into_result();
205 parse.map_err(ScenarioParsingError::from)
206 }
207
208 #[expect(clippy::missing_panics_doc)] pub fn parse_rows<I: Iterator<Item = &'a str>>(i: I) -> Result<Self, ScenarioParsingError> {
217 let parser = string_scenario_parser().then_ignore(end());
218 let mut scenarios = vec![];
219 let mut sentences = vec![];
220 for x in i.filter(|x| !x.is_empty()) {
221 let (a, b) = parser.parse(x).into_result()?;
222 sentences.push(a);
223 scenarios.push(b);
224 }
225 if sentences.is_empty() {
226 return Err(ScenarioParsingError(vec![
227 "Dataset must have at least one sentence/scenario pair!".to_string(),
228 ]));
229 }
230
231 Ok(ScenarioDataset::new(scenarios, sentences).unwrap())
232 }
233}
234
235impl From<Vec<Rich<'_, char>>> for ScenarioParsingError {
236 fn from(value: Vec<Rich<'_, char>>) -> Self {
237 ScenarioParsingError(value.into_iter().map(|x| x.to_string()).collect())
238 }
239}
240
241#[derive(Debug, Clone, Eq, PartialEq, Error)]
243pub struct ScenarioParsingError(pub Vec<String>);
244impl Display for ScenarioParsingError {
245 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
246 write!(f, "{}", self.0.join(" "))
247 }
248}
249
250pub mod lambda;
251pub mod language;
252mod utils;
253pub use language::{LanguageExpression, LanguageResult, parse_executable};
254mod scenario;
255pub use scenario::{EventType, PossibleEvent, ScenarioIterator};
256
257use crate::scenario::string_scenario_parser;