nom_utils/
lib.rs

1use bstr::{BStr, ByteSlice};
2use nom::error::{ContextError, ErrorKind};
3use nom::Input;
4use nom_locate::LocatedSpan;
5use std::fmt::{Debug, Write};
6use std::marker::PhantomData;
7
8pub type Span<'a> = LocatedSpan<&'a [u8]>;
9pub type IResult<'a, A, B> = nom::IResult<A, B, ParseError<Span<'a>>>;
10
11pub fn make_span(s: &'_ [u8]) -> Span<'_> {
12    Span::new(s)
13}
14
15/// Like nom::bytes::complete::tag, except that we print what the tag
16/// was expecting if there was an error.
17/// I feel like this should be the default behavior TBH.
18pub fn tag<E>(tag: &'static str) -> TagParser<E> {
19    TagParser {
20        tag,
21        e: PhantomData,
22    }
23}
24
/// Parser returned by [`tag`].
///
/// It exists as a named type (rather than a closure) so that it can carry the
/// expected tag around and mention it in the error it reports on failure.
pub struct TagParser<E> {
    /// The literal text that must appear at the start of the input.
    tag: &'static str,
    /// Ties the parser to its error type `E` without storing a value of it.
    e: PhantomData<E>,
}
30
31/// All this fuss to show what we expected for the TagParser impl
32impl<I, Error: nom::error::ParseError<I> + nom::error::FromExternalError<I, String>> nom::Parser<I>
33    for TagParser<Error>
34where
35    I: nom::Input + nom::Compare<&'static str> + nom::AsBytes,
36{
37    type Output = I;
38    type Error = Error;
39
40    fn process<OM: nom::OutputMode>(
41        &mut self,
42        i: I,
43    ) -> nom::PResult<OM, I, Self::Output, Self::Error> {
44        use nom::error::ErrorKind;
45        use nom::{CompareResult, Err, Mode};
46
47        let tag_len = self.tag.input_len();
48
49        match i.compare(self.tag) {
50            CompareResult::Ok => Ok((i.take_from(tag_len), OM::Output::bind(|| i.take(tag_len)))),
51            CompareResult::Incomplete => Err(Err::Error(OM::Error::bind(|| {
52                Error::from_external_error(
53                    i,
54                    ErrorKind::Fail,
55                    format!(
56                        "expected \"{}\" but ran out of input",
57                        self.tag.escape_debug()
58                    ),
59                )
60            }))),
61
62            CompareResult::Error => {
63                let available = i.take(i.input_len().min(tag_len));
64                Err(Err::Error(OM::Error::bind(|| {
65                    Error::from_external_error(
66                        i,
67                        ErrorKind::Fail,
68                        format!(
69                            "expected \"{}\" but found {:?}",
70                            self.tag.escape_debug(),
71                            BStr::new(available.as_bytes())
72                        ),
73                    )
74                })))
75            }
76        }
77    }
78}
79
80#[derive(Debug)]
81pub enum ParseErrorKind {
82    Context(&'static str),
83    Char(char),
84    Nom(ErrorKind),
85    External { kind: ErrorKind, reason: String },
86}
87
88#[derive(Debug)]
89pub struct ParseError<I: Debug> {
90    pub errors: Vec<(I, ParseErrorKind)>,
91}
92
93impl<I: Debug> ContextError<I> for ParseError<I> {
94    fn add_context(input: I, ctx: &'static str, mut other: Self) -> Self {
95        other.errors.push((input, ParseErrorKind::Context(ctx)));
96        other
97    }
98}
99
100impl<I: Debug> nom::error::ParseError<I> for ParseError<I> {
101    fn from_error_kind(input: I, kind: ErrorKind) -> Self {
102        Self {
103            errors: vec![(input, ParseErrorKind::Nom(kind))],
104        }
105    }
106
107    fn append(input: I, kind: ErrorKind, mut other: Self) -> Self {
108        other.errors.push((input, ParseErrorKind::Nom(kind)));
109        other
110    }
111
112    fn from_char(input: I, c: char) -> Self {
113        Self {
114            errors: vec![(input, ParseErrorKind::Char(c))],
115        }
116    }
117}
118
119impl<I: Debug, E: std::fmt::Display> nom::error::FromExternalError<I, E> for ParseError<I> {
120    fn from_external_error(input: I, kind: ErrorKind, err: E) -> Self {
121        Self {
122            errors: vec![(
123                input,
124                ParseErrorKind::External {
125                    kind,
126                    reason: format!("{err:#}"),
127                },
128            )],
129        }
130    }
131}
132
133pub fn make_context_error<S: Into<String>>(
134    input: Span<'_>,
135    reason: S,
136) -> nom::Err<ParseError<Span<'_>>> {
137    nom::Err::Error(ParseError {
138        errors: vec![(
139            input,
140            ParseErrorKind::External {
141                kind: nom::error::ErrorKind::Fail,
142                reason: reason.into(),
143            },
144        )],
145    })
146}
147
148pub fn explain_nom(input: Span, err: nom::Err<ParseError<Span<'_>>>) -> String {
149    match err {
150        nom::Err::Error(e) => {
151            let mut result = String::new();
152            let mut lines_shown = vec![];
153
154            for (span, kind) in e.errors.iter() {
155                if input.is_empty() {
156                    match kind {
157                        ParseErrorKind::Char(c) => {
158                            write!(&mut result, "Error expected '{c}', got empty input\n\n")
159                        }
160                        ParseErrorKind::Context(s) => {
161                            write!(&mut result, "Error in {s}, got empty input\n\n")
162                        }
163                        ParseErrorKind::External { kind, reason } => {
164                            write!(&mut result, "Error {reason} {kind:?}, got empty input\n\n")
165                        }
166                        ParseErrorKind::Nom(e) => {
167                            write!(&mut result, "Error in {e:?}, got empty input\n\n")
168                        }
169                    }
170                    .ok();
171                    continue;
172                }
173
174                let line_number = span.location_line();
175                let input_line = span.get_line_beginning();
176                // Remap \t in particular, because it can render as multiple
177                // columns and defeat the column number calculation provided
178                // by the Span type
179                let mut line = String::new();
180                for (start, end, c) in input_line.char_indices() {
181                    let c = match c {
182                        '\t' => '\u{2409}',
183                        '\r' => '\u{240d}',
184                        '\n' => '\u{240a}',
185                        c => c,
186                    };
187
188                    if c == std::char::REPLACEMENT_CHARACTER {
189                        let bytes = &input_line[start..end];
190                        for b in bytes.iter() {
191                            line.push_str(&format!("\\x{b:02X}"));
192                        }
193                    } else {
194                        line.push(c);
195                    }
196                }
197
198                let column = span.get_utf8_column();
199
200                lines_shown.push(line_number);
201
202                let mut caret = " ".repeat(column.saturating_sub(1));
203                caret.push('^');
204                for _ in 1..span.fragment().len() {
205                    caret.push('_')
206                }
207
208                match kind {
209                    ParseErrorKind::Char(expected) => {
210                        if let Some(actual) = span.fragment().chars().next() {
211                            write!(
212                                &mut result,
213                                "Error at line {line_number}:\n\
214                                    {line}\n\
215                                    {caret}\n\
216                                    expected '{expected}', found {actual}\n\n",
217                            )
218                        } else {
219                            write!(
220                                &mut result,
221                                "Error at line {line_number}:\n\
222                                    {line}\n\
223                                    {caret}\n\
224                                    expected '{expected}', got end of input\n\n",
225                            )
226                        }
227                    }
228                    ParseErrorKind::Context(context) => {
229                        write!(&mut result, "while parsing {context}\n")
230                    }
231                    ParseErrorKind::External { kind: _, reason } => {
232                        write!(
233                            &mut result,
234                            "Error at line {line_number}, {reason}:\n\
235                                {line}\n\
236                                {caret}\n\n",
237                        )
238                    }
239                    ParseErrorKind::Nom(nom_err) => {
240                        write!(
241                            &mut result,
242                            "Error at line {line_number}, in {nom_err:?}:\n\
243                                {line}\n\
244                                {caret}\n\n",
245                        )
246                    }
247                }
248                .ok();
249            }
250            result
251        }
252        _ => format!("{err:#}"),
253    }
254}