mailparsing/
mimepart.rs

1use crate::header::{HeaderParseResult, MessageConformance};
2use crate::headermap::HeaderMap;
3use crate::strings::IntoSharedString;
4use crate::{
5    has_lone_cr_or_lf, Header, MailParsingError, MessageID, MimeParameters, Result, SharedString,
6};
7use charset::Charset;
8use std::borrow::Cow;
9use std::str::FromStr;
10
/// Define our own because data_encoding::BASE64_MIME, despite its name,
/// is not RFC2045 compliant, and will not ignore spaces.
///
/// This specification uses the standard base64 alphabet with `=` padding,
/// lists space, CR, LF and tab as ignored characters, and wraps encoded
/// output at 76 columns using CRLF as the separator.
const BASE64_RFC2045: data_encoding::Encoding = data_encoding_macro::new_encoding! {
    symbols: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
    padding: '=',
    ignore: " \r\n\t",
    wrap_width: 76,
    wrap_separator: "\r\n",
};
20
/// A single MIME entity: its headers, its (still transfer-encoded) body
/// and, for multipart entities, the child parts parsed out of that body.
#[derive(Debug, Clone, PartialEq)]
pub struct MimePart<'a> {
    /// The bytes that comprise this part, from its beginning to its end
    bytes: SharedString<'a>,
    /// The parsed headers from the start of bytes
    headers: HeaderMap<'a>,
    /// The index into bytes of the first non-header byte.
    body_offset: usize,
    /// Despite the name, this is used by `raw_body` as the exclusive end
    /// index of the body within `bytes`. It is reset to 0 once a multipart
    /// body has been split into `parts`.
    body_len: usize,
    /// Conformance flags recorded while parsing this part, merged with
    /// the flags of any child parts.
    conformance: MessageConformance,
    /// The child parts of a multipart entity; empty otherwise
    parts: Vec<Self>,
    /// For multipart, the content that precedes the first boundary
    intro: SharedString<'a>,
    /// For multipart, the content that follows the last boundary
    outro: SharedString<'a>,
}
37
38struct Rfc2045Info {
39    encoding: ContentTransferEncoding,
40    charset: Charset,
41    content_type: Option<MimeParameters>,
42    is_text: bool,
43    is_multipart: bool,
44    attachment_options: Option<AttachmentOptions>,
45}
46
47impl Rfc2045Info {
48    fn new(headers: &HeaderMap) -> Result<Self> {
49        let content_transfer_encoding = headers.content_transfer_encoding()?;
50
51        let encoding = match &content_transfer_encoding {
52            Some(cte) => ContentTransferEncoding::from_str(&cte.value)?,
53            None => ContentTransferEncoding::SevenBit,
54        };
55
56        let content_type = headers.content_type()?;
57        let charset = if let Some(ct) = &content_type {
58            ct.get("charset")
59        } else {
60            None
61        };
62        let charset = charset.unwrap_or_else(|| "us-ascii".to_string());
63
64        let charset = Charset::for_label_no_replacement(charset.as_bytes())
65            .ok_or_else(|| MailParsingError::BodyParse(format!("unsupported charset {charset}")))?;
66
67        let (is_text, is_multipart) = if let Some(ct) = &content_type {
68            (ct.is_text(), ct.is_multipart())
69        } else {
70            (true, false)
71        };
72
73        let content_disposition = headers.content_disposition()?;
74        let attachment_options = match content_disposition {
75            Some(cd) => {
76                let inline = cd.value == "inline";
77                let content_id = headers.content_id()?;
78                let file_name = cd.get("filename");
79
80                Some(AttachmentOptions {
81                    file_name,
82                    inline,
83                    content_id: content_id.map(|cid| cid.0),
84                })
85            }
86            None => None,
87        };
88
89        Ok(Self {
90            encoding,
91            charset,
92            content_type,
93            is_text,
94            is_multipart,
95            attachment_options,
96        })
97    }
98}
99
100impl<'a> MimePart<'a> {
101    /// Parse some data into a tree of MimeParts
102    pub fn parse<S>(bytes: S) -> Result<Self>
103    where
104        S: IntoSharedString<'a>,
105    {
106        let (bytes, base_conformance) = bytes.into_shared_string();
107        Self::parse_impl(bytes, base_conformance, true)
108    }
109
110    fn parse_impl(
111        bytes: SharedString<'a>,
112        base_conformance: MessageConformance,
113        is_top_level: bool,
114    ) -> Result<Self> {
115        let HeaderParseResult {
116            headers,
117            body_offset,
118            overall_conformance: mut conformance,
119        } = Header::parse_headers(bytes.clone())?;
120
121        conformance |= base_conformance;
122
123        let body_len = bytes.len();
124
125        if !bytes.is_ascii() {
126            conformance.set(MessageConformance::NEEDS_TRANSFER_ENCODING, true);
127        }
128        {
129            let mut prev = 0;
130            for idx in memchr::memchr_iter(b'\n', bytes.as_bytes()) {
131                if idx - prev > 78 {
132                    conformance.set(MessageConformance::LINE_TOO_LONG, true);
133                    break;
134                }
135                prev = idx;
136            }
137        }
138        conformance.set(
139            MessageConformance::NON_CANONICAL_LINE_ENDINGS,
140            has_lone_cr_or_lf(bytes.as_bytes()),
141        );
142
143        if is_top_level {
144            conformance.set(
145                MessageConformance::MISSING_DATE_HEADER,
146                !matches!(headers.date(), Ok(Some(_))),
147            );
148            conformance.set(
149                MessageConformance::MISSING_MESSAGE_ID_HEADER,
150                !matches!(headers.message_id(), Ok(Some(_))),
151            );
152            conformance.set(
153                MessageConformance::MISSING_MIME_VERSION,
154                match headers.mime_version() {
155                    Ok(Some(v)) => v.as_str() != "1.0",
156                    _ => true,
157                },
158            );
159        }
160
161        let mut part = Self {
162            bytes,
163            headers,
164            body_offset,
165            body_len,
166            conformance,
167            parts: vec![],
168            intro: SharedString::Borrowed(""),
169            outro: SharedString::Borrowed(""),
170        };
171
172        part.recursive_parse()?;
173
174        Ok(part)
175    }
176
    /// If this part is a multipart with a `boundary` parameter, split its
    /// body on that boundary and parse each section as a child part.
    ///
    /// Populates `intro`, `parts` and `outro`, zeroes `body_len` once the
    /// first boundary has been found, and merges each child's conformance
    /// flags into this part. Parts that are not multipart, or that have no
    /// boundary parameter, are left untouched.
    fn recursive_parse(&mut self) -> Result<()> {
        let info = Rfc2045Info::new(&self.headers)?;
        // Only proceed when there is a boundary parameter AND the content
        // type is multipart; the `true` in the pattern enforces the latter.
        if let Some((boundary, true)) = info
            .content_type
            .as_ref()
            .and_then(|ct| ct.get("boundary").map(|b| (b, info.is_multipart)))
        {
            // The search needle includes the newline that precedes the
            // boundary marker.
            let boundary = format!("\n--{boundary}");
            // Start one byte before the body so that a boundary on the
            // very first body line is still preceded by a newline.
            let raw_body = self
                .bytes
                .slice(self.body_offset.saturating_sub(1)..self.bytes.len());

            let mut iter = memchr::memmem::find_iter(raw_body.as_bytes(), &boundary);
            if let Some(first_boundary_pos) = iter.next() {
                self.intro = raw_body.slice(0..first_boundary_pos);

                // When we create parts, we ignore the original body span in
                // favor of what we're parsing out here now
                self.body_len = 0;

                // Index just past the boundary text; the rest of the
                // boundary line (if any) is skipped below.
                let mut boundary_end = first_boundary_pos + boundary.len();

                // part_start is the byte following the newline that ends
                // the boundary line
                while let Some(part_start) =
                    memchr::memchr(b'\n', &raw_body.as_bytes()[boundary_end..])
                        .map(|p| p + boundary_end + 1)
                {
                    // Without a further boundary the part runs to the end
                    // of the body
                    let part_end = iter
                        .next()
                        .map(|p| {
                            // P is the newline; we want to include it in the raw
                            // bytes for this part, so look beyond it
                            p + 1
                        })
                        .unwrap_or(raw_body.len());

                    let child = Self::parse_impl(
                        raw_body.slice(part_start..part_end),
                        MessageConformance::default(),
                        false,
                    )?;
                    self.conformance |= child.conformance;
                    self.parts.push(child);

                    boundary_end = part_end -
                        1 /* newline we adjusted for when assigning part_end */
                        + boundary.len();

                    // Not enough bytes left to hold a closing "--"; this is
                    // also how we stop when no further boundary was found
                    if boundary_end + 2 > raw_body.len() {
                        break;
                    }
                    // A boundary followed by "--" is the closing boundary;
                    // whatever follows its line is the outro
                    if &raw_body.as_bytes()[boundary_end..boundary_end + 2] == b"--" {
                        if let Some(after_boundary) =
                            memchr::memchr(b'\n', &raw_body.as_bytes()[boundary_end..])
                                .map(|p| p + boundary_end + 1)
                        {
                            self.outro = raw_body.slice(after_boundary..raw_body.len());
                        }
                        break;
                    }
                }
            }
        }

        Ok(())
    }
242
    /// Returns the conformance flags recorded for this part; after
    /// parsing these include the flags of any child parts
    pub fn conformance(&self) -> MessageConformance {
        self.conformance
    }

    /// Obtain a reference to the child parts
    pub fn child_parts(&self) -> &[Self] {
        &self.parts
    }

    /// Obtain a mutable reference to the child parts
    pub fn child_parts_mut(&mut self) -> &mut Vec<Self> {
        &mut self.parts
    }

    /// Obtains a reference to the headers
    pub fn headers(&self) -> &HeaderMap {
        &self.headers
    }

    /// Obtain a mutable reference to the headers
    pub fn headers_mut<'b>(&'b mut self) -> &'b mut HeaderMap<'a> {
        &mut self.headers
    }
266
267    /// Get the raw, transfer-encoded body
268    pub fn raw_body(&self) -> SharedString {
269        self.bytes
270            .slice(self.body_offset..self.body_len.max(self.body_offset))
271    }
272
273    /// Decode transfer decoding and return the body
274    pub fn body(&self) -> Result<DecodedBody> {
275        let info = Rfc2045Info::new(&self.headers)?;
276
277        let bytes = match info.encoding {
278            ContentTransferEncoding::Base64 => {
279                let data = self.raw_body();
280                let bytes = data.as_bytes();
281                BASE64_RFC2045.decode(bytes).map_err(|err| {
282                    let b = bytes[err.position] as char;
283                    let region =
284                        &bytes[err.position.saturating_sub(8)..(err.position + 8).min(bytes.len())];
285                    let region = String::from_utf8_lossy(region);
286                    MailParsingError::BodyParse(format!(
287                        "base64 decode: {err:#} b={b:?} in {region}"
288                    ))
289                })?
290            }
291            ContentTransferEncoding::QuotedPrintable => quoted_printable::decode(
292                self.raw_body().as_bytes(),
293                quoted_printable::ParseMode::Robust,
294            )
295            .map_err(|err| {
296                MailParsingError::BodyParse(format!("quoted printable decode: {err:#}"))
297            })?,
298            ContentTransferEncoding::SevenBit
299            | ContentTransferEncoding::EightBit
300            | ContentTransferEncoding::Binary
301                if info.is_text =>
302            {
303                return Ok(DecodedBody::Text(self.raw_body()));
304            }
305            ContentTransferEncoding::SevenBit
306            | ContentTransferEncoding::EightBit
307            | ContentTransferEncoding::Binary => {
308                return Ok(DecodedBody::Binary(self.raw_body().as_bytes().to_vec()))
309            }
310        };
311
312        if info.is_text {
313            let (decoded, _malformed) = info.charset.decode_without_bom_handling(&bytes);
314            Ok(DecodedBody::Text(decoded.to_string().into()))
315        } else {
316            Ok(DecodedBody::Binary(bytes))
317        }
318    }
319
320    /// Re-constitute the message.
321    /// Each element will be parsed out, and the parsed form used
322    /// to build a new message.
323    /// This has the side effect of "fixing" non-conforming elements,
324    /// but may come at the cost of "losing" the non-sensical or otherwise
325    /// out of spec elements in the rebuilt message
326    pub fn rebuild(&self) -> Result<Self> {
327        let info = Rfc2045Info::new(&self.headers)?;
328
329        let mut children = vec![];
330        for part in &self.parts {
331            children.push(part.rebuild()?);
332        }
333
334        let mut rebuilt = if children.is_empty() {
335            let body = self.body()?;
336            match body {
337                DecodedBody::Text(text) => {
338                    let ct = info
339                        .content_type
340                        .as_ref()
341                        .map(|ct| ct.value.as_str())
342                        .unwrap_or("text/plain");
343                    Self::new_text(ct, text.as_str())
344                }
345                DecodedBody::Binary(data) => {
346                    let ct = info
347                        .content_type
348                        .as_ref()
349                        .map(|ct| ct.value.as_str())
350                        .unwrap_or("application/octet-stream");
351                    Self::new_binary(ct, &data, info.attachment_options.as_ref())
352                }
353            }
354        } else {
355            let ct = info.content_type.ok_or_else(|| {
356                MailParsingError::BodyParse(
357                    "multipart message has no content-type information!?".to_string(),
358                )
359            })?;
360            Self::new_multipart(&ct.value, children, ct.get("boundary").as_deref())
361        };
362
363        for hdr in self.headers.iter() {
364            // Skip rfc2045 associated headers; we already rebuilt
365            // those above
366            let name = hdr.get_name();
367            if name.eq_ignore_ascii_case("Content-Type")
368                || name.eq_ignore_ascii_case("Content-Transfer-Encoding")
369                || name.eq_ignore_ascii_case("Content-Disposition")
370                || name.eq_ignore_ascii_case("Content-ID")
371            {
372                continue;
373            }
374
375            if let Ok(hdr) = hdr.rebuild() {
376                rebuilt.headers_mut().push(hdr);
377            }
378        }
379
380        Ok(rebuilt)
381    }
382
383    /// Write the message content to the provided output stream
384    pub fn write_message<W: std::io::Write>(&self, out: &mut W) -> Result<()> {
385        let line_ending = if self
386            .conformance
387            .contains(MessageConformance::NON_CANONICAL_LINE_ENDINGS)
388        {
389            "\n"
390        } else {
391            "\r\n"
392        };
393
394        for hdr in self.headers.iter() {
395            hdr.write_header(out)
396                .map_err(|_| MailParsingError::WriteMessageIOError)?;
397        }
398        out.write_all(line_ending.as_bytes())
399            .map_err(|_| MailParsingError::WriteMessageIOError)?;
400
401        if self.parts.is_empty() {
402            out.write_all(self.raw_body().as_bytes())
403                .map_err(|_| MailParsingError::WriteMessageIOError)?;
404        } else {
405            let info = Rfc2045Info::new(&self.headers)?;
406            let ct = info.content_type.ok_or({
407                MailParsingError::WriteMessageWtf(
408                    "expected to have Content-Type when there are child parts",
409                )
410            })?;
411            let boundary = ct.get("boundary").ok_or({
412                MailParsingError::WriteMessageWtf("expected Content-Type to have a boundary")
413            })?;
414            out.write_all(self.intro.as_bytes())
415                .map_err(|_| MailParsingError::WriteMessageIOError)?;
416            for p in &self.parts {
417                write!(out, "--{boundary}{line_ending}")
418                    .map_err(|_| MailParsingError::WriteMessageIOError)?;
419                p.write_message(out)?;
420            }
421            write!(out, "--{boundary}--{line_ending}")
422                .map_err(|_| MailParsingError::WriteMessageIOError)?;
423            out.write_all(self.outro.as_bytes())
424                .map_err(|_| MailParsingError::WriteMessageIOError)?;
425        }
426        Ok(())
427    }
428
429    /// Convenience method wrapping write_message that returns
430    /// the formatted message as a standalone string
431    pub fn to_message_string(&self) -> String {
432        let mut out = vec![];
433        self.write_message(&mut out).unwrap();
434        String::from_utf8_lossy(&out).to_string()
435    }
436
437    pub fn replace_text_body(&mut self, content_type: &str, content: &str) {
438        let mut new_part = Self::new_text(content_type, content);
439        self.bytes = new_part.bytes;
440        self.body_offset = new_part.body_offset;
441        self.body_len = new_part.body_len;
442        // Remove any rfc2047 headers that might reflect how the content
443        // is encoded. Note that we preserve Content-Disposition as that
444        // isn't related purely to the how the content is encoded
445        self.headers.remove_all_named("Content-Type");
446        self.headers.remove_all_named("Content-Transfer-Encoding");
447        // And add any from the new part
448        self.headers.append(&mut new_part.headers.headers);
449    }
450
451    /// Constructs a new part with textual utf8 content.
452    /// quoted-printable transfer encoding will be applied,
453    /// unless it is smaller to represent the text in base64
454    pub fn new_text(content_type: &str, content: &str) -> Self {
455        // We'll probably use qp, so speculatively do the work
456        let qp_encoded = quoted_printable::encode(content);
457
458        let (mut encoded, encoding) = if qp_encoded == content.as_bytes() {
459            (qp_encoded, None)
460        } else if qp_encoded.len() <= BASE64_RFC2045.encode_len(content.len()) {
461            (qp_encoded, Some("quoted-printable"))
462        } else {
463            // Turns out base64 will be smaller; perhaps the content
464            // is dominated by non-ASCII text?
465            (
466                BASE64_RFC2045.encode(content.as_bytes()).into_bytes(),
467                Some("base64"),
468            )
469        };
470
471        if !encoded.ends_with(b"\r\n") {
472            encoded.extend_from_slice(b"\r\n");
473        }
474        let mut headers = HeaderMap::default();
475
476        let mut ct = MimeParameters::new(content_type);
477        ct.set(
478            "charset",
479            if content.is_ascii() {
480                "us-ascii"
481            } else {
482                "utf-8"
483            },
484        );
485        headers.set_content_type(ct);
486
487        if let Some(encoding) = encoding {
488            headers.set_content_transfer_encoding(MimeParameters::new(encoding));
489        }
490
491        let body_len = encoded.len();
492        let bytes =
493            String::from_utf8(encoded).expect("transfer encoder to produce valid ASCII output");
494
495        Self {
496            bytes: bytes.into(),
497            headers,
498            body_offset: 0,
499            body_len,
500            conformance: MessageConformance::default(),
501            parts: vec![],
502            intro: "".into(),
503            outro: "".into(),
504        }
505    }
506
    /// Constructs a new `text/plain` part from `content`; see `new_text`
    /// for how the transfer encoding and charset are selected
    pub fn new_text_plain(content: &str) -> Self {
        Self::new_text("text/plain", content)
    }

    /// Constructs a new `text/html` part from `content`; see `new_text`
    /// for how the transfer encoding and charset are selected
    pub fn new_html(content: &str) -> Self {
        Self::new_text("text/html", content)
    }
514
515    pub fn new_multipart(content_type: &str, parts: Vec<Self>, boundary: Option<&str>) -> Self {
516        let mut headers = HeaderMap::default();
517
518        let mut ct = MimeParameters::new(content_type);
519        match boundary {
520            Some(b) => {
521                ct.set("boundary", b);
522            }
523            None => {
524                // Generate a random boundary
525                let uuid = uuid::Uuid::new_v4();
526                let boundary = data_encoding::BASE64_NOPAD.encode(uuid.as_bytes());
527                ct.set("boundary", &boundary);
528            }
529        }
530        headers.set_content_type(ct);
531
532        Self {
533            bytes: "".into(),
534            headers,
535            body_offset: 0,
536            body_len: 0,
537            conformance: MessageConformance::default(),
538            parts,
539            intro: "".into(),
540            outro: "".into(),
541        }
542    }
543
544    pub fn new_binary(
545        content_type: &str,
546        content: &[u8],
547        options: Option<&AttachmentOptions>,
548    ) -> Self {
549        let mut encoded = BASE64_RFC2045.encode(content);
550        if !encoded.ends_with("\r\n") {
551            encoded.push_str("\r\n");
552        }
553        let mut headers = HeaderMap::default();
554
555        headers.set_content_type(MimeParameters::new(content_type));
556        headers.set_content_transfer_encoding(MimeParameters::new("base64"));
557
558        if let Some(opts) = options {
559            let mut cd = MimeParameters::new(if opts.inline { "inline" } else { "attachment" });
560            if let Some(name) = &opts.file_name {
561                cd.set("filename", name);
562            }
563            headers.set_content_disposition(cd);
564
565            if let Some(id) = &opts.content_id {
566                headers.set_content_id(MessageID(id.to_string()));
567            }
568        }
569
570        let body_len = encoded.len();
571
572        Self {
573            bytes: encoded.into(),
574            headers,
575            body_offset: 0,
576            body_len,
577            conformance: MessageConformance::default(),
578            parts: vec![],
579            intro: "".into(),
580            outro: "".into(),
581        }
582    }
583
584    /// Returns a SimplifiedStructure representation of the mime tree,
585    /// with the (probable) primary text/plain and text/html parts
586    /// pulled out, and the remaining parts recorded as a flat
587    /// attachments array
588    pub fn simplified_structure(&'a self) -> Result<SimplifiedStructure<'a>> {
589        let parts = self.simplified_structure_pointers()?;
590
591        let mut text = None;
592        let mut html = None;
593
594        let headers = &self
595            .resolve_ptr(parts.header_part)
596            .expect("header part to always be valid")
597            .headers;
598
599        if let Some(p) = parts.text_part.and_then(|p| self.resolve_ptr(p)) {
600            text = match p.body()? {
601                DecodedBody::Text(t) => Some(t),
602                DecodedBody::Binary(_) => {
603                    return Err(MailParsingError::BodyParse(
604                        "expected text/plain part to be text, but it is binary".to_string(),
605                    ))
606                }
607            };
608        }
609        if let Some(p) = parts.html_part.and_then(|p| self.resolve_ptr(p)) {
610            html = match p.body()? {
611                DecodedBody::Text(t) => Some(t),
612                DecodedBody::Binary(_) => {
613                    return Err(MailParsingError::BodyParse(
614                        "expected text/html part to be text, but it is binary".to_string(),
615                    ))
616                }
617            };
618        }
619
620        let mut attachments = vec![];
621        for ptr in parts.attachments {
622            attachments.push(self.resolve_ptr(ptr).expect("pointer to be valid").clone());
623        }
624
625        Ok(SimplifiedStructure {
626            text,
627            html,
628            headers,
629            attachments,
630        })
631    }
632
633    /// Resolve a PartPointer to the corresponding MimePart
634    pub fn resolve_ptr(&self, ptr: PartPointer) -> Option<&Self> {
635        let mut current = self;
636        let mut cursor = ptr.0.as_slice();
637
638        loop {
639            match cursor.first() {
640                Some(&idx) => {
641                    current = current.parts.get(idx as usize)?;
642                    cursor = &cursor[1..];
643                }
644                None => {
645                    // We have completed the walk
646                    return Some(current);
647                }
648            }
649        }
650    }
651
652    /// Resolve a PartPointer to the corresponding MimePart, for mutable access
653    pub fn resolve_ptr_mut(&mut self, ptr: PartPointer) -> Option<&mut Self> {
654        let mut current = self;
655        let mut cursor = ptr.0.as_slice();
656
657        loop {
658            match cursor.first() {
659                Some(&idx) => {
660                    current = current.parts.get_mut(idx as usize)?;
661                    cursor = &cursor[1..];
662                }
663                None => {
664                    // We have completed the walk
665                    return Some(current);
666                }
667            }
668        }
669    }
670
    /// Returns a set of PartPointers that locate the (probable) primary
    /// text/plain and text/html parts, and the remaining parts recorded
    /// as a flat attachments array.  The resulting
    /// PartPointers can be resolved to their actual instances for both
    /// immutable and mutable operations via resolve_ptr and resolve_ptr_mut.
    ///
    /// This is the entry point for the walk: self is treated as the root
    /// of the tree, which is why no index is passed to the implementation.
    pub fn simplified_structure_pointers(&self) -> Result<SimplifiedStructurePointers> {
        self.simplified_structure_pointers_impl(None)
    }
679
680    fn simplified_structure_pointers_impl(
681        &self,
682        my_idx: Option<u8>,
683    ) -> Result<SimplifiedStructurePointers> {
684        let info = Rfc2045Info::new(&self.headers)?;
685        let is_inline = info
686            .attachment_options
687            .as_ref()
688            .map(|ao| ao.inline)
689            .unwrap_or(true);
690
691        if let Some(ct) = &info.content_type {
692            if is_inline {
693                if ct.value == "text/plain" {
694                    return Ok(SimplifiedStructurePointers {
695                        text_part: Some(PartPointer::root_or_nth(my_idx)),
696                        html_part: None,
697                        header_part: PartPointer::root_or_nth(my_idx),
698                        attachments: vec![],
699                    });
700                }
701                if ct.value == "text/html" {
702                    return Ok(SimplifiedStructurePointers {
703                        html_part: Some(PartPointer::root_or_nth(my_idx)),
704                        text_part: None,
705                        header_part: PartPointer::root_or_nth(my_idx),
706                        attachments: vec![],
707                    });
708                }
709            }
710
711            if ct.value.starts_with("multipart/") {
712                let mut text_part = None;
713                let mut html_part = None;
714                let mut attachments = vec![];
715
716                for (i, p) in self.parts.iter().enumerate() {
717                    let part_idx = i.try_into().map_err(|_| MailParsingError::TooManyParts)?;
718                    if let Ok(mut s) = p.simplified_structure_pointers_impl(Some(part_idx)) {
719                        if text_part.is_none() {
720                            if let Some(p) = s.text_part {
721                                text_part.replace(PartPointer::root_or_nth(my_idx).append(p));
722                            }
723                        }
724                        if html_part.is_none() {
725                            if let Some(p) = s.html_part {
726                                html_part.replace(PartPointer::root_or_nth(my_idx).append(p));
727                            }
728                        }
729                        attachments.append(&mut s.attachments);
730                    }
731                }
732
733                return Ok(SimplifiedStructurePointers {
734                    html_part,
735                    text_part,
736                    header_part: PartPointer::root_or_nth(my_idx),
737                    attachments,
738                });
739            }
740
741            return Ok(SimplifiedStructurePointers {
742                html_part: None,
743                text_part: None,
744                header_part: PartPointer::root_or_nth(my_idx),
745                attachments: vec![PartPointer::root_or_nth(my_idx)],
746            });
747        }
748
749        // Assume text/plain content-type
750        Ok(SimplifiedStructurePointers {
751            text_part: Some(PartPointer::root_or_nth(my_idx)),
752            html_part: None,
753            header_part: PartPointer::root_or_nth(my_idx),
754            attachments: vec![],
755        })
756    }
757}
758
/// Identifies a MimePart by the path taken to reach it from the root
/// of the mime tree. The path is a sequence of child indices, one per
/// level; once the indices are exhausted the node reached is the one
/// selected. eg: `[]` indicates the root part, while `[0]` is the 0th
/// child of the root.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PartPointer(Vec<u8>);

impl PartPointer {
    /// Construct a PartPointer that references the root node
    pub fn root() -> Self {
        Self(Vec::new())
    }

    /// Construct a PartPointer that references the nth child when an
    /// index is supplied, or the root node otherwise
    pub fn root_or_nth(n: Option<u8>) -> Self {
        n.map_or_else(Self::root, Self::nth)
    }

    /// Construct a PartPointer that references the nth child
    pub fn nth(n: u8) -> Self {
        Self(vec![n])
    }

    /// Join other onto self, consuming both and producing a pointer
    /// that makes other relative to self
    pub fn append(mut self, other: Self) -> Self {
        self.0.extend(other.0);
        self
    }
}
795
/// The result of classifying the parts of a mime tree, expressed as
/// PartPointers rather than references so that the parts can later be
/// resolved for either shared or mutable access.
#[derive(Debug, Clone)]
pub struct SimplifiedStructurePointers {
    /// The primary text/plain part
    pub text_part: Option<PartPointer>,
    /// The primary text/html part
    pub html_part: Option<PartPointer>,
    /// The "top level" set of headers for the message
    pub header_part: PartPointer,
    /// all other (terminal) parts are attachments
    pub attachments: Vec<PartPointer>,
}
807
/// A flattened view of a mime tree, as produced by
/// `MimePart::simplified_structure`.
#[derive(Debug, Clone)]
pub struct SimplifiedStructure<'a> {
    /// The decoded body of the primary text/plain part, if any
    pub text: Option<SharedString<'a>>,
    /// The decoded body of the primary text/html part, if any
    pub html: Option<SharedString<'a>>,
    /// The headers of the part identified as the header part
    pub headers: &'a HeaderMap<'a>,
    /// Clones of the parts that were classified as attachments
    pub attachments: Vec<MimePart<'a>>,
}
815
/// Attachment related metadata for a part, corresponding to its
/// Content-Disposition and Content-ID headers.
#[derive(Debug, Clone, PartialEq)]
pub struct AttachmentOptions {
    /// The `filename` parameter of the Content-Disposition header
    pub file_name: Option<String>,
    /// True for an `inline` disposition; `MimePart::new_binary` emits
    /// `attachment` when this is false
    pub inline: bool,
    /// The value of the Content-ID header
    pub content_id: Option<String>,
}
822
823#[derive(Debug, Clone, Copy, PartialEq, Eq)]
824pub enum ContentTransferEncoding {
825    SevenBit,
826    EightBit,
827    Binary,
828    QuotedPrintable,
829    Base64,
830}
831
832impl FromStr for ContentTransferEncoding {
833    type Err = MailParsingError;
834
835    fn from_str(s: &str) -> Result<Self> {
836        if s.eq_ignore_ascii_case("7bit") {
837            Ok(Self::SevenBit)
838        } else if s.eq_ignore_ascii_case("8bit") {
839            Ok(Self::EightBit)
840        } else if s.eq_ignore_ascii_case("binary") {
841            Ok(Self::Binary)
842        } else if s.eq_ignore_ascii_case("quoted-printable") {
843            Ok(Self::QuotedPrintable)
844        } else if s.eq_ignore_ascii_case("base64") {
845            Ok(Self::Base64)
846        } else {
847            Err(MailParsingError::InvalidContentTransferEncoding(
848                s.to_string(),
849            ))
850        }
851    }
852}
853
/// The decoded content of a part body, as returned by `MimePart::body`
/// in the tests below: either text or raw bytes.
#[derive(Debug, PartialEq)]
pub enum DecodedBody<'a> {
    /// The body as text
    Text(SharedString<'a>),
    /// The body as raw bytes
    Binary(Vec<u8>),
}
859
860impl<'a> DecodedBody<'a> {
861    pub fn to_string_lossy(&'a self) -> Cow<'a, str> {
862        match self {
863            Self::Text(s) => Cow::Borrowed(s),
864            Self::Binary(b) => String::from_utf8_lossy(b),
865        }
866    }
867}
868
869#[cfg(test)]
870mod test {
871    use super::*;
872
    /// A minimal message without MIME headers parses, re-serializes
    /// unchanged, exposes its body as text, and can be rebuilt into a
    /// normalized form.
    #[test]
    fn msg_parsing() {
        // Bare LF line endings and a double space after `From:`; the
        // unmodified part must reproduce both exactly.
        let message = concat!(
            "Subject: hello there\n",
            "From:  Someone <someone@example.com>\n",
            "\n",
            "I am the body"
        );

        let part = MimePart::parse(message).unwrap();
        k9::assert_equal!(message, part.to_message_string());
        assert_eq!(part.raw_body(), "I am the body");
        k9::snapshot!(
            part.body(),
            r#"
Ok(
    Text(
        "I am the body",
    ),
)
"#
        );

        // Rebuilding normalizes the message: CRLF line endings, an
        // explicit Content-Type header, and single-spaced header values.
        k9::snapshot!(
            part.rebuild().unwrap().to_message_string(),
            r#"
Content-Type: text/plain;\r
\tcharset="us-ascii"\r
Subject: hello there\r
From: Someone <someone@example.com>\r
\r
I am the body\r

"#
        );
    }
909
    /// A part that declares base64 but carries a body that is not valid
    /// base64 still parses; the failure surfaces as a `BodyParse` error
    /// when the body is decoded.
    #[test]
    fn mime_bogus_body() {
        let message = concat!(
            "Subject: hello there\n",
            "From: Someone <someone@example.com>\n",
            "Mime-Version: 1.0\n",
            "Content-Type: text/plain\n",
            "Content-Transfer-Encoding: base64\n",
            "\n",
            "hello\n"
        );

        // Parsing itself succeeds; only `body()` reports the problem.
        let part = MimePart::parse(message).unwrap();
        assert_eq!(
            part.body().unwrap_err(),
            MailParsingError::BodyParse(
                "base64 decode: invalid length at 4 b='o' in hello\n".to_string()
            )
        );
    }
930
    /// A base64 encoded text part round-trips unchanged, decodes to the
    /// expected text, and is re-encoded as quoted-printable by `rebuild`.
    #[test]
    fn mime_encoded_body() {
        let message = concat!(
            "Subject: hello there\n",
            "From: Someone <someone@example.com>\n",
            "Mime-Version: 1.0\n",
            "Content-Type: text/plain\n",
            "Content-Transfer-Encoding: base64\n",
            "\n",
            "aGVsbG8K\n"
        );

        let part = MimePart::parse(message).unwrap();
        k9::assert_equal!(message, part.to_message_string());
        // The raw body is the still-encoded payload
        assert_eq!(part.raw_body(), "aGVsbG8K\n");
        k9::snapshot!(
            part.body(),
            r#"
Ok(
    Text(
        "hello
",
    ),
)
"#
        );

        // The rebuilt message picks quoted-printable for this body and
        // places the content headers ahead of the original headers.
        k9::snapshot!(
            part.rebuild().unwrap().to_message_string(),
            r#"
Content-Type: text/plain;\r
\tcharset="us-ascii"\r
Content-Transfer-Encoding: quoted-printable\r
Subject: hello there\r
From: Someone <someone@example.com>\r
Mime-Version: 1.0\r
\r
hello=0A\r

"#
        );
    }
973
    /// A two-part multipart/alternative message with bare LF line
    /// endings round-trips unchanged and yields two child parts whose
    /// quoted-printable and base64 bodies decode to text.
    #[test]
    fn mime_multipart_1() {
        // The base64 lines carry a trailing space before the newline;
        // the decoder is expected to tolerate it.
        let message = concat!(
            "Subject: This is a test email\n",
            "Content-Type: multipart/alternative; boundary=foobar\n",
            "Mime-Version: 1.0\n",
            "Date: Sun, 02 Oct 2016 07:06:22 -0700 (PDT)\n",
            "\n",
            "--foobar\n",
            "Content-Type: text/plain; charset=utf-8\n",
            "Content-Transfer-Encoding: quoted-printable\n",
            "\n",
            "This is the plaintext version, in utf-8. Proof by Euro: =E2=82=AC\n",
            "--foobar\n",
            "Content-Type: text/html\n",
            "Content-Transfer-Encoding: base64\n",
            "\n",
            "PGh0bWw+PGJvZHk+VGhpcyBpcyB0aGUgPGI+SFRNTDwvYj4gdmVyc2lvbiwgaW4g \n",
            "dXMtYXNjaWkuIFByb29mIGJ5IEV1cm86ICZldXJvOzwvYm9keT48L2h0bWw+Cg== \n",
            "--foobar--\n",
            "After the final boundary stuff gets ignored.\n"
        );

        let part = MimePart::parse(message).unwrap();

        // The text after the closing boundary is preserved on output
        k9::assert_equal!(message, part.to_message_string());

        let children = part.child_parts();
        k9::assert_equal!(children.len(), 2);

        // First child: quoted-printable, utf-8
        k9::snapshot!(
            children[0].body(),
            r#"
Ok(
    Text(
        "This is the plaintext version, in utf-8. Proof by Euro: €\r
",
    ),
)
"#
        );
        // Second child: base64
        k9::snapshot!(
            children[1].body(),
            r#"
Ok(
    Text(
        "<html><body>This is the <b>HTML</b> version, in us-ascii. Proof by Euro: &euro;</body></html>
",
    ),
)
"#
        );
    }
1027
1028    #[test]
1029    fn mutate_1() {
1030        let message = concat!(
1031            "Subject: This is a test email\r\n",
1032            "Content-Type: multipart/alternative; boundary=foobar\r\n",
1033            "Mime-Version: 1.0\r\n",
1034            "Date: Sun, 02 Oct 2016 07:06:22 -0700 (PDT)\r\n",
1035            "\r\n",
1036            "--foobar\r\n",
1037            "Content-Type: text/plain; charset=utf-8\r\n",
1038            "Content-Transfer-Encoding: quoted-printable\r\n",
1039            "\r\n",
1040            "This is the plaintext version, in utf-8. Proof by Euro: =E2=82=AC\r\n",
1041            "--foobar\r\n",
1042            "Content-Type: text/html\r\n",
1043            "Content-Transfer-Encoding: base64\r\n",
1044            "\r\n",
1045            "PGh0bWw+PGJvZHk+VGhpcyBpcyB0aGUgPGI+SFRNTDwvYj4gdmVyc2lvbiwgaW4g \r\n",
1046            "dXMtYXNjaWkuIFByb29mIGJ5IEV1cm86ICZldXJvOzwvYm9keT48L2h0bWw+Cg== \r\n",
1047            "--foobar--\r\n",
1048            "After the final boundary stuff gets ignored.\r\n"
1049        );
1050
1051        let mut part = MimePart::parse(message).unwrap();
1052        k9::assert_equal!(message, part.to_message_string());
1053        fn munge(part: &mut MimePart) {
1054            let headers = part.headers_mut();
1055            headers.push(Header::with_name_value("X-Woot", "Hello"));
1056            headers.insert(0, Header::with_name_value("X-First", "at the top"));
1057            headers.retain(|hdr| !hdr.get_name().eq_ignore_ascii_case("date"));
1058        }
1059        munge(&mut part);
1060
1061        let re_encoded = part.to_message_string();
1062        k9::snapshot!(
1063            re_encoded,
1064            r#"
1065X-First: at the top\r
1066Subject: This is a test email\r
1067Content-Type: multipart/alternative; boundary=foobar\r
1068Mime-Version: 1.0\r
1069X-Woot: Hello\r
1070\r
1071--foobar\r
1072Content-Type: text/plain; charset=utf-8\r
1073Content-Transfer-Encoding: quoted-printable\r
1074\r
1075This is the plaintext version, in utf-8. Proof by Euro: =E2=82=AC\r
1076--foobar\r
1077Content-Type: text/html\r
1078Content-Transfer-Encoding: base64\r
1079\r
1080PGh0bWw+PGJvZHk+VGhpcyBpcyB0aGUgPGI+SFRNTDwvYj4gdmVyc2lvbiwgaW4g \r
1081dXMtYXNjaWkuIFByb29mIGJ5IEV1cm86ICZldXJvOzwvYm9keT48L2h0bWw+Cg== \r
1082--foobar--\r
1083After the final boundary stuff gets ignored.\r
1084
1085"#
1086        );
1087
1088        eprintln!("part before mutate:\n{part:#?}");
1089
1090        part.child_parts_mut().retain(|part| {
1091            let ct = part.headers().content_type().unwrap().unwrap();
1092            ct.value == "text/html"
1093        });
1094
1095        eprintln!("part with html removed is:\n{part:#?}");
1096
1097        let re_encoded = part.to_message_string();
1098        k9::snapshot!(
1099            re_encoded,
1100            r#"
1101X-First: at the top\r
1102Subject: This is a test email\r
1103Content-Type: multipart/alternative; boundary=foobar\r
1104Mime-Version: 1.0\r
1105X-Woot: Hello\r
1106\r
1107--foobar\r
1108Content-Type: text/html\r
1109Content-Transfer-Encoding: base64\r
1110\r
1111PGh0bWw+PGJvZHk+VGhpcyBpcyB0aGUgPGI+SFRNTDwvYj4gdmVyc2lvbiwgaW4g \r
1112dXMtYXNjaWkuIFByb29mIGJ5IEV1cm86ICZldXJvOzwvYm9keT48L2h0bWw+Cg== \r
1113--foobar--\r
1114After the final boundary stuff gets ignored.\r
1115
1116"#
1117        );
1118    }
1119
    /// Replacing the text body of a part updates the encoded output:
    /// the new content is emitted and the content headers still
    /// describe it (utf-8, base64 for this emoji-bearing text).
    #[test]
    fn replace_text_body() {
        let mut part = MimePart::new_text_plain("Hello 👻\r\n");
        let encoded = part.to_message_string();
        k9::snapshot!(
            &encoded,
            r#"
Content-Type: text/plain;\r
\tcharset="utf-8"\r
Content-Transfer-Encoding: base64\r
\r
SGVsbG8g8J+Ruw0K\r

"#
        );

        // Swap in different text and check that the payload changes
        part.replace_text_body("text/plain", "Hello 🚀\r\n");
        let encoded = part.to_message_string();
        k9::snapshot!(
            &encoded,
            r#"
Content-Type: text/plain;\r
\tcharset="utf-8"\r
Content-Transfer-Encoding: base64\r
\r
SGVsbG8g8J+agA0K\r

"#
        );
    }
1150
    /// A long utf-8 text part constructed from scratch is emitted as
    /// wrapped quoted-printable, survives a parse/re-encode round trip,
    /// and is reported as the text part of its own simplified structure.
    #[test]
    fn construct_1() {
        // Long enough, and with enough non-ASCII, to force soft line
        // breaks in the quoted-printable output.
        let input_text = "Well, hello there! This is the plaintext version, in utf-8. Here's a Euro: €, and here are some emoji 👻 🍉 💩 and this long should be long enough that we wrap it in the returned part, let's see how that turns out!\r\n";

        let part = MimePart::new_text_plain(input_text);

        let encoded = part.to_message_string();
        k9::snapshot!(
            &encoded,
            r#"
Content-Type: text/plain;\r
\tcharset="utf-8"\r
Content-Transfer-Encoding: quoted-printable\r
\r
Well, hello there! This is the plaintext version, in utf-8. Here's a Euro: =\r
=E2=82=AC, and here are some emoji =F0=9F=91=BB =F0=9F=8D=89 =F0=9F=92=A9 a=\r
nd this long should be long enough that we wrap it in the returned part, le=\r
t's see how that turns out!\r

"#
        );

        let parsed_part = MimePart::parse(encoded.clone()).unwrap();
        k9::assert_equal!(encoded.as_str(), parsed_part.to_message_string().as_str());
        // NOTE(review): this checks the decoded body of the constructed
        // `part`; `parsed_part` is only checked through re-encoding and
        // the structure pointers below.
        k9::assert_equal!(part.body().unwrap(), DecodedBody::Text(input_text.into()));
        // A single-part message: both the text part and the header part
        // are addressed by an empty pointer.
        k9::snapshot!(
            parsed_part.simplified_structure_pointers(),
            "
Ok(
    SimplifiedStructurePointers {
        text_part: Some(
            PartPointer(
                [],
            ),
        ),
        html_part: None,
        header_part: PartPointer(
            [],
        ),
        attachments: [],
    },
)
"
        );
    }
1196
    /// A multipart/mixed message built from a plain text part, an HTML
    /// part and a binary attachment is encoded with the requested
    /// boundary, and its simplified structure points at each child by
    /// index.
    #[test]
    fn construct_2() {
        let msg = MimePart::new_multipart(
            "multipart/mixed",
            vec![
                MimePart::new_text_plain("plain text"),
                MimePart::new_html("<b>rich</b> text"),
                MimePart::new_binary(
                    "application/octet-stream",
                    &[0, 1, 2, 3],
                    Some(&AttachmentOptions {
                        file_name: Some("woot.bin".to_string()),
                        inline: false,
                        content_id: Some("woot.id".to_string()),
                    }),
                ),
            ],
            Some("my-boundary"),
        );
        // The attachment options surface as the Content-Disposition and
        // Content-ID headers of the third part.
        k9::snapshot!(
            msg.to_message_string(),
            r#"
Content-Type: multipart/mixed;\r
\tboundary="my-boundary"\r
\r
--my-boundary\r
Content-Type: text/plain;\r
\tcharset="us-ascii"\r
\r
plain text\r
--my-boundary\r
Content-Type: text/html;\r
\tcharset="us-ascii"\r
\r
<b>rich</b> text\r
--my-boundary\r
Content-Type: application/octet-stream\r
Content-Transfer-Encoding: base64\r
Content-Disposition: attachment;\r
\tfilename="woot.bin"\r
Content-ID: <woot.id>\r
\r
AAECAw==\r
--my-boundary--\r

"#
        );

        // Children are addressed by their index under the root; the
        // root itself (an empty pointer) carries the headers.
        k9::snapshot!(
            msg.simplified_structure_pointers(),
            "
Ok(
    SimplifiedStructurePointers {
        text_part: Some(
            PartPointer(
                [
                    0,
                ],
            ),
        ),
        html_part: Some(
            PartPointer(
                [
                    1,
                ],
            ),
        ),
        header_part: PartPointer(
            [],
        ),
        attachments: [
            PartPointer(
                [
                    2,
                ],
            ),
        ],
    },
)
"
        );
    }
1279
    /// Malformed header lines do not prevent parsing; they are recorded
    /// in the conformance flags of the part instead.
    #[test]
    fn funky_headers() {
        // `Subject` has no colon at all and `Other:` has no value. The
        // declared multipart boundary never appears in the body; this
        // test only asserts on the conformance flag.
        let message = concat!(
            "Subject\r\n",
            "Other:\r\n",
            "Content-Type: multipart/alternative; boundary=foobar\r\n",
            "Mime-Version: 1.0\r\n",
            "Date: Sun, 02 Oct 2016 07:06:22 -0700 (PDT)\r\n",
            "\r\n",
            "The body.\r\n"
        );

        let part = MimePart::parse(message).unwrap();
        assert!(part
            .conformance()
            .contains(MessageConformance::MISSING_COLON_VALUE));
    }
1297
1298    /// This is a regression test for an issue where we'd interpret the
1299    /// binary bytes as default windows-1252 codepage charset, and mangle them.
1300    /// The high byte is sufficient to trigger the offending code prior
1301    /// to the fix
1302    #[test]
1303    fn rebuild_binary() {
1304        let expect = &[0, 1, 2, 3, 0xbe, 4, 5];
1305        let part = MimePart::new_binary("applicat/octet-stream", expect, None);
1306
1307        let rebuilt = part.rebuild().unwrap();
1308        let body = rebuilt.body().unwrap();
1309
1310        assert_eq!(body, DecodedBody::Binary(expect.to_vec()));
1311    }
1312}