mailparsing/
mimepart.rs

1use crate::header::{HeaderParseResult, MessageConformance};
2use crate::headermap::HeaderMap;
3use crate::strings::IntoSharedString;
4use crate::{
5    has_lone_cr_or_lf, Header, MailParsingError, MessageID, MimeParameters, Result, SharedString,
6};
7use charset::Charset;
8use std::borrow::Cow;
9use std::str::FromStr;
10
/// Base64 codec used for MIME bodies.
///
/// We define our own because `data_encoding::BASE64_MIME`, despite its name,
/// is not RFC 2045 compliant, and will not ignore spaces.
///
/// This encoding uses the standard base64 alphabet with `=` padding,
/// ignores space, CR, LF and tab when decoding, and wraps encoded output
/// at 76 columns using CRLF.
const BASE64_RFC2045: data_encoding::Encoding = data_encoding_macro::new_encoding! {
    symbols: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
    padding: '=',
    ignore: " \r\n\t",
    wrap_width: 76,
    wrap_separator: "\r\n",
};
20
/// A node in a MIME tree: a set of headers plus either a leaf body or,
/// for multipart content, a list of child parts.
#[derive(Debug, Clone, PartialEq)]
pub struct MimePart<'a> {
    /// The bytes that comprise this part, from its beginning to its end
    bytes: SharedString<'a>,
    /// The parsed headers from the start of bytes
    headers: HeaderMap<'a>,
    /// The index into bytes of the first non-header byte.
    body_offset: usize,
    /// The index into bytes at which the body ends. Despite the name,
    /// `raw_body` uses this as an end offset (`body_offset..body_len`)
    /// rather than as a length. It is reset to 0 once a multipart body
    /// has been split into child parts.
    body_len: usize,
    /// Conformance flags accumulated while parsing this part, including
    /// those of any child parts.
    conformance: MessageConformance,
    /// The child parts; empty unless this is a parsed or constructed
    /// multipart.
    parts: Vec<Self>,
    /// For multipart, the content that precedes the first boundary
    intro: SharedString<'a>,
    /// For multipart, the content that follows the last boundary
    outro: SharedString<'a>,
}
37
/// The RFC 2045 related facts about a part, derived from its headers.
struct Rfc2045Info {
    /// The Content-Transfer-Encoding; 7bit when the header is absent
    encoding: ContentTransferEncoding,
    /// The charset named by Content-Type; us-ascii when unspecified
    charset: Charset,
    /// The parsed Content-Type header, if one is present
    content_type: Option<MimeParameters>,
    /// Whether the content is textual; true when there is no Content-Type
    is_text: bool,
    /// Whether the content is multipart; false when there is no Content-Type
    is_multipart: bool,
    /// Populated only when a Content-Disposition header is present
    attachment_options: Option<AttachmentOptions>,
}
46
47impl Rfc2045Info {
48    fn new(headers: &HeaderMap) -> Result<Self> {
49        let content_transfer_encoding = headers.content_transfer_encoding()?;
50
51        let encoding = match &content_transfer_encoding {
52            Some(cte) => ContentTransferEncoding::from_str(&cte.value)?,
53            None => ContentTransferEncoding::SevenBit,
54        };
55
56        let content_type = headers.content_type()?;
57        let charset = if let Some(ct) = &content_type {
58            ct.get("charset")
59        } else {
60            None
61        };
62        let charset = charset.unwrap_or_else(|| "us-ascii".to_string());
63
64        let charset = Charset::for_label_no_replacement(charset.as_bytes())
65            .ok_or_else(|| MailParsingError::BodyParse(format!("unsupported charset {charset}")))?;
66
67        let (is_text, is_multipart) = if let Some(ct) = &content_type {
68            (ct.is_text(), ct.is_multipart())
69        } else {
70            (true, false)
71        };
72
73        let content_disposition = headers.content_disposition()?;
74        let attachment_options = match content_disposition {
75            Some(cd) => {
76                let inline = cd.value == "inline";
77                let content_id = headers.content_id()?;
78                let file_name = cd.get("filename");
79
80                Some(AttachmentOptions {
81                    file_name,
82                    inline,
83                    content_id: content_id.map(|cid| cid.0),
84                })
85            }
86            None => None,
87        };
88
89        Ok(Self {
90            encoding,
91            charset,
92            content_type,
93            is_text,
94            is_multipart,
95            attachment_options,
96        })
97    }
98}
99
100impl<'a> MimePart<'a> {
101    /// Parse some data into a tree of MimeParts
102    pub fn parse<S>(bytes: S) -> Result<Self>
103    where
104        S: IntoSharedString<'a>,
105    {
106        let (bytes, base_conformance) = bytes.into_shared_string();
107        Self::parse_impl(bytes, base_conformance, true)
108    }
109
110    fn parse_impl(
111        bytes: SharedString<'a>,
112        base_conformance: MessageConformance,
113        is_top_level: bool,
114    ) -> Result<Self> {
115        let HeaderParseResult {
116            headers,
117            body_offset,
118            overall_conformance: mut conformance,
119        } = Header::parse_headers(bytes.clone())?;
120
121        conformance |= base_conformance;
122
123        let body_len = bytes.len();
124
125        if !bytes.is_ascii() {
126            conformance.set(MessageConformance::NEEDS_TRANSFER_ENCODING, true);
127        }
128        {
129            let mut prev = 0;
130            for idx in memchr::memchr_iter(b'\n', bytes.as_bytes()) {
131                if idx - prev > 78 {
132                    conformance.set(MessageConformance::LINE_TOO_LONG, true);
133                    break;
134                }
135                prev = idx;
136            }
137        }
138        conformance.set(
139            MessageConformance::NON_CANONICAL_LINE_ENDINGS,
140            has_lone_cr_or_lf(bytes.as_bytes()),
141        );
142
143        if is_top_level {
144            conformance.set(
145                MessageConformance::MISSING_DATE_HEADER,
146                !matches!(headers.date(), Ok(Some(_))),
147            );
148            conformance.set(
149                MessageConformance::MISSING_MESSAGE_ID_HEADER,
150                !matches!(headers.message_id(), Ok(Some(_))),
151            );
152            conformance.set(
153                MessageConformance::MISSING_MIME_VERSION,
154                match headers.mime_version() {
155                    Ok(Some(v)) => v.as_str() != "1.0",
156                    _ => true,
157                },
158            );
159        }
160
161        let mut part = Self {
162            bytes,
163            headers,
164            body_offset,
165            body_len,
166            conformance,
167            parts: vec![],
168            intro: SharedString::Borrowed(""),
169            outro: SharedString::Borrowed(""),
170        };
171
172        part.recursive_parse()?;
173
174        Ok(part)
175    }
176
177    fn recursive_parse(&mut self) -> Result<()> {
178        let info = Rfc2045Info::new(&self.headers)?;
179        if let Some((boundary, true)) = info
180            .content_type
181            .as_ref()
182            .and_then(|ct| ct.get("boundary").map(|b| (b, info.is_multipart)))
183        {
184            let boundary = format!("\n--{boundary}");
185            let raw_body = self
186                .bytes
187                .slice(self.body_offset.saturating_sub(1)..self.bytes.len());
188
189            let mut iter = memchr::memmem::find_iter(raw_body.as_bytes(), &boundary);
190            if let Some(first_boundary_pos) = iter.next() {
191                self.intro = raw_body.slice(0..first_boundary_pos);
192
193                // When we create parts, we ignore the original body span in
194                // favor of what we're parsing out here now
195                self.body_len = 0;
196
197                let mut boundary_end = first_boundary_pos + boundary.len();
198
199                while let Some(part_start) =
200                    memchr::memchr(b'\n', &raw_body.as_bytes()[boundary_end..])
201                        .map(|p| p + boundary_end + 1)
202                {
203                    let part_end = iter
204                        .next()
205                        .map(|p| {
206                            // P is the newline; we want to include it in the raw
207                            // bytes for this part, so look beyond it
208                            p + 1
209                        })
210                        .unwrap_or(raw_body.len());
211
212                    let child = Self::parse_impl(
213                        raw_body.slice(part_start..part_end),
214                        MessageConformance::default(),
215                        false,
216                    )?;
217                    self.conformance |= child.conformance;
218                    self.parts.push(child);
219
220                    boundary_end = part_end -
221                        1 /* newline we adjusted for when assigning part_end */
222                        + boundary.len();
223
224                    if boundary_end + 2 > raw_body.len() {
225                        break;
226                    }
227                    if &raw_body.as_bytes()[boundary_end..boundary_end + 2] == b"--" {
228                        if let Some(after_boundary) =
229                            memchr::memchr(b'\n', &raw_body.as_bytes()[boundary_end..])
230                                .map(|p| p + boundary_end + 1)
231                        {
232                            self.outro = raw_body.slice(after_boundary..raw_body.len());
233                        }
234                        break;
235                    }
236                }
237            }
238        }
239
240        Ok(())
241    }
242
    /// Returns the conformance flags recorded for this part
    pub fn conformance(&self) -> MessageConformance {
        self.conformance
    }
246
    /// Obtain a reference to the child parts.
    /// The slice is empty for a part that has no children.
    pub fn child_parts(&self) -> &[Self] {
        &self.parts
    }
251
    /// Obtain a mutable reference to the child parts, allowing
    /// parts to be added, removed or modified in place
    pub fn child_parts_mut(&mut self) -> &mut Vec<Self> {
        &mut self.parts
    }
256
    /// Obtain a reference to the headers of this part
    pub fn headers(&self) -> &HeaderMap {
        &self.headers
    }
261
    /// Obtain a mutable reference to the headers of this part
    pub fn headers_mut<'b>(&'b mut self) -> &'b mut HeaderMap<'a> {
        &mut self.headers
    }
266
267    /// Get the raw, transfer-encoded body
268    pub fn raw_body(&self) -> SharedString {
269        self.bytes
270            .slice(self.body_offset..self.body_len.max(self.body_offset))
271    }
272
273    /// Decode transfer decoding and return the body
274    pub fn body(&self) -> Result<DecodedBody> {
275        let info = Rfc2045Info::new(&self.headers)?;
276
277        let bytes = match info.encoding {
278            ContentTransferEncoding::Base64 => {
279                let data = self.raw_body();
280                let bytes = data.as_bytes();
281                BASE64_RFC2045.decode(bytes).map_err(|err| {
282                    let b = bytes[err.position] as char;
283                    let region = &bytes[err.position.saturating_sub(8)..err.position + 8];
284                    let region = String::from_utf8_lossy(region);
285                    MailParsingError::BodyParse(format!(
286                        "base64 decode: {err:#} b={b:?} in {region}"
287                    ))
288                })?
289            }
290            ContentTransferEncoding::QuotedPrintable => quoted_printable::decode(
291                self.raw_body().as_bytes(),
292                quoted_printable::ParseMode::Robust,
293            )
294            .map_err(|err| {
295                MailParsingError::BodyParse(format!("quoted printable decode: {err:#}"))
296            })?,
297            ContentTransferEncoding::SevenBit
298            | ContentTransferEncoding::EightBit
299            | ContentTransferEncoding::Binary
300                if info.is_text =>
301            {
302                return Ok(DecodedBody::Text(self.raw_body()));
303            }
304            ContentTransferEncoding::SevenBit
305            | ContentTransferEncoding::EightBit
306            | ContentTransferEncoding::Binary => {
307                return Ok(DecodedBody::Binary(self.raw_body().as_bytes().to_vec()))
308            }
309        };
310
311        if info.is_text {
312            let (decoded, _malformed) = info.charset.decode_without_bom_handling(&bytes);
313            Ok(DecodedBody::Text(decoded.to_string().into()))
314        } else {
315            Ok(DecodedBody::Binary(bytes))
316        }
317    }
318
319    /// Re-constitute the message.
320    /// Each element will be parsed out, and the parsed form used
321    /// to build a new message.
322    /// This has the side effect of "fixing" non-conforming elements,
323    /// but may come at the cost of "losing" the non-sensical or otherwise
324    /// out of spec elements in the rebuilt message
325    pub fn rebuild(&self) -> Result<Self> {
326        let info = Rfc2045Info::new(&self.headers)?;
327
328        let mut children = vec![];
329        for part in &self.parts {
330            children.push(part.rebuild()?);
331        }
332
333        let mut rebuilt = if children.is_empty() {
334            let body = self.body()?;
335            match body {
336                DecodedBody::Text(text) => {
337                    let ct = info
338                        .content_type
339                        .as_ref()
340                        .map(|ct| ct.value.as_str())
341                        .unwrap_or("text/plain");
342                    Self::new_text(ct, text.as_str())
343                }
344                DecodedBody::Binary(data) => {
345                    let ct = info
346                        .content_type
347                        .as_ref()
348                        .map(|ct| ct.value.as_str())
349                        .unwrap_or("application/octet-stream");
350                    Self::new_binary(ct, &data, info.attachment_options.as_ref())
351                }
352            }
353        } else {
354            let ct = info.content_type.ok_or_else(|| {
355                MailParsingError::BodyParse(
356                    "multipart message has no content-type information!?".to_string(),
357                )
358            })?;
359            Self::new_multipart(&ct.value, children, ct.get("boundary").as_deref())
360        };
361
362        for hdr in self.headers.iter() {
363            // Skip rfc2045 associated headers; we already rebuilt
364            // those above
365            let name = hdr.get_name();
366            if name.eq_ignore_ascii_case("Content-Type")
367                || name.eq_ignore_ascii_case("Content-Transfer-Encoding")
368                || name.eq_ignore_ascii_case("Content-Disposition")
369                || name.eq_ignore_ascii_case("Content-ID")
370            {
371                continue;
372            }
373
374            if let Ok(hdr) = hdr.rebuild() {
375                rebuilt.headers_mut().push(hdr);
376            }
377        }
378
379        Ok(rebuilt)
380    }
381
382    /// Write the message content to the provided output stream
383    pub fn write_message<W: std::io::Write>(&self, out: &mut W) -> Result<()> {
384        let line_ending = if self
385            .conformance
386            .contains(MessageConformance::NON_CANONICAL_LINE_ENDINGS)
387        {
388            "\n"
389        } else {
390            "\r\n"
391        };
392
393        for hdr in self.headers.iter() {
394            hdr.write_header(out)
395                .map_err(|_| MailParsingError::WriteMessageIOError)?;
396        }
397        out.write_all(line_ending.as_bytes())
398            .map_err(|_| MailParsingError::WriteMessageIOError)?;
399
400        if self.parts.is_empty() {
401            out.write_all(self.raw_body().as_bytes())
402                .map_err(|_| MailParsingError::WriteMessageIOError)?;
403        } else {
404            let info = Rfc2045Info::new(&self.headers)?;
405            let ct = info.content_type.ok_or({
406                MailParsingError::WriteMessageWtf(
407                    "expected to have Content-Type when there are child parts",
408                )
409            })?;
410            let boundary = ct.get("boundary").ok_or({
411                MailParsingError::WriteMessageWtf("expected Content-Type to have a boundary")
412            })?;
413            out.write_all(self.intro.as_bytes())
414                .map_err(|_| MailParsingError::WriteMessageIOError)?;
415            for p in &self.parts {
416                write!(out, "--{boundary}{line_ending}")
417                    .map_err(|_| MailParsingError::WriteMessageIOError)?;
418                p.write_message(out)?;
419            }
420            write!(out, "--{boundary}--{line_ending}")
421                .map_err(|_| MailParsingError::WriteMessageIOError)?;
422            out.write_all(self.outro.as_bytes())
423                .map_err(|_| MailParsingError::WriteMessageIOError)?;
424        }
425        Ok(())
426    }
427
428    /// Convenience method wrapping write_message that returns
429    /// the formatted message as a standalone string
430    pub fn to_message_string(&self) -> String {
431        let mut out = vec![];
432        self.write_message(&mut out).unwrap();
433        String::from_utf8_lossy(&out).to_string()
434    }
435
436    pub fn replace_text_body(&mut self, content_type: &str, content: &str) {
437        let mut new_part = Self::new_text(content_type, content);
438        self.bytes = new_part.bytes;
439        self.body_offset = new_part.body_offset;
440        self.body_len = new_part.body_len;
441        // Remove any rfc2047 headers that might reflect how the content
442        // is encoded. Note that we preserve Content-Disposition as that
443        // isn't related purely to the how the content is encoded
444        self.headers.remove_all_named("Content-Type");
445        self.headers.remove_all_named("Content-Transfer-Encoding");
446        // And add any from the new part
447        self.headers.append(&mut new_part.headers.headers);
448    }
449
450    /// Constructs a new part with textual utf8 content.
451    /// quoted-printable transfer encoding will be applied,
452    /// unless it is smaller to represent the text in base64
453    pub fn new_text(content_type: &str, content: &str) -> Self {
454        // We'll probably use qp, so speculatively do the work
455        let qp_encoded = quoted_printable::encode(content);
456
457        let (mut encoded, encoding) = if qp_encoded == content.as_bytes() {
458            (qp_encoded, None)
459        } else if qp_encoded.len() <= BASE64_RFC2045.encode_len(content.len()) {
460            (qp_encoded, Some("quoted-printable"))
461        } else {
462            // Turns out base64 will be smaller; perhaps the content
463            // is dominated by non-ASCII text?
464            (
465                BASE64_RFC2045.encode(content.as_bytes()).into_bytes(),
466                Some("base64"),
467            )
468        };
469
470        if !encoded.ends_with(b"\r\n") {
471            encoded.extend_from_slice(b"\r\n");
472        }
473        let mut headers = HeaderMap::default();
474
475        let mut ct = MimeParameters::new(content_type);
476        ct.set(
477            "charset",
478            if content.is_ascii() {
479                "us-ascii"
480            } else {
481                "utf-8"
482            },
483        );
484        headers.set_content_type(ct);
485
486        if let Some(encoding) = encoding {
487            headers.set_content_transfer_encoding(MimeParameters::new(encoding));
488        }
489
490        let body_len = encoded.len();
491        let bytes =
492            String::from_utf8(encoded).expect("transfer encoder to produce valid ASCII output");
493
494        Self {
495            bytes: bytes.into(),
496            headers,
497            body_offset: 0,
498            body_len,
499            conformance: MessageConformance::default(),
500            parts: vec![],
501            intro: "".into(),
502            outro: "".into(),
503        }
504    }
505
    /// Constructs a new `text/plain` part; shorthand for
    /// `new_text("text/plain", content)`
    pub fn new_text_plain(content: &str) -> Self {
        Self::new_text("text/plain", content)
    }
509
    /// Constructs a new `text/html` part; shorthand for
    /// `new_text("text/html", content)`
    pub fn new_html(content: &str) -> Self {
        Self::new_text("text/html", content)
    }
513
514    pub fn new_multipart(content_type: &str, parts: Vec<Self>, boundary: Option<&str>) -> Self {
515        let mut headers = HeaderMap::default();
516
517        let mut ct = MimeParameters::new(content_type);
518        match boundary {
519            Some(b) => {
520                ct.set("boundary", b);
521            }
522            None => {
523                // Generate a random boundary
524                let uuid = uuid::Uuid::new_v4();
525                let boundary = data_encoding::BASE64_NOPAD.encode(uuid.as_bytes());
526                ct.set("boundary", &boundary);
527            }
528        }
529        headers.set_content_type(ct);
530
531        Self {
532            bytes: "".into(),
533            headers,
534            body_offset: 0,
535            body_len: 0,
536            conformance: MessageConformance::default(),
537            parts,
538            intro: "".into(),
539            outro: "".into(),
540        }
541    }
542
543    pub fn new_binary(
544        content_type: &str,
545        content: &[u8],
546        options: Option<&AttachmentOptions>,
547    ) -> Self {
548        let mut encoded = BASE64_RFC2045.encode(content);
549        if !encoded.ends_with("\r\n") {
550            encoded.push_str("\r\n");
551        }
552        let mut headers = HeaderMap::default();
553
554        headers.set_content_type(MimeParameters::new(content_type));
555        headers.set_content_transfer_encoding(MimeParameters::new("base64"));
556
557        if let Some(opts) = options {
558            let mut cd = MimeParameters::new(if opts.inline { "inline" } else { "attachment" });
559            if let Some(name) = &opts.file_name {
560                cd.set("filename", name);
561            }
562            headers.set_content_disposition(cd);
563
564            if let Some(id) = &opts.content_id {
565                headers.set_content_id(MessageID(id.to_string()));
566            }
567        }
568
569        let body_len = encoded.len();
570
571        Self {
572            bytes: encoded.into(),
573            headers,
574            body_offset: 0,
575            body_len,
576            conformance: MessageConformance::default(),
577            parts: vec![],
578            intro: "".into(),
579            outro: "".into(),
580        }
581    }
582
583    /// Returns a SimplifiedStructure representation of the mime tree,
584    /// with the (probable) primary text/plain and text/html parts
585    /// pulled out, and the remaining parts recorded as a flat
586    /// attachments array
587    pub fn simplified_structure(&'a self) -> Result<SimplifiedStructure<'a>> {
588        let parts = self.simplified_structure_pointers()?;
589
590        let mut text = None;
591        let mut html = None;
592
593        let headers = &self
594            .resolve_ptr(parts.header_part)
595            .expect("header part to always be valid")
596            .headers;
597
598        if let Some(p) = parts.text_part.and_then(|p| self.resolve_ptr(p)) {
599            text = match p.body()? {
600                DecodedBody::Text(t) => Some(t),
601                DecodedBody::Binary(_) => {
602                    return Err(MailParsingError::BodyParse(
603                        "expected text/plain part to be text, but it is binary".to_string(),
604                    ))
605                }
606            };
607        }
608        if let Some(p) = parts.html_part.and_then(|p| self.resolve_ptr(p)) {
609            html = match p.body()? {
610                DecodedBody::Text(t) => Some(t),
611                DecodedBody::Binary(_) => {
612                    return Err(MailParsingError::BodyParse(
613                        "expected text/html part to be text, but it is binary".to_string(),
614                    ))
615                }
616            };
617        }
618
619        let mut attachments = vec![];
620        for ptr in parts.attachments {
621            attachments.push(self.resolve_ptr(ptr).expect("pointer to be valid").clone());
622        }
623
624        Ok(SimplifiedStructure {
625            text,
626            html,
627            headers,
628            attachments,
629        })
630    }
631
632    /// Resolve a PartPointer to the corresponding MimePart
633    pub fn resolve_ptr(&self, ptr: PartPointer) -> Option<&Self> {
634        let mut current = self;
635        let mut cursor = ptr.0.as_slice();
636
637        loop {
638            match cursor.first() {
639                Some(&idx) => {
640                    current = current.parts.get(idx as usize)?;
641                    cursor = &cursor[1..];
642                }
643                None => {
644                    // We have completed the walk
645                    return Some(current);
646                }
647            }
648        }
649    }
650
651    /// Resolve a PartPointer to the corresponding MimePart, for mutable access
652    pub fn resolve_ptr_mut(&mut self, ptr: PartPointer) -> Option<&mut Self> {
653        let mut current = self;
654        let mut cursor = ptr.0.as_slice();
655
656        loop {
657            match cursor.first() {
658                Some(&idx) => {
659                    current = current.parts.get_mut(idx as usize)?;
660                    cursor = &cursor[1..];
661                }
662                None => {
663                    // We have completed the walk
664                    return Some(current);
665                }
666            }
667        }
668    }
669
    /// Returns a set of PartPointers that locate the (probable) primary
    /// text/plain and text/html parts, and the remaining parts recorded
    /// as a flat attachments array.  The resulting
    /// PartPointers can be resolved to their actual instances for both
    /// immutable and mutable operations via resolve_ptr and resolve_ptr_mut.
    ///
    /// The pointers are relative to this part, which is treated as
    /// the root of the tree.
    pub fn simplified_structure_pointers(&self) -> Result<SimplifiedStructurePointers> {
        self.simplified_structure_pointers_impl(None)
    }
678
679    fn simplified_structure_pointers_impl(
680        &self,
681        my_idx: Option<u8>,
682    ) -> Result<SimplifiedStructurePointers> {
683        let info = Rfc2045Info::new(&self.headers)?;
684        let is_inline = info
685            .attachment_options
686            .as_ref()
687            .map(|ao| ao.inline)
688            .unwrap_or(true);
689
690        if let Some(ct) = &info.content_type {
691            if is_inline {
692                if ct.value == "text/plain" {
693                    return Ok(SimplifiedStructurePointers {
694                        text_part: Some(PartPointer::root_or_nth(my_idx)),
695                        html_part: None,
696                        header_part: PartPointer::root_or_nth(my_idx),
697                        attachments: vec![],
698                    });
699                }
700                if ct.value == "text/html" {
701                    return Ok(SimplifiedStructurePointers {
702                        html_part: Some(PartPointer::root_or_nth(my_idx)),
703                        text_part: None,
704                        header_part: PartPointer::root_or_nth(my_idx),
705                        attachments: vec![],
706                    });
707                }
708            }
709
710            if ct.value.starts_with("multipart/") {
711                let mut text_part = None;
712                let mut html_part = None;
713                let mut attachments = vec![];
714
715                for (i, p) in self.parts.iter().enumerate() {
716                    let part_idx = i.try_into().map_err(|_| MailParsingError::TooManyParts)?;
717                    if let Ok(mut s) = p.simplified_structure_pointers_impl(Some(part_idx)) {
718                        if text_part.is_none() {
719                            if let Some(p) = s.text_part {
720                                text_part.replace(PartPointer::root_or_nth(my_idx).append(p));
721                            }
722                        }
723                        if html_part.is_none() {
724                            if let Some(p) = s.html_part {
725                                html_part.replace(PartPointer::root_or_nth(my_idx).append(p));
726                            }
727                        }
728                        attachments.append(&mut s.attachments);
729                    }
730                }
731
732                return Ok(SimplifiedStructurePointers {
733                    html_part,
734                    text_part,
735                    header_part: PartPointer::root_or_nth(my_idx),
736                    attachments,
737                });
738            }
739
740            return Ok(SimplifiedStructurePointers {
741                html_part: None,
742                text_part: None,
743                header_part: PartPointer::root_or_nth(my_idx),
744                attachments: vec![PartPointer::root_or_nth(my_idx)],
745            });
746        }
747
748        // Assume text/plain content-type
749        Ok(SimplifiedStructurePointers {
750            text_part: Some(PartPointer::root_or_nth(my_idx)),
751            html_part: None,
752            header_part: PartPointer::root_or_nth(my_idx),
753            attachments: vec![],
754        })
755    }
756}
757
/// Identifies a MimePart by the path taken to reach it from the root
/// of the mime tree. The path is a sequence of child indices, one per
/// level of the tree; the node reached once the indices are exhausted
/// is the one referenced. eg: `[]` indicates the root part, while
/// `[0]` is the 0th child of the root.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PartPointer(Vec<u8>);

impl PartPointer {
    /// Construct a PartPointer that references the root node
    pub fn root() -> Self {
        Self(Vec::new())
    }

    /// Construct a PartPointer that references the nth child when
    /// `n` is provided, and the root node otherwise
    pub fn root_or_nth(n: Option<u8>) -> Self {
        n.map_or_else(Self::root, Self::nth)
    }

    /// Construct a PartPointer that references the nth child
    pub fn nth(n: u8) -> Self {
        Self(vec![n])
    }

    /// Consume self and produce a pointer that follows the path of
    /// self and then the path of other, making other relative to self
    pub fn append(mut self, other: Self) -> Self {
        self.0.extend(other.0);
        self
    }
}
794
/// The result of `MimePart::simplified_structure_pointers`: the
/// locations of the interesting parts of a message, expressed as
/// PartPointers that can be resolved against the part that produced them.
#[derive(Debug, Clone)]
pub struct SimplifiedStructurePointers {
    /// The primary text/plain part
    pub text_part: Option<PartPointer>,
    /// The primary text/html part
    pub html_part: Option<PartPointer>,
    /// The "top level" set of headers for the message
    pub header_part: PartPointer,
    /// all other (terminal) parts are attachments
    pub attachments: Vec<PartPointer>,
}
806
/// The result of `MimePart::simplified_structure`: a flattened view
/// of a message with its primary bodies already decoded.
#[derive(Debug, Clone)]
pub struct SimplifiedStructure<'a> {
    /// The decoded body of the primary text/plain part, if there is one
    pub text: Option<SharedString<'a>>,
    /// The decoded body of the primary text/html part, if there is one
    pub html: Option<SharedString<'a>>,
    /// The headers of the part identified as the header part
    pub headers: &'a HeaderMap<'a>,
    /// Clones of the parts that were classified as attachments
    pub attachments: Vec<MimePart<'a>>,
}
814
/// Describes how a part is presented as an attachment.
/// `MimePart::new_binary` uses this to produce the Content-Disposition
/// and Content-ID headers.
#[derive(Debug, Clone, PartialEq)]
pub struct AttachmentOptions {
    /// The `filename` parameter of Content-Disposition, if any
    pub file_name: Option<String>,
    /// true for an `inline` disposition, false for `attachment`
    pub inline: bool,
    /// The value of the Content-ID header, if any
    pub content_id: Option<String>,
}
821
/// The supported values of the Content-Transfer-Encoding header
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ContentTransferEncoding {
    /// `7bit`; also assumed when the header is absent
    SevenBit,
    /// `8bit`
    EightBit,
    /// `binary`
    Binary,
    /// `quoted-printable`
    QuotedPrintable,
    /// `base64`
    Base64,
}
830
831impl FromStr for ContentTransferEncoding {
832    type Err = MailParsingError;
833
834    fn from_str(s: &str) -> Result<Self> {
835        if s.eq_ignore_ascii_case("7bit") {
836            Ok(Self::SevenBit)
837        } else if s.eq_ignore_ascii_case("8bit") {
838            Ok(Self::EightBit)
839        } else if s.eq_ignore_ascii_case("binary") {
840            Ok(Self::Binary)
841        } else if s.eq_ignore_ascii_case("quoted-printable") {
842            Ok(Self::QuotedPrintable)
843        } else if s.eq_ignore_ascii_case("base64") {
844            Ok(Self::Base64)
845        } else {
846            Err(MailParsingError::InvalidContentTransferEncoding(
847                s.to_string(),
848            ))
849        }
850    }
851}
852
/// The body of a part after its transfer encoding has been removed
#[derive(Debug, PartialEq)]
pub enum DecodedBody<'a> {
    /// Textual content
    Text(SharedString<'a>),
    /// Non-textual content, as raw bytes
    Binary(Vec<u8>),
}
858
859impl<'a> DecodedBody<'a> {
860    pub fn to_string_lossy(&'a self) -> Cow<'a, str> {
861        match self {
862            Self::Text(s) => Cow::Borrowed(s),
863            Self::Binary(b) => String::from_utf8_lossy(b),
864        }
865    }
866}
867
868#[cfg(test)]
869mod test {
870    use super::*;
871
872    #[test]
873    fn msg_parsing() {
874        let message = concat!(
875            "Subject: hello there\n",
876            "From:  Someone <someone@example.com>\n",
877            "\n",
878            "I am the body"
879        );
880
881        let part = MimePart::parse(message).unwrap();
882        k9::assert_equal!(message, part.to_message_string());
883        assert_eq!(part.raw_body(), "I am the body");
884        k9::snapshot!(
885            part.body(),
886            r#"
887Ok(
888    Text(
889        "I am the body",
890    ),
891)
892"#
893        );
894
895        k9::snapshot!(
896            part.rebuild().unwrap().to_message_string(),
897            r#"
898Content-Type: text/plain;\r
899\tcharset="us-ascii"\r
900Subject: hello there\r
901From: Someone <someone@example.com>\r
902\r
903I am the body\r
904
905"#
906        );
907    }
908
909    #[test]
910    fn mime_encoded_body() {
911        let message = concat!(
912            "Subject: hello there\n",
913            "From: Someone <someone@example.com>\n",
914            "Mime-Version: 1.0\n",
915            "Content-Type: text/plain\n",
916            "Content-Transfer-Encoding: base64\n",
917            "\n",
918            "aGVsbG8K\n"
919        );
920
921        let part = MimePart::parse(message).unwrap();
922        k9::assert_equal!(message, part.to_message_string());
923        assert_eq!(part.raw_body(), "aGVsbG8K\n");
924        k9::snapshot!(
925            part.body(),
926            r#"
927Ok(
928    Text(
929        "hello
930",
931    ),
932)
933"#
934        );
935
936        k9::snapshot!(
937            part.rebuild().unwrap().to_message_string(),
938            r#"
939Content-Type: text/plain;\r
940\tcharset="us-ascii"\r
941Content-Transfer-Encoding: quoted-printable\r
942Subject: hello there\r
943From: Someone <someone@example.com>\r
944Mime-Version: 1.0\r
945\r
946hello=0A\r
947
948"#
949        );
950    }
951
952    #[test]
953    fn mime_multipart_1() {
954        let message = concat!(
955            "Subject: This is a test email\n",
956            "Content-Type: multipart/alternative; boundary=foobar\n",
957            "Mime-Version: 1.0\n",
958            "Date: Sun, 02 Oct 2016 07:06:22 -0700 (PDT)\n",
959            "\n",
960            "--foobar\n",
961            "Content-Type: text/plain; charset=utf-8\n",
962            "Content-Transfer-Encoding: quoted-printable\n",
963            "\n",
964            "This is the plaintext version, in utf-8. Proof by Euro: =E2=82=AC\n",
965            "--foobar\n",
966            "Content-Type: text/html\n",
967            "Content-Transfer-Encoding: base64\n",
968            "\n",
969            "PGh0bWw+PGJvZHk+VGhpcyBpcyB0aGUgPGI+SFRNTDwvYj4gdmVyc2lvbiwgaW4g \n",
970            "dXMtYXNjaWkuIFByb29mIGJ5IEV1cm86ICZldXJvOzwvYm9keT48L2h0bWw+Cg== \n",
971            "--foobar--\n",
972            "After the final boundary stuff gets ignored.\n"
973        );
974
975        let part = MimePart::parse(message).unwrap();
976
977        k9::assert_equal!(message, part.to_message_string());
978
979        let children = part.child_parts();
980        k9::assert_equal!(children.len(), 2);
981
982        k9::snapshot!(
983            children[0].body(),
984            r#"
985Ok(
986    Text(
987        "This is the plaintext version, in utf-8. Proof by Euro: €\r
988",
989    ),
990)
991"#
992        );
993        k9::snapshot!(
994            children[1].body(),
995            r#"
996Ok(
997    Text(
998        "<html><body>This is the <b>HTML</b> version, in us-ascii. Proof by Euro: &euro;</body></html>
999",
1000    ),
1001)
1002"#
1003        );
1004    }
1005
1006    #[test]
1007    fn mutate_1() {
1008        let message = concat!(
1009            "Subject: This is a test email\r\n",
1010            "Content-Type: multipart/alternative; boundary=foobar\r\n",
1011            "Mime-Version: 1.0\r\n",
1012            "Date: Sun, 02 Oct 2016 07:06:22 -0700 (PDT)\r\n",
1013            "\r\n",
1014            "--foobar\r\n",
1015            "Content-Type: text/plain; charset=utf-8\r\n",
1016            "Content-Transfer-Encoding: quoted-printable\r\n",
1017            "\r\n",
1018            "This is the plaintext version, in utf-8. Proof by Euro: =E2=82=AC\r\n",
1019            "--foobar\r\n",
1020            "Content-Type: text/html\r\n",
1021            "Content-Transfer-Encoding: base64\r\n",
1022            "\r\n",
1023            "PGh0bWw+PGJvZHk+VGhpcyBpcyB0aGUgPGI+SFRNTDwvYj4gdmVyc2lvbiwgaW4g \r\n",
1024            "dXMtYXNjaWkuIFByb29mIGJ5IEV1cm86ICZldXJvOzwvYm9keT48L2h0bWw+Cg== \r\n",
1025            "--foobar--\r\n",
1026            "After the final boundary stuff gets ignored.\r\n"
1027        );
1028
1029        let mut part = MimePart::parse(message).unwrap();
1030        k9::assert_equal!(message, part.to_message_string());
1031        fn munge(part: &mut MimePart) {
1032            let headers = part.headers_mut();
1033            headers.push(Header::with_name_value("X-Woot", "Hello"));
1034            headers.insert(0, Header::with_name_value("X-First", "at the top"));
1035            headers.retain(|hdr| !hdr.get_name().eq_ignore_ascii_case("date"));
1036        }
1037        munge(&mut part);
1038
1039        let re_encoded = part.to_message_string();
1040        k9::snapshot!(
1041            re_encoded,
1042            r#"
1043X-First: at the top\r
1044Subject: This is a test email\r
1045Content-Type: multipart/alternative; boundary=foobar\r
1046Mime-Version: 1.0\r
1047X-Woot: Hello\r
1048\r
1049--foobar\r
1050Content-Type: text/plain; charset=utf-8\r
1051Content-Transfer-Encoding: quoted-printable\r
1052\r
1053This is the plaintext version, in utf-8. Proof by Euro: =E2=82=AC\r
1054--foobar\r
1055Content-Type: text/html\r
1056Content-Transfer-Encoding: base64\r
1057\r
1058PGh0bWw+PGJvZHk+VGhpcyBpcyB0aGUgPGI+SFRNTDwvYj4gdmVyc2lvbiwgaW4g \r
1059dXMtYXNjaWkuIFByb29mIGJ5IEV1cm86ICZldXJvOzwvYm9keT48L2h0bWw+Cg== \r
1060--foobar--\r
1061After the final boundary stuff gets ignored.\r
1062
1063"#
1064        );
1065
1066        eprintln!("part before mutate:\n{part:#?}");
1067
1068        part.child_parts_mut().retain(|part| {
1069            let ct = part.headers().content_type().unwrap().unwrap();
1070            ct.value == "text/html"
1071        });
1072
1073        eprintln!("part with html removed is:\n{part:#?}");
1074
1075        let re_encoded = part.to_message_string();
1076        k9::snapshot!(
1077            re_encoded,
1078            r#"
1079X-First: at the top\r
1080Subject: This is a test email\r
1081Content-Type: multipart/alternative; boundary=foobar\r
1082Mime-Version: 1.0\r
1083X-Woot: Hello\r
1084\r
1085--foobar\r
1086Content-Type: text/html\r
1087Content-Transfer-Encoding: base64\r
1088\r
1089PGh0bWw+PGJvZHk+VGhpcyBpcyB0aGUgPGI+SFRNTDwvYj4gdmVyc2lvbiwgaW4g \r
1090dXMtYXNjaWkuIFByb29mIGJ5IEV1cm86ICZldXJvOzwvYm9keT48L2h0bWw+Cg== \r
1091--foobar--\r
1092After the final boundary stuff gets ignored.\r
1093
1094"#
1095        );
1096    }
1097
1098    #[test]
1099    fn replace_text_body() {
1100        let mut part = MimePart::new_text_plain("Hello 👻\r\n");
1101        let encoded = part.to_message_string();
1102        k9::snapshot!(
1103            &encoded,
1104            r#"
1105Content-Type: text/plain;\r
1106\tcharset="utf-8"\r
1107Content-Transfer-Encoding: base64\r
1108\r
1109SGVsbG8g8J+Ruw0K\r
1110
1111"#
1112        );
1113
1114        part.replace_text_body("text/plain", "Hello 🚀\r\n");
1115        let encoded = part.to_message_string();
1116        k9::snapshot!(
1117            &encoded,
1118            r#"
1119Content-Type: text/plain;\r
1120\tcharset="utf-8"\r
1121Content-Transfer-Encoding: base64\r
1122\r
1123SGVsbG8g8J+agA0K\r
1124
1125"#
1126        );
1127    }
1128
1129    #[test]
1130    fn construct_1() {
1131        let input_text = "Well, hello there! This is the plaintext version, in utf-8. Here's a Euro: €, and here are some emoji 👻 🍉 💩 and this long should be long enough that we wrap it in the returned part, let's see how that turns out!\r\n";
1132
1133        let part = MimePart::new_text_plain(input_text);
1134
1135        let encoded = part.to_message_string();
1136        k9::snapshot!(
1137            &encoded,
1138            r#"
1139Content-Type: text/plain;\r
1140\tcharset="utf-8"\r
1141Content-Transfer-Encoding: quoted-printable\r
1142\r
1143Well, hello there! This is the plaintext version, in utf-8. Here's a Euro: =\r
1144=E2=82=AC, and here are some emoji =F0=9F=91=BB =F0=9F=8D=89 =F0=9F=92=A9 a=\r
1145nd this long should be long enough that we wrap it in the returned part, le=\r
1146t's see how that turns out!\r
1147
1148"#
1149        );
1150
1151        let parsed_part = MimePart::parse(encoded.clone()).unwrap();
1152        k9::assert_equal!(encoded.as_str(), parsed_part.to_message_string().as_str());
1153        k9::assert_equal!(part.body().unwrap(), DecodedBody::Text(input_text.into()));
1154        k9::snapshot!(
1155            parsed_part.simplified_structure_pointers(),
1156            "
1157Ok(
1158    SimplifiedStructurePointers {
1159        text_part: Some(
1160            PartPointer(
1161                [],
1162            ),
1163        ),
1164        html_part: None,
1165        header_part: PartPointer(
1166            [],
1167        ),
1168        attachments: [],
1169    },
1170)
1171"
1172        );
1173    }
1174
1175    #[test]
1176    fn construct_2() {
1177        let msg = MimePart::new_multipart(
1178            "multipart/mixed",
1179            vec![
1180                MimePart::new_text_plain("plain text"),
1181                MimePart::new_html("<b>rich</b> text"),
1182                MimePart::new_binary(
1183                    "application/octet-stream",
1184                    &[0, 1, 2, 3],
1185                    Some(&AttachmentOptions {
1186                        file_name: Some("woot.bin".to_string()),
1187                        inline: false,
1188                        content_id: Some("woot.id".to_string()),
1189                    }),
1190                ),
1191            ],
1192            Some("my-boundary"),
1193        );
1194        k9::snapshot!(
1195            msg.to_message_string(),
1196            r#"
1197Content-Type: multipart/mixed;\r
1198\tboundary="my-boundary"\r
1199\r
1200--my-boundary\r
1201Content-Type: text/plain;\r
1202\tcharset="us-ascii"\r
1203\r
1204plain text\r
1205--my-boundary\r
1206Content-Type: text/html;\r
1207\tcharset="us-ascii"\r
1208\r
1209<b>rich</b> text\r
1210--my-boundary\r
1211Content-Type: application/octet-stream\r
1212Content-Transfer-Encoding: base64\r
1213Content-Disposition: attachment;\r
1214\tfilename="woot.bin"\r
1215Content-ID: <woot.id>\r
1216\r
1217AAECAw==\r
1218--my-boundary--\r
1219
1220"#
1221        );
1222
1223        k9::snapshot!(
1224            msg.simplified_structure_pointers(),
1225            "
1226Ok(
1227    SimplifiedStructurePointers {
1228        text_part: Some(
1229            PartPointer(
1230                [
1231                    0,
1232                ],
1233            ),
1234        ),
1235        html_part: Some(
1236            PartPointer(
1237                [
1238                    1,
1239                ],
1240            ),
1241        ),
1242        header_part: PartPointer(
1243            [],
1244        ),
1245        attachments: [
1246            PartPointer(
1247                [
1248                    2,
1249                ],
1250            ),
1251        ],
1252    },
1253)
1254"
1255        );
1256    }
1257
1258    #[test]
1259    fn funky_headers() {
1260        let message = concat!(
1261            "Subject\r\n",
1262            "Other:\r\n",
1263            "Content-Type: multipart/alternative; boundary=foobar\r\n",
1264            "Mime-Version: 1.0\r\n",
1265            "Date: Sun, 02 Oct 2016 07:06:22 -0700 (PDT)\r\n",
1266            "\r\n",
1267            "The body.\r\n"
1268        );
1269
1270        let part = MimePart::parse(message).unwrap();
1271        assert!(part
1272            .conformance()
1273            .contains(MessageConformance::MISSING_COLON_VALUE));
1274    }
1275
1276    /// This is a regression test for an issue where we'd interpret the
1277    /// binary bytes as default windows-1252 codepage charset, and mangle them.
1278    /// The high byte is sufficient to trigger the offending code prior
1279    /// to the fix
1280    #[test]
1281    fn rebuild_binary() {
1282        let expect = &[0, 1, 2, 3, 0xbe, 4, 5];
1283        let part = MimePart::new_binary("applicat/octet-stream", expect, None);
1284
1285        let rebuilt = part.rebuild().unwrap();
1286        let body = rebuilt.body().unwrap();
1287
1288        assert_eq!(body, DecodedBody::Binary(expect.to_vec()));
1289    }
1290}