mailparsing/
mimepart.rs

1use crate::header::{HeaderParseResult, MessageConformance};
2use crate::headermap::HeaderMap;
3use crate::strings::IntoSharedString;
4use crate::{
5    has_lone_cr_or_lf, BStringUtf8, Header, MailParsingError, MessageID, MimeParameterEncoding,
6    MimeParameters, Result, SharedString,
7};
8use bstr::{BStr, BString, ByteSlice};
9use charset_normalizer_rs::entity::NormalizerSettings;
10use charset_normalizer_rs::Encoding;
11use chrono::Utc;
12use serde::{Deserialize, Serialize};
13use serde_with::serde_as;
14use std::borrow::Cow;
15use std::str::FromStr;
16use std::sync::Arc;
17
/// Base64 encoding used for MIME body transfer encoding in this module.
///
/// Define our own because data_encoding::BASE64_MIME, despite its name,
/// is not RFC2045 compliant, and will not ignore spaces.
///
/// As configured below, decoding skips space, CR, LF and TAB, and encoded
/// output is wrapped at 76 columns using CRLF as the separator.
const BASE64_RFC2045: data_encoding::Encoding = data_encoding_macro::new_encoding! {
    symbols: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
    padding: '=',
    ignore: " \r\n\t",
    wrap_width: 76,
    wrap_separator: "\r\n",
};
27
/// A single MIME entity: its headers, its raw (transfer-encoded) body bytes
/// and, for multipart content, the child parts that were parsed out of it.
#[derive(Debug, Clone, PartialEq)]
pub struct MimePart<'a> {
    /// The bytes that comprise this part, from its beginning to its end
    bytes: SharedString<'a>,
    /// The parsed headers from the start of bytes
    headers: HeaderMap<'a>,
    /// The index into bytes of the first non-header byte.
    body_offset: usize,
    /// The index into bytes at which the body ends (exclusive).
    /// `raw_body` slices `body_offset..body_len`, so despite its name this
    /// is used as an end position rather than a length.
    body_len: usize,
    /// The conformance flags recorded for this part
    conformance: MessageConformance,
    /// The child parts of this part (populated for multipart content)
    parts: Vec<Self>,
    /// For multipart, the content that precedes the first boundary
    intro: SharedString<'a>,
    /// For multipart, the content that follows the last boundary
    outro: SharedString<'a>,
}
44
/// A summary of the MIME (RFC 2045) related headers of a part, computed
/// by `Rfc2045Info::new` from a `HeaderMap`.
#[derive(PartialEq, Debug)]
pub struct Rfc2045Info {
    /// The Content-Transfer-Encoding; `SevenBit` is used when the header
    /// is absent or could not be parsed.
    pub encoding: ContentTransferEncoding,
    /// The character set named by the `charset` parameter of Content-Type
    /// (defaulting to `us-ascii`), or an error if that name is unsupported
    /// or cannot be represented as a string.
    pub charset: Result<&'static Encoding>,
    /// The parsed Content-Type header; `None` when absent or unparseable.
    pub content_type: Option<MimeParameters>,
    /// Whether the content type is textual; `true` when there is no
    /// content type at all.
    pub is_text: bool,
    /// Whether the content type is a multipart type; `false` when there
    /// is no content type at all.
    pub is_multipart: bool,
    /// Present when the part is marked inline, carries a file name, or
    /// has a Content-ID.
    pub attachment_options: Option<AttachmentOptions>,
    /// Set when any of the examined MIME headers failed to parse.
    pub invalid_mime_headers: bool,
}
55
56impl Rfc2045Info {
57    // This must be infallible so that a basic mime structure can be parsed
58    // even if the mime headers are a bit borked
59    fn new(headers: &HeaderMap) -> Self {
60        let mut invalid_mime_headers = false;
61        let encoding = match headers.content_transfer_encoding() {
62            Ok(Some(cte)) => match cte
63                .value
64                .to_str()
65                .map_err(|_| ())
66                .and_then(|s| ContentTransferEncoding::from_str(s).map_err(|_| ()))
67            {
68                Ok(encoding) => encoding,
69                Err(_) => {
70                    invalid_mime_headers = true;
71                    ContentTransferEncoding::SevenBit
72                }
73            },
74            Ok(None) => ContentTransferEncoding::SevenBit,
75            Err(_) => {
76                invalid_mime_headers = true;
77                ContentTransferEncoding::SevenBit
78            }
79        };
80
81        let content_type = match headers.content_type() {
82            Ok(ct) => ct,
83            Err(_) => {
84                invalid_mime_headers = true;
85                None
86            }
87        };
88
89        let mut ct_name = None;
90        let charset = if let Some(ct) = &content_type {
91            ct_name = ct.get("name");
92            ct.get("charset")
93        } else {
94            None
95        };
96        let charset = charset.unwrap_or_else(|| "us-ascii".into());
97
98        let charset = match charset.to_str() {
99            Ok(charset) => Encoding::by_name(&*charset).ok_or_else(|| {
100                MailParsingError::BodyParse(format!("unsupported charset {charset}"))
101            }),
102            Err(_) => Err(MailParsingError::BodyParse(format!(
103                "non-ascii charset name {charset}"
104            ))),
105        };
106
107        let (is_text, is_multipart) = if let Some(ct) = &content_type {
108            (ct.is_text(), ct.is_multipart())
109        } else {
110            (true, false)
111        };
112
113        let mut inline = false;
114        let mut cd_file_name = None;
115
116        match headers.content_disposition() {
117            Ok(Some(cd)) => {
118                inline = cd.value == "inline";
119                cd_file_name = cd.get("filename");
120            }
121            Ok(None) => {}
122            Err(_) => {
123                invalid_mime_headers = true;
124            }
125        };
126
127        let content_id = match headers.content_id() {
128            Ok(cid) => cid.map(|cid| cid.0),
129            Err(_) => {
130                invalid_mime_headers = true;
131                None
132            }
133        };
134
135        let file_name = match (cd_file_name, ct_name) {
136            (Some(name), _) | (None, Some(name)) => Some(name),
137            (None, None) => None,
138        };
139
140        let attachment_options = if inline || file_name.is_some() || content_id.is_some() {
141            Some(AttachmentOptions {
142                file_name,
143                inline,
144                content_id,
145            })
146        } else {
147            None
148        };
149
150        Self {
151            encoding,
152            charset,
153            content_type,
154            is_text,
155            is_multipart,
156            attachment_options,
157            invalid_mime_headers,
158        }
159    }
160
161    pub fn content_type(&self) -> Option<&str> {
162        self.content_type
163            .as_ref()
164            .and_then(|params| params.value.to_str().ok())
165    }
166}
167
168impl<'a> MimePart<'a> {
169    /// Parse some data into a tree of MimeParts
170    pub fn parse<S>(bytes: S) -> Result<Self>
171    where
172        S: IntoSharedString<'a>,
173    {
174        let (bytes, base_conformance) = bytes.into_shared_string();
175        Self::parse_impl(bytes, base_conformance, true)
176    }
177
178    /// Obtain a version of self that has a static lifetime
179    pub fn to_owned(&self) -> MimePart<'static> {
180        MimePart {
181            bytes: self.bytes.to_owned(),
182            headers: self.headers.to_owned(),
183            body_offset: self.body_offset,
184            body_len: self.body_len,
185            conformance: self.conformance,
186            parts: self.parts.iter().map(|p| p.to_owned()).collect(),
187            intro: self.intro.to_owned(),
188            outro: self.outro.to_owned(),
189        }
190    }
191
192    fn parse_impl(
193        bytes: SharedString<'a>,
194        base_conformance: MessageConformance,
195        is_top_level: bool,
196    ) -> Result<Self> {
197        let HeaderParseResult {
198            headers,
199            body_offset,
200            overall_conformance: mut conformance,
201        } = Header::parse_headers(bytes.clone())?;
202
203        conformance |= base_conformance;
204
205        let body_len = bytes.len();
206
207        if !bytes.as_bytes().is_ascii() {
208            conformance.set(MessageConformance::NEEDS_TRANSFER_ENCODING, true);
209        }
210        {
211            let mut prev = 0;
212            for idx in memchr::memchr_iter(b'\n', bytes.as_bytes()) {
213                if idx - prev > 78 {
214                    conformance.set(MessageConformance::LINE_TOO_LONG, true);
215                    break;
216                }
217                prev = idx;
218            }
219        }
220        conformance.set(
221            MessageConformance::NON_CANONICAL_LINE_ENDINGS,
222            has_lone_cr_or_lf(bytes.as_bytes()),
223        );
224
225        if is_top_level {
226            conformance.set(
227                MessageConformance::MISSING_DATE_HEADER,
228                !matches!(headers.date(), Ok(Some(_))),
229            );
230            conformance.set(
231                MessageConformance::MISSING_MESSAGE_ID_HEADER,
232                !matches!(headers.message_id(), Ok(Some(_))),
233            );
234            conformance.set(
235                MessageConformance::MISSING_MIME_VERSION,
236                match headers.mime_version() {
237                    Ok(Some(v)) => v != "1.0",
238                    _ => true,
239                },
240            );
241        }
242
243        let mut part = Self {
244            bytes,
245            headers,
246            body_offset,
247            body_len,
248            conformance,
249            parts: vec![],
250            intro: SharedString::Borrowed(b""),
251            outro: SharedString::Borrowed(b""),
252        };
253
254        part.recursive_parse()?;
255
256        Ok(part)
257    }
258
    /// If this part is a multipart with a `boundary` parameter, splits the
    /// body on that boundary and parses each section as a child part.
    ///
    /// Populates `parts`, `intro` and `outro`, merges each child's
    /// conformance flags into this part's flags, and resets `body_len` to 0
    /// once a first boundary has been found.  Parts that are not multipart,
    /// or whose body contains no boundary line, are left untouched apart
    /// from the `INVALID_MIME_HEADERS` flag.
    ///
    /// Returns an error if parsing any child part fails.
    fn recursive_parse(&mut self) -> Result<()> {
        let info = Rfc2045Info::new(&self.headers);
        if info.invalid_mime_headers {
            self.conformance |= MessageConformance::INVALID_MIME_HEADERS;
        }
        // Only proceed when there is a boundary parameter AND the content
        // type is multipart (the `true` in the pattern).
        if let Some((boundary, true)) = info
            .content_type
            .as_ref()
            .and_then(|ct| ct.get("boundary").map(|b| (b, info.is_multipart)))
        {
            // Boundary lines are searched for together with the newline
            // that precedes them.
            let boundary = format!("\n--{boundary}");
            // Start one byte before the body; presumably so that a boundary
            // on the very first body line can still match the leading
            // newline in the search pattern.
            let raw_body = self
                .bytes
                .slice(self.body_offset.saturating_sub(1)..self.bytes.len());

            let mut iter = memchr::memmem::find_iter(raw_body.as_bytes(), &boundary);
            if let Some(first_boundary_pos) = iter.next() {
                // Everything ahead of the first boundary is the preamble
                self.intro = raw_body.slice(0..first_boundary_pos);

                // When we create parts, we ignore the original body span in
                // favor of what we're parsing out here now
                self.body_len = 0;

                // Index just past the text of the most recently seen boundary
                let mut boundary_end = first_boundary_pos + boundary.len();

                // The part starts on the line after the boundary line
                while let Some(part_start) =
                    memchr::memchr(b'\n', &raw_body.as_bytes()[boundary_end..])
                        .map(|p| p + boundary_end + 1)
                {
                    // The part runs up to the next boundary, or to the end
                    // of the data when there are no more boundaries.
                    let part_end = iter
                        .next()
                        .map(|p| {
                            // P is the newline; we want to include it in the raw
                            // bytes for this part, so look beyond it
                            p + 1
                        })
                        .unwrap_or(raw_body.len());

                    let child = Self::parse_impl(
                        raw_body.slice(part_start..part_end),
                        MessageConformance::default(),
                        false,
                    )?;
                    self.conformance |= child.conformance;
                    self.parts.push(child);

                    boundary_end = part_end -
                        1 /* newline we adjusted for when assigning part_end */
                        + boundary.len();

                    // Not enough data left to hold a closing `--`; this is
                    // also the exit taken when no further boundary was found.
                    if boundary_end + 2 > raw_body.len() {
                        break;
                    }
                    // A boundary immediately followed by `--` is the closing
                    // delimiter; whatever follows that line is the epilogue.
                    if &raw_body.as_bytes()[boundary_end..boundary_end + 2] == b"--" {
                        if let Some(after_boundary) =
                            memchr::memchr(b'\n', &raw_body.as_bytes()[boundary_end..])
                                .map(|p| p + boundary_end + 1)
                        {
                            self.outro = raw_body.slice(after_boundary..raw_body.len());
                        }
                        break;
                    }
                }
            }
        }

        Ok(())
    }
327
328    /// Recursively performs deeper conformance checks on the message.
329    /// At this time that includes attempting to decode any text parts
330    /// into UTF-8 to see if they are correctly annotated, but it may
331    /// include more checks in the future.
332    /// The results of the deep checks are combined with any conformance
333    /// issues detected during parsing, and returned.
334    pub fn deep_conformance_check(&self) -> MessageConformance {
335        if self.parts.is_empty() {
336            match self.extract_body(None) {
337                Ok((_, conformance)) => conformance,
338                Err(_) => self.conformance | MessageConformance::NEEDS_TRANSFER_ENCODING,
339            }
340        } else {
341            let mut conformance = self.conformance;
342            for p in &self.parts {
343                conformance |= p.deep_conformance_check();
344            }
345            conformance
346        }
347    }
348
349    /// Returns the conformance flags determined during parsing
350    pub fn conformance(&self) -> MessageConformance {
351        self.conformance
352    }
353
354    /// Obtain a reference to the child parts
355    pub fn child_parts(&self) -> &[Self] {
356        &self.parts
357    }
358
359    /// Obtain a mutable reference to the child parts
360    pub fn child_parts_mut(&mut self) -> &mut Vec<Self> {
361        &mut self.parts
362    }
363
364    /// Obtains a reference to the headers
365    pub fn headers(&'_ self) -> &'_ HeaderMap<'_> {
366        &self.headers
367    }
368
369    /// Obtain a mutable reference to the headers
370    pub fn headers_mut<'b>(&'b mut self) -> &'b mut HeaderMap<'a> {
371        &mut self.headers
372    }
373
374    /// Get the raw, transfer-encoded body
375    pub fn raw_body(&'_ self) -> SharedString<'_> {
376        self.bytes
377            .slice(self.body_offset..self.body_len.max(self.body_offset))
378    }
379
380    pub fn rfc2045_info(&self) -> Rfc2045Info {
381        Rfc2045Info::new(&self.headers)
382    }
383
384    /// Decode transfer decoding and return the body
385    pub fn body(&'_ self) -> Result<DecodedBody<'_>> {
386        let (body, _conformance) = self.extract_body(None)?;
387        Ok(body)
388    }
389
390    fn extract_body(
391        &'_ self,
392        options: Option<&CheckFixSettings>,
393    ) -> Result<(DecodedBody<'_>, MessageConformance)> {
394        let info = Rfc2045Info::new(&self.headers);
395
396        let bytes = match info.encoding {
397            ContentTransferEncoding::Base64 => {
398                let data = self.raw_body();
399                let bytes = data.as_bytes();
400                BASE64_RFC2045.decode(bytes).map_err(|err| {
401                    let b = bytes[err.position] as char;
402                    let region =
403                        &bytes[err.position.saturating_sub(8)..(err.position + 8).min(bytes.len())];
404                    let region = String::from_utf8_lossy(region);
405                    MailParsingError::BodyParse(format!(
406                        "base64 decode: {err:#} b={b:?} in {region}"
407                    ))
408                })?
409            }
410            ContentTransferEncoding::QuotedPrintable => quoted_printable::decode(
411                self.raw_body().as_bytes(),
412                quoted_printable::ParseMode::Robust,
413            )
414            .map_err(|err| {
415                MailParsingError::BodyParse(format!("quoted printable decode: {err:#}"))
416            })?,
417            ContentTransferEncoding::SevenBit
418            | ContentTransferEncoding::EightBit
419            | ContentTransferEncoding::Binary => self.raw_body().as_bytes().to_vec(),
420        };
421
422        if info.is_text {
423            let charset = info.charset?;
424
425            match charset.decode_simple(&bytes) {
426                Ok(decoded) => Ok((
427                    DecodedBody::Text(decoded.to_string().into()),
428                    self.conformance,
429                )),
430                Err(_err) => {
431                    if let Some(settings) = options {
432                        if settings.detect_encoding {
433                            let norm_settings = NormalizerSettings {
434                                include_encodings: settings.include_encodings.clone(),
435                                exclude_encodings: settings.exclude_encodings.clone(),
436                                ..Default::default()
437                            };
438
439                            if let Ok(guess) =
440                                charset_normalizer_rs::from_bytes(&*bytes, Some(norm_settings))
441                            {
442                                if let Some(decoded) =
443                                    guess.get_best().and_then(|best| best.decoded_payload())
444                                {
445                                    return Ok((
446                                        DecodedBody::Text(decoded.to_string().into()),
447                                        MessageConformance::NEEDS_TRANSFER_ENCODING
448                                            | self.conformance,
449                                    ));
450                                }
451                            }
452
453                            // No charset was detected.  This is a strong indicator
454                            // that the content is actually binary, according to
455                            // the docs of the detector, but we know that it should
456                            // be text.  Regardless, we can't represent it as UTF-8
457                            // here.
458                            // We'll return it as a binary part and let the caller
459                            // decide if that is an issue
460                            return Ok((
461                                DecodedBody::Binary(bytes),
462                                MessageConformance::NEEDS_TRANSFER_ENCODING | self.conformance,
463                            ));
464                        }
465                    }
466
467                    // We don't know what the charset is, just that this should
468                    // be some kind of text.  For the sake of compatibility with
469                    // international email, let's try it as UTF-8, and if that
470                    // sticks, we'll use it.
471                    if let Ok(decoded) = std::str::from_utf8(&bytes) {
472                        return Ok((
473                            DecodedBody::Text(decoded.to_string().into()),
474                            MessageConformance::NEEDS_TRANSFER_ENCODING | self.conformance,
475                        ));
476                    }
477
478                    // Who knows what it is? Return it as binary and leave the
479                    // final decision on what to do with it to our caller.
480                    Ok((
481                        DecodedBody::Binary(bytes),
482                        MessageConformance::NEEDS_TRANSFER_ENCODING | self.conformance,
483                    ))
484                }
485            }
486        } else {
487            Ok((DecodedBody::Binary(bytes), self.conformance))
488        }
489    }
490
491    /// Re-constitute the message.
492    /// Each element will be parsed out, and the parsed form used
493    /// to build a new message.
494    /// This has the side effect of "fixing" non-conforming elements,
495    /// but may come at the cost of "losing" the non-sensical or otherwise
496    /// out of spec elements in the rebuilt message
497    pub fn rebuild(&self, settings: Option<&CheckFixSettings>) -> Result<Self> {
498        let info = Rfc2045Info::new(&self.headers);
499
500        let mut children = vec![];
501        for part in &self.parts {
502            children.push(part.rebuild(settings)?);
503        }
504
505        let mut rebuilt = if children.is_empty() {
506            let (body, _conformance) = self.extract_body(settings)?;
507            match body {
508                DecodedBody::Text(text) => {
509                    let ct = info
510                        .content_type
511                        .as_ref()
512                        .map(|ct| ct.value.as_bstr())
513                        .unwrap_or_else(|| BStr::new("text/plain"));
514                    Self::new_text(ct, text.as_bytes())?
515                }
516                DecodedBody::Binary(data) => {
517                    let ct = info
518                        .content_type
519                        .as_ref()
520                        .map(|ct| ct.value.as_bstr())
521                        .unwrap_or_else(|| BStr::new("application/octet-stream"));
522                    Self::new_binary(ct, &data, info.attachment_options.as_ref())?
523                }
524            }
525        } else {
526            let ct = info.content_type.ok_or_else(|| {
527                MailParsingError::BodyParse(
528                    "multipart message has no content-type information!?".to_string(),
529                )
530            })?;
531            Self::new_multipart(
532                &ct.value,
533                children,
534                ct.get("boundary").as_deref().map(|b| b.as_bytes()),
535            )?
536        };
537
538        for hdr in self.headers.iter() {
539            let name = hdr.get_name();
540            if name.eq_ignore_ascii_case(b"Content-ID") {
541                continue;
542            }
543
544            // Merge in any MimeParameters that we might otherwise have lost
545            // in the rebuild
546            if name.eq_ignore_ascii_case(b"Content-Type") {
547                if let Ok(params) = hdr.as_content_type() {
548                    let Some(mut dest) = rebuilt.headers_mut().content_type()? else {
549                        continue;
550                    };
551
552                    for (k, v) in params.parameter_map() {
553                        if dest.get(&k).is_none() {
554                            dest.set(&k, &v);
555                        }
556                    }
557
558                    rebuilt.headers_mut().set_content_type(dest)?;
559                }
560                continue;
561            }
562            if name.eq_ignore_ascii_case(b"Content-Transfer-Encoding") {
563                if let Ok(params) = hdr.as_content_transfer_encoding() {
564                    let Some(mut dest) = rebuilt.headers_mut().content_transfer_encoding()? else {
565                        continue;
566                    };
567
568                    for (k, v) in params.parameter_map() {
569                        if dest.get(&k).is_none() {
570                            dest.set(&k, &v);
571                        }
572                    }
573
574                    rebuilt.headers_mut().set_content_transfer_encoding(dest)?;
575                }
576                continue;
577            }
578            if name.eq_ignore_ascii_case(b"Content-Disposition") {
579                if let Ok(params) = hdr.as_content_disposition() {
580                    let Some(mut dest) = rebuilt.headers_mut().content_disposition()? else {
581                        continue;
582                    };
583
584                    for (k, v) in params.parameter_map() {
585                        if dest.get(&k).is_none() {
586                            dest.set(&k, &v);
587                        }
588                    }
589
590                    rebuilt.headers_mut().set_content_disposition(dest)?;
591                }
592                continue;
593            }
594
595            if let Ok(hdr) = hdr.rebuild() {
596                rebuilt.headers_mut().push(hdr);
597            }
598        }
599
600        Ok(rebuilt)
601    }
602
603    /// Write the message content to the provided output stream
604    pub fn write_message<W: std::io::Write>(&self, out: &mut W) -> Result<()> {
605        let line_ending = if self
606            .conformance
607            .contains(MessageConformance::NON_CANONICAL_LINE_ENDINGS)
608        {
609            "\n"
610        } else {
611            "\r\n"
612        };
613
614        for hdr in self.headers.iter() {
615            hdr.write_header(out)
616                .map_err(|_| MailParsingError::WriteMessageIOError)?;
617        }
618        out.write_all(line_ending.as_bytes())
619            .map_err(|_| MailParsingError::WriteMessageIOError)?;
620
621        if self.parts.is_empty() {
622            out.write_all(self.raw_body().as_bytes())
623                .map_err(|_| MailParsingError::WriteMessageIOError)?;
624        } else {
625            let info = Rfc2045Info::new(&self.headers);
626            let ct = info.content_type.ok_or({
627                MailParsingError::WriteMessageWtf(
628                    "expected to have Content-Type when there are child parts",
629                )
630            })?;
631            let boundary = ct.get("boundary").ok_or({
632                MailParsingError::WriteMessageWtf("expected Content-Type to have a boundary")
633            })?;
634            out.write_all(self.intro.as_bytes())
635                .map_err(|_| MailParsingError::WriteMessageIOError)?;
636            for p in &self.parts {
637                write!(out, "--{boundary}{line_ending}")
638                    .map_err(|_| MailParsingError::WriteMessageIOError)?;
639                p.write_message(out)?;
640            }
641            write!(out, "--{boundary}--{line_ending}")
642                .map_err(|_| MailParsingError::WriteMessageIOError)?;
643            out.write_all(self.outro.as_bytes())
644                .map_err(|_| MailParsingError::WriteMessageIOError)?;
645        }
646        Ok(())
647    }
648
649    /// Convenience method wrapping write_message that returns
650    /// the formatted message as a standalone string
651    pub fn to_message_bytes(&self) -> Vec<u8> {
652        let mut out = vec![];
653        self.write_message(&mut out).unwrap();
654        out
655    }
656
657    pub fn replace_text_body(
658        &mut self,
659        content_type: impl AsRef<[u8]>,
660        content: impl AsRef<BStr>,
661    ) -> Result<()> {
662        let mut new_part = Self::new_text(content_type, content)?;
663        self.bytes = new_part.bytes;
664        self.body_offset = new_part.body_offset;
665        self.body_len = new_part.body_len;
666        // Remove any rfc2047 headers that might reflect how the content
667        // is encoded. Note that we preserve Content-Disposition as that
668        // isn't related purely to the how the content is encoded
669        self.headers.remove_all_named("Content-Type");
670        self.headers.remove_all_named("Content-Transfer-Encoding");
671        // And add any from the new part
672        self.headers.append(&mut new_part.headers.headers);
673        Ok(())
674    }
675
676    pub fn replace_binary_body(&mut self, content_type: &[u8], content: &[u8]) -> Result<()> {
677        let mut new_part = Self::new_binary(content_type, content, None)?;
678        self.bytes = new_part.bytes;
679        self.body_offset = new_part.body_offset;
680        self.body_len = new_part.body_len;
681        // Remove any rfc2047 headers that might reflect how the content
682        // is encoded. Note that we preserve Content-Disposition as that
683        // isn't related purely to the how the content is encoded
684        self.headers.remove_all_named("Content-Type");
685        self.headers.remove_all_named("Content-Transfer-Encoding");
686        // And add any from the new part
687        self.headers.append(&mut new_part.headers.headers);
688        Ok(())
689    }
690
691    pub fn new_no_transfer_encoding(content_type: &str, bytes: &[u8]) -> Result<Self> {
692        if bytes.iter().any(|b| !b.is_ascii()) {
693            return Err(MailParsingError::EightBit);
694        }
695
696        let mut headers = HeaderMap::default();
697
698        let ct = MimeParameters::new(content_type);
699        headers.set_content_type(ct)?;
700
701        let bytes = String::from_utf8_lossy(bytes).to_string();
702        let body_len = bytes.len();
703
704        Ok(Self {
705            bytes: bytes.into(),
706            headers,
707            body_offset: 0,
708            body_len,
709            conformance: MessageConformance::default(),
710            parts: vec![],
711            intro: "".into(),
712            outro: "".into(),
713        })
714    }
715
716    /// Constructs a new part with textual utf8 content.
717    /// quoted-printable transfer encoding will be applied,
718    /// unless it is smaller to represent the text in base64
719    pub fn new_text(content_type: impl AsRef<[u8]>, content: impl AsRef<BStr>) -> Result<Self> {
720        let content = content.as_ref();
721        // We'll probably use qp, so speculatively do the work
722        let qp_encoded = quoted_printable::encode(content);
723
724        let (mut encoded, encoding) = if qp_encoded == content {
725            (qp_encoded, None)
726        } else if qp_encoded.len() <= BASE64_RFC2045.encode_len(content.len()) {
727            (qp_encoded, Some("quoted-printable"))
728        } else {
729            // Turns out base64 will be smaller; perhaps the content
730            // is dominated by non-ASCII text?
731            (BASE64_RFC2045.encode(content).into_bytes(), Some("base64"))
732        };
733
734        if !encoded.ends_with(b"\r\n") {
735            encoded.extend_from_slice(b"\r\n");
736        }
737        let mut headers = HeaderMap::default();
738
739        let mut ct = MimeParameters::new(content_type);
740        ct.set(
741            "charset",
742            if content.is_ascii() {
743                "us-ascii"
744            } else {
745                "utf-8"
746            },
747        );
748        headers.set_content_type(ct)?;
749
750        if let Some(encoding) = encoding {
751            headers.set_content_transfer_encoding(MimeParameters::new(encoding))?;
752        }
753
754        let body_len = encoded.len();
755        let bytes =
756            String::from_utf8(encoded).expect("transfer encoder to produce valid ASCII output");
757
758        Ok(Self {
759            bytes: bytes.into(),
760            headers,
761            body_offset: 0,
762            body_len,
763            conformance: MessageConformance::default(),
764            parts: vec![],
765            intro: "".into(),
766            outro: "".into(),
767        })
768    }
769
770    pub fn new_text_plain(content: impl AsRef<BStr>) -> Result<Self> {
771        Self::new_text("text/plain", content)
772    }
773
774    pub fn new_html(content: impl AsRef<BStr>) -> Result<Self> {
775        Self::new_text("text/html", content)
776    }
777
778    pub fn new_multipart(
779        content_type: impl AsRef<[u8]>,
780        parts: Vec<Self>,
781        boundary: Option<&[u8]>,
782    ) -> Result<Self> {
783        let mut headers = HeaderMap::default();
784
785        let mut ct = MimeParameters::new(content_type);
786        match boundary {
787            Some(b) => {
788                ct.set("boundary", b);
789            }
790            None => {
791                // Generate a random boundary
792                let uuid = uuid::Uuid::new_v4();
793                let boundary = data_encoding::BASE64_NOPAD.encode(uuid.as_bytes());
794                ct.set("boundary", &boundary);
795            }
796        }
797        headers.set_content_type(ct)?;
798
799        Ok(Self {
800            bytes: "".into(),
801            headers,
802            body_offset: 0,
803            body_len: 0,
804            conformance: MessageConformance::default(),
805            parts,
806            intro: "".into(),
807            outro: "".into(),
808        })
809    }
810
811    pub fn new_binary(
812        content_type: impl AsRef<[u8]>,
813        content: &[u8],
814        options: Option<&AttachmentOptions>,
815    ) -> Result<Self> {
816        let mut encoded = BASE64_RFC2045.encode(content);
817        if !encoded.ends_with("\r\n") {
818            encoded.push_str("\r\n");
819        }
820        let mut headers = HeaderMap::default();
821
822        let mut ct = MimeParameters::new(content_type);
823
824        if let Some(opts) = options {
825            let mut cd = MimeParameters::new(if opts.inline { "inline" } else { "attachment" });
826            if let Some(name) = &opts.file_name {
827                cd.set("filename", name);
828                let encoding = if name.chars().any(|c| !c.is_ascii()) {
829                    MimeParameterEncoding::QuotedRfc2047
830                } else {
831                    MimeParameterEncoding::None
832                };
833                ct.set_with_encoding("name", name, encoding);
834            }
835            headers.set_content_disposition(cd)?;
836
837            if let Some(id) = &opts.content_id {
838                headers.set_content_id(MessageID(id.clone()))?;
839            }
840        }
841
842        headers.set_content_type(ct)?;
843        headers.set_content_transfer_encoding(MimeParameters::new("base64"))?;
844
845        let body_len = encoded.len();
846
847        Ok(Self {
848            bytes: encoded.into(),
849            headers,
850            body_offset: 0,
851            body_len,
852            conformance: MessageConformance::default(),
853            parts: vec![],
854            intro: "".into(),
855            outro: "".into(),
856        })
857    }
858
859    /// Returns a SimplifiedStructure representation of the mime tree,
860    /// with the (probable) primary text/plain and text/html parts
861    /// pulled out, and the remaining parts recorded as a flat
862    /// attachments array
863    pub fn simplified_structure(&'a self) -> Result<SimplifiedStructure<'a>> {
864        let parts = self.simplified_structure_pointers()?;
865
866        let mut text = None;
867        let mut html = None;
868        let mut amp_html = None;
869
870        let headers = &self
871            .resolve_ptr(parts.header_part)
872            .expect("header part to always be valid")
873            .headers;
874
875        if let Some(p) = parts.text_part.and_then(|p| self.resolve_ptr(p)) {
876            text = match p.body()? {
877                DecodedBody::Text(t) => Some(t),
878                DecodedBody::Binary(_) => {
879                    return Err(MailParsingError::BodyParse(
880                        "expected text/plain part to be text, but it is binary".to_string(),
881                    ))
882                }
883            };
884        }
885        if let Some(p) = parts.html_part.and_then(|p| self.resolve_ptr(p)) {
886            html = match p.body()? {
887                DecodedBody::Text(t) => Some(t),
888                DecodedBody::Binary(_) => {
889                    return Err(MailParsingError::BodyParse(
890                        "expected text/html part to be text, but it is binary".to_string(),
891                    ))
892                }
893            };
894        }
895        if let Some(p) = parts.amp_html_part.and_then(|p| self.resolve_ptr(p)) {
896            amp_html = match p.body()? {
897                DecodedBody::Text(t) => Some(t),
898                DecodedBody::Binary(_) => {
899                    return Err(MailParsingError::BodyParse(
900                        "expected text/x-amp-html part to be text, but it is binary".to_string(),
901                    ))
902                }
903            };
904        }
905
906        let mut attachments = vec![];
907        for ptr in parts.attachments {
908            attachments.push(self.resolve_ptr(ptr).expect("pointer to be valid").clone());
909        }
910
911        Ok(SimplifiedStructure {
912            text,
913            html,
914            amp_html,
915            headers,
916            attachments,
917        })
918    }
919
920    /// Resolve a PartPointer to the corresponding MimePart
921    pub fn resolve_ptr(&self, ptr: PartPointer) -> Option<&Self> {
922        let mut current = self;
923        let mut cursor = ptr.0.as_slice();
924
925        loop {
926            match cursor.first() {
927                Some(&idx) => {
928                    current = current.parts.get(idx as usize)?;
929                    cursor = &cursor[1..];
930                }
931                None => {
932                    // We have completed the walk
933                    return Some(current);
934                }
935            }
936        }
937    }
938
939    /// Resolve a PartPointer to the corresponding MimePart, for mutable access
940    pub fn resolve_ptr_mut(&mut self, ptr: PartPointer) -> Option<&mut Self> {
941        let mut current = self;
942        let mut cursor = ptr.0.as_slice();
943
944        loop {
945            match cursor.first() {
946                Some(&idx) => {
947                    current = current.parts.get_mut(idx as usize)?;
948                    cursor = &cursor[1..];
949                }
950                None => {
951                    // We have completed the walk
952                    return Some(current);
953                }
954            }
955        }
956    }
957
958    /// Returns a set of PartPointers that locate the (probable) primary
959    /// text/plain and text/html parts, and the remaining parts recorded
960    /// as a flat attachments array.  The resulting
961    /// PartPointers can be resolved to their actual instances for both
962    /// immutable and mutable operations via resolve_ptr and resolve_ptr_mut.
963    pub fn simplified_structure_pointers(&self) -> Result<SimplifiedStructurePointers> {
964        self.simplified_structure_pointers_impl(None)
965    }
966
967    fn simplified_structure_pointers_impl(
968        &self,
969        my_idx: Option<u8>,
970    ) -> Result<SimplifiedStructurePointers> {
971        let info = Rfc2045Info::new(&self.headers);
972        let is_inline = info
973            .attachment_options
974            .as_ref()
975            .map(|ao| ao.inline)
976            .unwrap_or(true);
977
978        if let Some(ct) = &info.content_type {
979            if is_inline {
980                if ct.value == "text/plain" {
981                    return Ok(SimplifiedStructurePointers {
982                        amp_html_part: None,
983                        text_part: Some(PartPointer::root_or_nth(my_idx)),
984                        html_part: None,
985                        header_part: PartPointer::root_or_nth(my_idx),
986                        attachments: vec![],
987                    });
988                }
989                if ct.value == "text/html" {
990                    return Ok(SimplifiedStructurePointers {
991                        amp_html_part: None,
992                        html_part: Some(PartPointer::root_or_nth(my_idx)),
993                        text_part: None,
994                        header_part: PartPointer::root_or_nth(my_idx),
995                        attachments: vec![],
996                    });
997                }
998                if ct.value == "text/x-amp-html" {
999                    return Ok(SimplifiedStructurePointers {
1000                        amp_html_part: Some(PartPointer::root_or_nth(my_idx)),
1001                        html_part: None,
1002                        text_part: None,
1003                        header_part: PartPointer::root_or_nth(my_idx),
1004                        attachments: vec![],
1005                    });
1006                }
1007            }
1008
1009            if ct.value.starts_with_str("multipart/") {
1010                let mut text_part = None;
1011                let mut html_part = None;
1012                let mut amp_html_part = None;
1013                let mut attachments = vec![];
1014
1015                for (i, p) in self.parts.iter().enumerate() {
1016                    let part_idx = i.try_into().map_err(|_| MailParsingError::TooManyParts)?;
1017                    if let Ok(s) = p.simplified_structure_pointers_impl(Some(part_idx)) {
1018                        if let Some(p) = s.text_part {
1019                            let ptr = PartPointer::root_or_nth(my_idx).append(p);
1020                            if text_part.is_none() {
1021                                text_part.replace(ptr);
1022                            } else {
1023                                attachments.push(ptr);
1024                            }
1025                        }
1026                        if let Some(p) = s.html_part {
1027                            let ptr = PartPointer::root_or_nth(my_idx).append(p);
1028                            if html_part.is_none() {
1029                                html_part.replace(ptr);
1030                            } else {
1031                                attachments.push(ptr);
1032                            }
1033                        }
1034                        if let Some(p) = s.amp_html_part {
1035                            let ptr = PartPointer::root_or_nth(my_idx).append(p);
1036                            if amp_html_part.is_none() {
1037                                amp_html_part.replace(ptr);
1038                            } else {
1039                                attachments.push(ptr);
1040                            }
1041                        }
1042                        for attachment in s.attachments {
1043                            attachments.push(PartPointer::root_or_nth(my_idx).append(attachment));
1044                        }
1045                    }
1046                }
1047
1048                return Ok(SimplifiedStructurePointers {
1049                    amp_html_part,
1050                    html_part,
1051                    text_part,
1052                    header_part: PartPointer::root_or_nth(my_idx),
1053                    attachments,
1054                });
1055            }
1056
1057            return Ok(SimplifiedStructurePointers {
1058                html_part: None,
1059                text_part: None,
1060                amp_html_part: None,
1061                header_part: PartPointer::root_or_nth(my_idx),
1062                attachments: vec![PartPointer::root_or_nth(my_idx)],
1063            });
1064        }
1065
1066        // Assume text/plain content-type
1067        Ok(SimplifiedStructurePointers {
1068            text_part: Some(PartPointer::root_or_nth(my_idx)),
1069            html_part: None,
1070            amp_html_part: None,
1071            header_part: PartPointer::root_or_nth(my_idx),
1072            attachments: vec![],
1073        })
1074    }
1075
1076    pub fn check_fix_conformance(
1077        &self,
1078        check: MessageConformance,
1079        fix: MessageConformance,
1080        settings: CheckFixSettings,
1081    ) -> Result<Option<Self>> {
1082        let mut msg = self.clone();
1083        let conformance = msg.deep_conformance_check();
1084
1085        // Don't raise errors for things that we're going to fix anyway
1086        let check = check - fix;
1087
1088        if check.intersects(conformance) {
1089            let problems = check.intersection(conformance);
1090            return Err(MailParsingError::ConformanceIssues(problems));
1091        }
1092
1093        if !fix.intersects(conformance) {
1094            return Ok(None);
1095        }
1096
1097        let to_fix = fix.intersection(conformance);
1098
1099        let missing_headers_only = to_fix
1100            .difference(
1101                MessageConformance::MISSING_DATE_HEADER
1102                    | MessageConformance::MISSING_MIME_VERSION
1103                    | MessageConformance::MISSING_MESSAGE_ID_HEADER,
1104            )
1105            .is_empty();
1106
1107        if !missing_headers_only {
1108            if to_fix.contains(MessageConformance::NEEDS_TRANSFER_ENCODING) {
1109                // Something is 8-bit. If we're lucky, it's simply UTF-8,
1110                // but it could be some other "legacy" charset encoding.
1111                // If we've been asked to detect an encoding, try that now,
1112                // and re-parse the message with the re-coded input.
1113                // Otherwise, we'll attempt a lossy conversion to UTF-8
1114                // and the resulting message will likely include unicode
1115                // replacement characters.
1116
1117                if settings.detect_encoding {
1118                    if let Some(data_bytes) = &settings.data_bytes {
1119                        let norm_settings = NormalizerSettings {
1120                            include_encodings: settings.include_encodings.clone(),
1121                            exclude_encodings: settings.exclude_encodings.clone(),
1122                            ..Default::default()
1123                        };
1124
1125                        let guess =
1126                            charset_normalizer_rs::from_bytes(&*data_bytes, Some(norm_settings))
1127                                .map_err(|err| MailParsingError::CharsetDetectionFailed(err))?;
1128                        if let Some(best) = guess.get_best() {
1129                            if let Some(decoded) = best.decoded_payload() {
1130                                msg = MimePart::parse(decoded.to_string())?;
1131                            }
1132                        }
1133                    }
1134                }
1135            }
1136
1137            msg = msg.rebuild(Some(&settings))?;
1138        }
1139
1140        if to_fix.contains(MessageConformance::MISSING_DATE_HEADER) {
1141            msg.headers_mut().set_date(Utc::now())?;
1142        }
1143
1144        if to_fix.contains(MessageConformance::MISSING_MIME_VERSION) {
1145            msg.headers_mut().set_mime_version("1.0")?;
1146        }
1147
1148        if to_fix.contains(MessageConformance::MISSING_MESSAGE_ID_HEADER) {
1149            if let Some(message_id) = &settings.message_id {
1150                msg.headers_mut()
1151                    .set_message_id(MessageID(message_id.clone().into()))?;
1152            }
1153        }
1154
1155        Ok(Some(msg))
1156    }
1157}
1158
/// Options that control how `MimePart::check_fix_conformance` repairs a
/// message. Every deserializable field falls back to its default when absent.
#[derive(Default, Debug, Clone, Deserialize)]
pub struct CheckFixSettings {
    /// When true, and `data_bytes` is populated, fixing
    /// `NEEDS_TRANSFER_ENCODING` first runs charset detection over
    /// `data_bytes` and re-parses the message from the decoded text.
    #[serde(default)]
    pub detect_encoding: bool,
    /// Passed through as `NormalizerSettings::include_encodings` when
    /// charset detection runs.
    #[serde(default)]
    pub include_encodings: Vec<String>,
    /// Passed through as `NormalizerSettings::exclude_encodings` when
    /// charset detection runs.
    #[serde(default)]
    pub exclude_encodings: Vec<String>,
    /// Value for the Message-ID header when `MISSING_MESSAGE_ID_HEADER` is
    /// being fixed. When `None`, no Message-ID header is added.
    #[serde(default)]
    pub message_id: Option<String>,
    /// Bytes handed to charset detection. Never deserialized
    /// (`serde(skip)`), so it has to be populated in code.
    /// NOTE(review): presumably the original, undecoded message bytes;
    /// confirm against callers.
    #[serde(skip)]
    pub data_bytes: Option<Arc<Box<[u8]>>>,
}
1172
/// Identifies a MimePart by the path taken to reach it from the root of
/// a mime tree. The path is a sequence of child indices, one per level
/// of the tree; the part reached once the sequence is exhausted is the
/// one being referenced. eg: `[]` indicates the root part, while `[0]`
/// is the 0th child of the root.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PartPointer(Vec<u8>);

impl PartPointer {
    /// Pointer to the root node (the empty path)
    pub fn root() -> Self {
        Self(Vec::new())
    }

    /// Pointer to the nth child when `n` is `Some`, otherwise
    /// pointer to the root node
    pub fn root_or_nth(n: Option<u8>) -> Self {
        n.map_or_else(Self::root, Self::nth)
    }

    /// Pointer to the nth child of the root
    pub fn nth(n: u8) -> Self {
        Self(vec![n])
    }

    /// Consumes both pointers and yields the path that first follows
    /// self and then follows other, making other relative to self
    pub fn append(mut self, other: Self) -> Self {
        self.0.extend(other.0);
        self
    }

    /// Renders the path as its indices in decimal, separated by `.`.
    /// The root pointer renders as the empty string.
    pub fn id_string(&self) -> String {
        self.0
            .iter()
            .map(|step| step.to_string())
            .collect::<Vec<_>>()
            .join(".")
    }
}
1220
/// The outcome of `MimePart::simplified_structure_pointers`: the location
/// of each interesting part, expressed as a `PartPointer` that can be
/// turned back into a part with `MimePart::resolve_ptr` or
/// `MimePart::resolve_ptr_mut`.
#[derive(Debug, Clone)]
pub struct SimplifiedStructurePointers {
    /// The primary text/plain part
    pub text_part: Option<PartPointer>,
    /// The primary text/html part
    pub html_part: Option<PartPointer>,
    /// The primary text/x-amp-html part
    pub amp_html_part: Option<PartPointer>,
    /// The "top level" set of headers for the message
    pub header_part: PartPointer,
    /// all other (terminal) parts are attachments; this includes any
    /// additional text parts found after the primary one was chosen
    pub attachments: Vec<PartPointer>,
}
1234
/// The outcome of `MimePart::simplified_structure`: the decoded primary
/// text bodies of a message together with its headers and attachments.
#[derive(Debug, Clone, PartialEq)]
pub struct SimplifiedStructure<'a> {
    /// Decoded body of the primary text/plain part, if there is one
    pub text: Option<SharedString<'a>>,
    /// Decoded body of the primary text/html part, if there is one
    pub html: Option<SharedString<'a>>,
    /// Decoded body of the primary text/x-amp-html part, if there is one
    pub amp_html: Option<SharedString<'a>>,
    /// The headers of the part that was identified as the header part
    pub headers: &'a HeaderMap<'a>,
    /// Clones of every part that was classified as an attachment
    pub attachments: Vec<MimePart<'a>>,
}
1243
#[serde_as]
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
/// Describes how a part is presented as an attachment. Consumed by
/// `MimePart::new_binary` when building a part. All fields are optional
/// when deserializing; unknown fields are rejected.
pub struct AttachmentOptions {
    /// The file name of the attachment. `new_binary` emits it as the
    /// `filename` parameter of Content-Disposition and as the `name`
    /// parameter of Content-Type.
    #[serde_as(as = "Option<BStringUtf8>")]
    #[serde(default)]
    pub file_name: Option<BString>,
    /// Selects the Content-Disposition: `inline` when true, `attachment`
    /// when false.
    #[serde(default)]
    pub inline: bool,
    /// When present, `new_binary` emits it as the Content-ID header.
    #[serde_as(as = "Option<BStringUtf8>")]
    #[serde(default)]
    pub content_id: Option<BString>,
}
1257
1258#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1259pub enum ContentTransferEncoding {
1260    SevenBit,
1261    EightBit,
1262    Binary,
1263    QuotedPrintable,
1264    Base64,
1265}
1266
1267impl FromStr for ContentTransferEncoding {
1268    type Err = MailParsingError;
1269
1270    fn from_str(s: &str) -> Result<Self> {
1271        if s.eq_ignore_ascii_case("7bit") {
1272            Ok(Self::SevenBit)
1273        } else if s.eq_ignore_ascii_case("8bit") {
1274            Ok(Self::EightBit)
1275        } else if s.eq_ignore_ascii_case("binary") {
1276            Ok(Self::Binary)
1277        } else if s.eq_ignore_ascii_case("quoted-printable") {
1278            Ok(Self::QuotedPrintable)
1279        } else if s.eq_ignore_ascii_case("base64") {
1280            Ok(Self::Base64)
1281        } else {
1282            Err(MailParsingError::InvalidContentTransferEncoding(
1283                s.to_string(),
1284            ))
1285        }
1286    }
1287}
1288
1289#[derive(Debug, PartialEq)]
1290pub enum DecodedBody<'a> {
1291    Text(SharedString<'a>),
1292    Binary(Vec<u8>),
1293}
1294
1295impl<'a> DecodedBody<'a> {
1296    pub fn to_string_lossy(&'a self) -> Cow<'a, str> {
1297        match self {
1298            Self::Text(s) => s.to_str_lossy(),
1299            Self::Binary(b) => String::from_utf8_lossy(b),
1300        }
1301    }
1302}
1303
1304#[cfg(test)]
1305mod test {
1306    use super::*;
1307
1308    #[test]
1309    fn msg_parsing() {
1310        let message = concat!(
1311            "Subject: hello there\n",
1312            "From:  Someone <someone@example.com>\n",
1313            "\n",
1314            "I am the body"
1315        );
1316
1317        let part = MimePart::parse(message).unwrap();
1318        k9::assert_equal!(message.as_bytes(), part.to_message_bytes());
1319        assert_eq!(part.raw_body(), "I am the body");
1320        k9::snapshot!(
1321            part.body(),
1322            r#"
1323Ok(
1324    Text(
1325        "I am the body",
1326    ),
1327)
1328"#
1329        );
1330
1331        k9::snapshot!(
1332            BString::from(part.rebuild(None).unwrap().to_message_bytes()),
1333            r#"
1334Content-Type: text/plain;\r
1335\tcharset="us-ascii"\r
1336Subject: hello there\r
1337From: Someone <someone@example.com>\r
1338\r
1339I am the body\r
1340
1341"#
1342        );
1343    }
1344
1345    #[test]
1346    fn mime_bogus_body() {
1347        let message = concat!(
1348            "Subject: hello there\n",
1349            "From: Someone <someone@example.com>\n",
1350            "Mime-Version: 1.0\n",
1351            "Content-Type: text/plain\n",
1352            "Content-Transfer-Encoding: base64\n",
1353            "\n",
1354            "hello\n"
1355        );
1356
1357        let part = MimePart::parse(message).unwrap();
1358        assert_eq!(
1359            part.body().unwrap_err(),
1360            MailParsingError::BodyParse(
1361                "base64 decode: invalid length at 4 b='o' in hello\n".to_string()
1362            )
1363        );
1364    }
1365
1366    #[test]
1367    fn mime_encoded_body() {
1368        let message = concat!(
1369            "Subject: hello there\n",
1370            "From: Someone <someone@example.com>\n",
1371            "Mime-Version: 1.0\n",
1372            "Content-Type: text/plain\n",
1373            "Content-Transfer-Encoding: base64\n",
1374            "\n",
1375            "aGVsbG8K\n"
1376        );
1377
1378        let part = MimePart::parse(message).unwrap();
1379        k9::assert_equal!(message.as_bytes(), part.to_message_bytes());
1380        assert_eq!(part.raw_body(), "aGVsbG8K\n");
1381        k9::snapshot!(
1382            part.body(),
1383            r#"
1384Ok(
1385    Text(
1386        "hello
1387",
1388    ),
1389)
1390"#
1391        );
1392
1393        k9::snapshot!(
1394            BString::from(part.rebuild(None).unwrap().to_message_bytes()),
1395            r#"
1396Content-Type: text/plain;\r
1397\tcharset="us-ascii"\r
1398Content-Transfer-Encoding: quoted-printable\r
1399Subject: hello there\r
1400From: Someone <someone@example.com>\r
1401Mime-Version: 1.0\r
1402\r
1403hello=0A\r
1404
1405"#
1406        );
1407    }
1408
1409    #[test]
1410    fn mime_multipart_1() {
1411        let message = concat!(
1412            "Subject: This is a test email\n",
1413            "Content-Type: multipart/alternative; boundary=foobar\n",
1414            "Mime-Version: 1.0\n",
1415            "Date: Sun, 02 Oct 2016 07:06:22 -0700 (PDT)\n",
1416            "\n",
1417            "--foobar\n",
1418            "Content-Type: text/plain; charset=utf-8\n",
1419            "Content-Transfer-Encoding: quoted-printable\n",
1420            "\n",
1421            "This is the plaintext version, in utf-8. Proof by Euro: =E2=82=AC\n",
1422            "--foobar\n",
1423            "Content-Type: text/html\n",
1424            "Content-Transfer-Encoding: base64\n",
1425            "\n",
1426            "PGh0bWw+PGJvZHk+VGhpcyBpcyB0aGUgPGI+SFRNTDwvYj4gdmVyc2lvbiwgaW4g \n",
1427            "dXMtYXNjaWkuIFByb29mIGJ5IEV1cm86ICZldXJvOzwvYm9keT48L2h0bWw+Cg== \n",
1428            "--foobar--\n",
1429            "After the final boundary stuff gets ignored.\n"
1430        );
1431
1432        let part = MimePart::parse(message).unwrap();
1433
1434        k9::assert_equal!(message.as_bytes(), part.to_message_bytes());
1435
1436        let children = part.child_parts();
1437        k9::assert_equal!(children.len(), 2);
1438
1439        k9::snapshot!(
1440            children[0].body(),
1441            r#"
1442Ok(
1443    Text(
1444        "This is the plaintext version, in utf-8. Proof by Euro: €\r
1445",
1446    ),
1447)
1448"#
1449        );
1450        k9::snapshot!(
1451            children[1].body(),
1452            r#"
1453Ok(
1454    Text(
1455        "<html><body>This is the <b>HTML</b> version, in us-ascii. Proof by Euro: &euro;</body></html>
1456",
1457    ),
1458)
1459"#
1460        );
1461    }
1462
1463    #[test]
1464    fn mutate_1() {
1465        let message = concat!(
1466            "Subject: This is a test email\r\n",
1467            "Content-Type: multipart/alternative; boundary=foobar\r\n",
1468            "Mime-Version: 1.0\r\n",
1469            "Date: Sun, 02 Oct 2016 07:06:22 -0700 (PDT)\r\n",
1470            "\r\n",
1471            "--foobar\r\n",
1472            "Content-Type: text/plain; charset=utf-8\r\n",
1473            "Content-Transfer-Encoding: quoted-printable\r\n",
1474            "\r\n",
1475            "This is the plaintext version, in utf-8. Proof by Euro: =E2=82=AC\r\n",
1476            "--foobar\r\n",
1477            "Content-Type: text/html\r\n",
1478            "Content-Transfer-Encoding: base64\r\n",
1479            "\r\n",
1480            "PGh0bWw+PGJvZHk+VGhpcyBpcyB0aGUgPGI+SFRNTDwvYj4gdmVyc2lvbiwgaW4g \r\n",
1481            "dXMtYXNjaWkuIFByb29mIGJ5IEV1cm86ICZldXJvOzwvYm9keT48L2h0bWw+Cg== \r\n",
1482            "--foobar--\r\n",
1483            "After the final boundary stuff gets ignored.\r\n"
1484        );
1485
1486        let mut part = MimePart::parse(message).unwrap();
1487        k9::assert_equal!(message.as_bytes(), part.to_message_bytes());
1488        fn munge(part: &mut MimePart) {
1489            let headers = part.headers_mut();
1490            headers.push(Header::with_name_value("X-Woot", "Hello"));
1491            headers.insert(0, Header::with_name_value("X-First", "at the top"));
1492            headers.retain(|hdr| !hdr.get_name().eq_ignore_ascii_case(b"date"));
1493        }
1494        munge(&mut part);
1495
1496        let re_encoded = BString::from(part.to_message_bytes());
1497        k9::snapshot!(
1498            re_encoded,
1499            r#"
1500X-First: at the top\r
1501Subject: This is a test email\r
1502Content-Type: multipart/alternative; boundary=foobar\r
1503Mime-Version: 1.0\r
1504X-Woot: Hello\r
1505\r
1506--foobar\r
1507Content-Type: text/plain; charset=utf-8\r
1508Content-Transfer-Encoding: quoted-printable\r
1509\r
1510This is the plaintext version, in utf-8. Proof by Euro: =E2=82=AC\r
1511--foobar\r
1512Content-Type: text/html\r
1513Content-Transfer-Encoding: base64\r
1514\r
1515PGh0bWw+PGJvZHk+VGhpcyBpcyB0aGUgPGI+SFRNTDwvYj4gdmVyc2lvbiwgaW4g \r
1516dXMtYXNjaWkuIFByb29mIGJ5IEV1cm86ICZldXJvOzwvYm9keT48L2h0bWw+Cg== \r
1517--foobar--\r
1518After the final boundary stuff gets ignored.\r
1519
1520"#
1521        );
1522
1523        eprintln!("part before mutate:\n{part:#?}");
1524
1525        part.child_parts_mut().retain(|part| {
1526            let ct = part.headers().content_type().unwrap().unwrap();
1527            ct.value == "text/html"
1528        });
1529
1530        eprintln!("part with html removed is:\n{part:#?}");
1531
1532        let re_encoded = BString::from(part.to_message_bytes());
1533        k9::snapshot!(
1534            re_encoded,
1535            r#"
1536X-First: at the top\r
1537Subject: This is a test email\r
1538Content-Type: multipart/alternative; boundary=foobar\r
1539Mime-Version: 1.0\r
1540X-Woot: Hello\r
1541\r
1542--foobar\r
1543Content-Type: text/html\r
1544Content-Transfer-Encoding: base64\r
1545\r
1546PGh0bWw+PGJvZHk+VGhpcyBpcyB0aGUgPGI+SFRNTDwvYj4gdmVyc2lvbiwgaW4g \r
1547dXMtYXNjaWkuIFByb29mIGJ5IEV1cm86ICZldXJvOzwvYm9keT48L2h0bWw+Cg== \r
1548--foobar--\r
1549After the final boundary stuff gets ignored.\r
1550
1551"#
1552        );
1553    }
1554
1555    #[test]
1556    fn replace_text_body() {
1557        let mut part = MimePart::new_text_plain("Hello 👻\r\n").unwrap();
1558        let encoded = BString::from(part.to_message_bytes());
1559        k9::snapshot!(
1560            &encoded,
1561            r#"
1562Content-Type: text/plain;\r
1563\tcharset="utf-8"\r
1564Content-Transfer-Encoding: base64\r
1565\r
1566SGVsbG8g8J+Ruw0K\r
1567
1568"#
1569        );
1570
1571        part.replace_text_body("text/plain", "Hello 🚀\r\n")
1572            .unwrap();
1573        let encoded = BString::from(part.to_message_bytes());
1574        k9::snapshot!(
1575            &encoded,
1576            r#"
1577Content-Type: text/plain;\r
1578\tcharset="utf-8"\r
1579Content-Transfer-Encoding: base64\r
1580\r
1581SGVsbG8g8J+agA0K\r
1582
1583"#
1584        );
1585    }
1586
1587    #[test]
1588    fn construct_1() {
1589        let input_text = "Well, hello there! This is the plaintext version, in utf-8. Here's a Euro: €, and here are some emoji 👻 🍉 💩 and this long should be long enough that we wrap it in the returned part, let's see how that turns out!\r\n";
1590
1591        let part = MimePart::new_text_plain(input_text).unwrap();
1592
1593        let encoded = BString::from(part.to_message_bytes());
1594        k9::snapshot!(
1595            &encoded,
1596            r#"
1597Content-Type: text/plain;\r
1598\tcharset="utf-8"\r
1599Content-Transfer-Encoding: quoted-printable\r
1600\r
1601Well, hello there! This is the plaintext version, in utf-8. Here's a Euro: =\r
1602=E2=82=AC, and here are some emoji =F0=9F=91=BB =F0=9F=8D=89 =F0=9F=92=A9 a=\r
1603nd this long should be long enough that we wrap it in the returned part, le=\r
1604t's see how that turns out!\r
1605
1606"#
1607        );
1608
1609        let parsed_part = MimePart::parse(encoded.clone()).unwrap();
1610        k9::assert_equal!(encoded, parsed_part.to_message_bytes());
1611        k9::assert_equal!(part.body().unwrap(), DecodedBody::Text(input_text.into()));
1612        k9::snapshot!(
1613            parsed_part.simplified_structure_pointers(),
1614            "
1615Ok(
1616    SimplifiedStructurePointers {
1617        text_part: Some(
1618            PartPointer(
1619                [],
1620            ),
1621        ),
1622        html_part: None,
1623        amp_html_part: None,
1624        header_part: PartPointer(
1625            [],
1626        ),
1627        attachments: [],
1628    },
1629)
1630"
1631        );
1632    }
1633
1634    #[test]
1635    fn construct_2() {
1636        let msg = MimePart::new_multipart(
1637            "multipart/mixed",
1638            vec![
1639                MimePart::new_text_plain("plain text").unwrap(),
1640                MimePart::new_html("<b>rich</b> text").unwrap(),
1641                MimePart::new_binary(
1642                    "application/octet-stream",
1643                    &[0, 1, 2, 3],
1644                    Some(&AttachmentOptions {
1645                        file_name: Some("woot.bin".into()),
1646                        inline: false,
1647                        content_id: Some("woot.id".into()),
1648                    }),
1649                )
1650                .unwrap(),
1651            ],
1652            Some(b"my-boundary"),
1653        )
1654        .unwrap();
1655        k9::snapshot!(
1656            BString::from(msg.to_message_bytes()),
1657            r#"
1658Content-Type: multipart/mixed;\r
1659\tboundary="my-boundary"\r
1660\r
1661--my-boundary\r
1662Content-Type: text/plain;\r
1663\tcharset="us-ascii"\r
1664\r
1665plain text\r
1666--my-boundary\r
1667Content-Type: text/html;\r
1668\tcharset="us-ascii"\r
1669\r
1670<b>rich</b> text\r
1671--my-boundary\r
1672Content-Disposition: attachment;\r
1673\tfilename="woot.bin"\r
1674Content-ID: <woot.id>\r
1675Content-Type: application/octet-stream;\r
1676\tname="woot.bin"\r
1677Content-Transfer-Encoding: base64\r
1678\r
1679AAECAw==\r
1680--my-boundary--\r
1681
1682"#
1683        );
1684
1685        k9::snapshot!(
1686            msg.simplified_structure_pointers(),
1687            "
1688Ok(
1689    SimplifiedStructurePointers {
1690        text_part: Some(
1691            PartPointer(
1692                [
1693                    0,
1694                ],
1695            ),
1696        ),
1697        html_part: Some(
1698            PartPointer(
1699                [
1700                    1,
1701                ],
1702            ),
1703        ),
1704        amp_html_part: None,
1705        header_part: PartPointer(
1706            [],
1707        ),
1708        attachments: [
1709            PartPointer(
1710                [
1711                    2,
1712                ],
1713            ),
1714        ],
1715    },
1716)
1717"
1718        );
1719    }
1720
1721    #[test]
1722    fn attachment_name_order_prefers_content_disposition() {
1723        let message = concat!(
1724            "Content-Type: multipart/mixed;\r\n",
1725            "	boundary=\"woot\"\r\n",
1726            "\r\n",
1727            "--woot\r\n",
1728            "Content-Type: text/plain;\r\n",
1729            "	charset=\"us-ascii\"\r\n",
1730            "\r\n",
1731            "Hello, I am the main message content\r\n",
1732            "--woot\r\n",
1733            "Content-Disposition: attachment;\r\n",
1734            "	filename=cdname\r\n",
1735            "Content-Type: application/octet-stream;\r\n",
1736            "	name=ctname\r\n",
1737            "Content-Transfer-Encoding: base64\r\n",
1738            "\r\n",
1739            "u6o=\r\n",
1740            "--woot--\r\n"
1741        );
1742        let part = MimePart::parse(message).unwrap();
1743        let structure = part.simplified_structure().unwrap();
1744
1745        k9::assert_equal!(
1746            structure.attachments[0].rfc2045_info().attachment_options,
1747            Some(AttachmentOptions {
1748                content_id: None,
1749                inline: false,
1750                file_name: Some("cdname".into()),
1751            })
1752        );
1753    }
1754
1755    #[test]
1756    fn attachment_name_accepts_content_type_name() {
1757        let message = concat!(
1758            "Content-Type: multipart/mixed;\r\n",
1759            "	boundary=\"woot\"\r\n",
1760            "\r\n",
1761            "--woot\r\n",
1762            "Content-Type: text/plain;\r\n",
1763            "	charset=\"us-ascii\"\r\n",
1764            "\r\n",
1765            "Hello, I am the main message content\r\n",
1766            "--woot\r\n",
1767            "Content-Disposition: attachment\r\n",
1768            "Content-Type: application/octet-stream;\r\n",
1769            "	name=ctname\r\n",
1770            "Content-Transfer-Encoding: base64\r\n",
1771            "\r\n",
1772            "u6o=\r\n",
1773            "--woot--\r\n"
1774        );
1775        let part = MimePart::parse(message).unwrap();
1776        let structure = part.simplified_structure().unwrap();
1777
1778        k9::assert_equal!(
1779            structure.attachments[0].rfc2045_info().attachment_options,
1780            Some(AttachmentOptions {
1781                content_id: None,
1782                inline: false,
1783                file_name: Some("ctname".into()),
1784            })
1785        );
1786    }
1787
1788    #[test]
1789    fn funky_headers() {
1790        let message = concat!(
1791            "Subject\r\n",
1792            "Other:\r\n",
1793            "Content-Type: multipart/alternative; boundary=foobar\r\n",
1794            "Mime-Version: 1.0\r\n",
1795            "Date: Sun, 02 Oct 2016 07:06:22 -0700 (PDT)\r\n",
1796            "\r\n",
1797            "The body.\r\n"
1798        );
1799
1800        let part = MimePart::parse(message).unwrap();
1801        assert!(part
1802            .conformance()
1803            .contains(MessageConformance::MISSING_COLON_VALUE));
1804    }
1805
1806    /// This is a regression test for an issue where we'd interpret the
1807    /// binary bytes as default windows-1252 codepage charset, and mangle them.
1808    /// The high byte is sufficient to trigger the offending code prior
1809    /// to the fix
1810    #[test]
1811    fn rebuild_binary() {
1812        let expect = &[0, 1, 2, 3, 0xbe, 4, 5];
1813        let part = MimePart::new_binary("applicat/octet-stream", expect, None).unwrap();
1814
1815        let rebuilt = part.rebuild(None).unwrap();
1816        let body = rebuilt.body().unwrap();
1817
1818        assert_eq!(body, DecodedBody::Binary(expect.to_vec()));
1819    }
1820
1821    /// Validate that we don't lose supplemental mime parameters like:
1822    /// `Content-Type: text/calendar; method=REQUEST`
1823    #[test]
1824    fn rebuild_invitation() {
1825        let message = concat!(
1826            "Subject: Test for events 2\r\n",
1827            "Content-Type: multipart/mixed;\r\n",
1828            " boundary=8a54d64d7ad7c04a084478052b36cbe1609b33bf3a41203aaee8dd642cd3\r\n",
1829            "\r\n",
1830            "--8a54d64d7ad7c04a084478052b36cbe1609b33bf3a41203aaee8dd642cd3\r\n",
1831            "Content-Type: multipart/alternative;\r\n",
1832            " boundary=a4e0aff9e05c7d94e2e13bd5590302f7802daac1e952c065207790d15a9f\r\n",
1833            "\r\n",
1834            "--a4e0aff9e05c7d94e2e13bd5590302f7802daac1e952c065207790d15a9f\r\n",
1835            "Content-Transfer-Encoding: quoted-printable\r\n",
1836            "Content-Type: text/plain; charset=UTF-8\r\n",
1837            "\r\n",
1838            "This is a test for calendar event invitation\r\n",
1839            "--a4e0aff9e05c7d94e2e13bd5590302f7802daac1e952c065207790d15a9f\r\n",
1840            "Content-Transfer-Encoding: quoted-printable\r\n",
1841            "Content-Type: text/html; charset=UTF-8\r\n",
1842            "\r\n",
1843            "<p>This is a test for calendar event invitation</p>\r\n",
1844            "--a4e0aff9e05c7d94e2e13bd5590302f7802daac1e952c065207790d15a9f--\r\n",
1845            "\r\n",
1846            "--8a54d64d7ad7c04a084478052b36cbe1609b33bf3a41203aaee8dd642cd3\r\n",
1847            "Content-Disposition: inline; name=\"Invitation.ics\"\r\n",
1848            "Content-Type: text/calendar; method=REQUEST; name=\"Invitation.ics\"\r\n",
1849            "\r\n",
1850            "Invitation\r\n",
1851            "--8a54d64d7ad7c04a084478052b36cbe1609b33bf3a41203aaee8dd642cd3\r\n",
1852            "Content-Disposition: attachment; filename=\"event.ics\"\r\n",
1853            "Content-Type: application/ics\r\n",
1854            "\r\n",
1855            "Event\r\n",
1856            "--8a54d64d7ad7c04a084478052b36cbe1609b33bf3a41203aaee8dd642cd3--\r\n",
1857            "\r\n"
1858        );
1859
1860        let part = MimePart::parse(message).unwrap();
1861        let rebuilt = part.rebuild(None).unwrap();
1862
1863        k9::snapshot!(
1864            BString::from(rebuilt.to_message_bytes()),
1865            r#"
1866Content-Type: multipart/mixed;\r
1867\tboundary="8a54d64d7ad7c04a084478052b36cbe1609b33bf3a41203aaee8dd642cd3"\r
1868Subject: Test for events 2\r
1869\r
1870--8a54d64d7ad7c04a084478052b36cbe1609b33bf3a41203aaee8dd642cd3\r
1871Content-Type: multipart/alternative;\r
1872\tboundary="a4e0aff9e05c7d94e2e13bd5590302f7802daac1e952c065207790d15a9f"\r
1873\r
1874--a4e0aff9e05c7d94e2e13bd5590302f7802daac1e952c065207790d15a9f\r
1875Content-Type: text/plain;\r
1876\tcharset="us-ascii"\r
1877\r
1878This is a test for calendar event invitation\r
1879--a4e0aff9e05c7d94e2e13bd5590302f7802daac1e952c065207790d15a9f\r
1880Content-Type: text/html;\r
1881\tcharset="us-ascii"\r
1882\r
1883<p>This is a test for calendar event invitation</p>\r
1884--a4e0aff9e05c7d94e2e13bd5590302f7802daac1e952c065207790d15a9f--\r
1885--8a54d64d7ad7c04a084478052b36cbe1609b33bf3a41203aaee8dd642cd3\r
1886Content-Type: text/calendar;\r
1887\tcharset="us-ascii";\r
1888\tmethod="REQUEST";\r
1889\tname="Invitation.ics"\r
1890\r
1891Invitation\r
1892--8a54d64d7ad7c04a084478052b36cbe1609b33bf3a41203aaee8dd642cd3\r
1893Content-Disposition: attachment;\r
1894\tfilename="event.ics"\r
1895Content-Type: application/ics;\r
1896\tname="event.ics"\r
1897Content-Transfer-Encoding: base64\r
1898\r
1899RXZlbnQNCg==\r
1900--8a54d64d7ad7c04a084478052b36cbe1609b33bf3a41203aaee8dd642cd3--\r
1901
1902"#
1903        );
1904    }
1905
1906    #[test]
1907    fn check_conformance_angle_msg_id() {
1908        const DOUBLE_ANGLE_ONLY: &str = "Subject: hello\r
1909Message-ID: <<1234@example.com>>\r
1910\r
1911Hello";
1912        let msg = MimePart::parse(DOUBLE_ANGLE_ONLY).unwrap();
1913        k9::snapshot!(
1914            msg.check_fix_conformance(
1915                MessageConformance::MISSING_MESSAGE_ID_HEADER,
1916                MessageConformance::empty(),
1917                CheckFixSettings::default(),
1918            )
1919            .unwrap_err()
1920            .to_string(),
1921            "Message has conformance issues: MISSING_MESSAGE_ID_HEADER"
1922        );
1923
1924        let rebuilt = BString::from(
1925            msg.check_fix_conformance(
1926                MessageConformance::MISSING_MESSAGE_ID_HEADER,
1927                MessageConformance::MISSING_MESSAGE_ID_HEADER,
1928                CheckFixSettings {
1929                    message_id: Some("id@example.com".to_string()),
1930                    ..Default::default()
1931                },
1932            )
1933            .unwrap()
1934            .unwrap()
1935            .to_message_bytes(),
1936        );
1937
1938        k9::snapshot!(
1939            rebuilt,
1940            r#"
1941Subject: hello\r
1942Message-ID: <id@example.com>\r
1943\r
1944Hello
1945"#
1946        );
1947
1948        const DOUBLE_ANGLE_AND_LONG_LINE: &str = "Subject: hello\r
1949Message-ID: <<1234@example.com>>\r
1950\r
1951Hello this is a really long line Hello this is a really long line \
1952Hello this is a really long line Hello this is a really long line \
1953Hello this is a really long line Hello this is a really long line \
1954Hello this is a really long line Hello this is a really long line \
1955Hello this is a really long line Hello this is a really long line \
1956Hello this is a really long line Hello this is a really long line \
1957Hello this is a really long line Hello this is a really long line
1958";
1959        let msg = MimePart::parse(DOUBLE_ANGLE_AND_LONG_LINE).unwrap();
1960        let rebuilt = BString::from(
1961            msg.check_fix_conformance(
1962                MessageConformance::MISSING_COLON_VALUE,
1963                MessageConformance::MISSING_MESSAGE_ID_HEADER | MessageConformance::LINE_TOO_LONG,
1964                CheckFixSettings {
1965                    message_id: Some("id@example.com".to_string()),
1966                    ..Default::default()
1967                },
1968            )
1969            .unwrap()
1970            .unwrap()
1971            .to_message_bytes(),
1972        );
1973
1974        k9::snapshot!(
1975            rebuilt,
1976            r#"
1977Content-Type: text/plain;\r
1978\tcharset="us-ascii"\r
1979Content-Transfer-Encoding: quoted-printable\r
1980Subject: hello\r
1981Message-ID: <id@example.com>\r
1982\r
1983Hello this is a really long line Hello this is a really long line Hello thi=\r
1984s is a really long line Hello this is a really long line Hello this is a re=\r
1985ally long line Hello this is a really long line Hello this is a really long=\r
1986 line Hello this is a really long line Hello this is a really long line Hel=\r
1987lo this is a really long line Hello this is a really long line Hello this i=\r
1988s a really long line Hello this is a really long line Hello this is a reall=\r
1989y long line=0A\r
1990
1991"#
1992        );
1993    }
1994
1995    #[test]
1996    fn check_conformance() {
1997        const MULTI_HEADER_CONTENT: &str =
1998        "X-Hello: there\r\nX-Header: value\r\nSubject: Hello\r\nX-Header: another value\r\nFrom :Someone@somewhere\r\n\r\nBody";
1999
2000        let msg = MimePart::parse(MULTI_HEADER_CONTENT).unwrap();
2001        let rebuilt = BString::from(
2002            msg.check_fix_conformance(
2003                MessageConformance::default(),
2004                MessageConformance::MISSING_MIME_VERSION,
2005                CheckFixSettings::default(),
2006            )
2007            .unwrap()
2008            .unwrap()
2009            .to_message_bytes(),
2010        );
2011        k9::snapshot!(
2012            rebuilt,
2013            r#"
2014X-Hello: there\r
2015X-Header: value\r
2016Subject: Hello\r
2017X-Header: another value\r
2018From :Someone@somewhere\r
2019Mime-Version: 1.0\r
2020\r
2021Body
2022"#
2023        );
2024
2025        let msg = MimePart::parse(MULTI_HEADER_CONTENT).unwrap();
2026        let rebuilt = BString::from(
2027            msg.check_fix_conformance(
2028                MessageConformance::default(),
2029                MessageConformance::MISSING_MIME_VERSION | MessageConformance::NAME_ENDS_WITH_SPACE,
2030                CheckFixSettings::default(),
2031            )
2032            .unwrap()
2033            .unwrap()
2034            .to_message_bytes(),
2035        );
2036        k9::snapshot!(
2037            rebuilt,
2038            r#"
2039Content-Type: text/plain;\r
2040\tcharset="us-ascii"\r
2041X-Hello: there\r
2042X-Header: value\r
2043Subject: Hello\r
2044X-Header: another value\r
2045From: <Someone@somewhere>\r
2046Mime-Version: 1.0\r
2047\r
2048Body\r
2049
2050"#
2051        );
2052    }
2053
2054    #[test]
2055    fn check_fix_latin_input() {
2056        const POUNDS: &[u8] = b"Subject: \xa3\r\n\r\nGBP\r\n";
2057        let msg = MimePart::parse(POUNDS).unwrap();
2058        assert_eq!(
2059            msg.conformance(),
2060            MessageConformance::NEEDS_TRANSFER_ENCODING
2061                | MessageConformance::MISSING_DATE_HEADER
2062                | MessageConformance::MISSING_MESSAGE_ID_HEADER
2063                | MessageConformance::MISSING_MIME_VERSION
2064        );
2065        let rebuilt = msg
2066            .check_fix_conformance(
2067                MessageConformance::default(),
2068                MessageConformance::NEEDS_TRANSFER_ENCODING,
2069                CheckFixSettings {
2070                    detect_encoding: true,
2071                    include_encodings: vec!["iso-8859-1".to_string()],
2072                    data_bytes: Some(Arc::new(POUNDS.into())),
2073                    ..Default::default()
2074                },
2075            )
2076            .unwrap()
2077            .unwrap();
2078
2079        let subject = rebuilt.headers.subject().unwrap().unwrap();
2080        assert_eq!(subject, "£");
2081    }
2082
2083    // The issue here is that the message is text/plain with no explicit
2084    // charset, and is thus implicitly us-ascii.  But the part is actually
2085    // utf-8 content inside base64. Since the transfer encoding is 7-bit
2086    // it doesn't get flagged as improper encoding during the initial
2087    // parse.
2088    // We want to ensure that it is found during check-fix, and is corrected.
2089    #[test]
2090    fn check_fix_utf8_inside_transfer_encoding() {
2091        const CONTENT: &str = "Subject: hello\r\nContent-Type: text/plain\r\nContent-Transfer-Encoding: base64\r\n\r\n2KrYs9iqDQoNCg==\r\n";
2092
2093        let msg = MimePart::parse(CONTENT).unwrap();
2094
2095        // Initial parse cannot see that the content is actually utf-8,
2096        // which conflicts with the implicit us-ascii charset for a text/ part.
2097        assert_eq!(
2098            msg.conformance(),
2099            MessageConformance::MISSING_DATE_HEADER
2100                | MessageConformance::MISSING_MESSAGE_ID_HEADER
2101                | MessageConformance::MISSING_MIME_VERSION
2102        );
2103
2104        // Deep check flags the invalid charset and sets NEEDS_TRANSFER_ENCODING
2105        assert_eq!(
2106            msg.deep_conformance_check(),
2107            MessageConformance::NEEDS_TRANSFER_ENCODING
2108                | MessageConformance::MISSING_DATE_HEADER
2109                | MessageConformance::MISSING_MESSAGE_ID_HEADER
2110                | MessageConformance::MISSING_MIME_VERSION
2111        );
2112        let rebuilt = msg
2113            .check_fix_conformance(
2114                MessageConformance::default(),
2115                MessageConformance::NEEDS_TRANSFER_ENCODING,
2116                CheckFixSettings::default(),
2117            )
2118            .unwrap()
2119            .unwrap();
2120
2121        eprintln!("{rebuilt:?}");
2122        assert_eq!(rebuilt.body().unwrap().to_string_lossy().trim(), "تست");
2123    }
2124
2125    #[test]
2126    fn check_fix_latin1_inside_transfer_encoding() {
2127        const CONTENT: &str = "Subject: hello\r\nContent-Type: text/plain\r\nContent-Transfer-Encoding: base64\r\n\r\nVGhlIGNvc3QgaXMgozQyLjAwCg==\r\n";
2128
2129        let msg = MimePart::parse(CONTENT).unwrap();
2130
2131        // Initial parse cannot see that the content is actually utf-8,
2132        // which conflicts with the implicit us-ascii charset for a text/ part.
2133        assert_eq!(
2134            msg.conformance(),
2135            MessageConformance::MISSING_DATE_HEADER
2136                | MessageConformance::MISSING_MESSAGE_ID_HEADER
2137                | MessageConformance::MISSING_MIME_VERSION
2138        );
2139
2140        // Deep check flags the invalid charset and sets NEEDS_TRANSFER_ENCODING
2141        assert_eq!(
2142            msg.deep_conformance_check(),
2143            MessageConformance::NEEDS_TRANSFER_ENCODING
2144                | MessageConformance::MISSING_DATE_HEADER
2145                | MessageConformance::MISSING_MESSAGE_ID_HEADER
2146                | MessageConformance::MISSING_MIME_VERSION
2147        );
2148        let rebuilt = msg
2149            .check_fix_conformance(
2150                MessageConformance::default(),
2151                MessageConformance::NEEDS_TRANSFER_ENCODING,
2152                CheckFixSettings {
2153                    detect_encoding: true,
2154                    include_encodings: vec!["iso-8859-1".to_string()],
2155                    ..Default::default()
2156                },
2157            )
2158            .unwrap()
2159            .unwrap();
2160
2161        eprintln!("{rebuilt:?}");
2162        assert_eq!(
2163            rebuilt.body().unwrap().to_string_lossy().trim(),
2164            "The cost is £42.00"
2165        );
2166    }
2167
2168    #[test]
2169    fn check_fix_unknown_inside_transfer_encoding() {
2170        // `owo=` is 0xa3 (a UK Sterling/Pound sign in latin-1.
2171        // The length of the data passed to the charset detector
2172        // is insufficient for it to decide the charset, so we
2173        // should not expect to see a valid text part emitted.
2174        const CONTENT: &str = "Subject: hello\r\nContent-Type: text/plain\r\nContent-Transfer-Encoding: base64\r\n\r\nowo=\r\n";
2175
2176        let msg = MimePart::parse(CONTENT).unwrap();
2177
2178        // Initial parse cannot see that the content is actually utf-8,
2179        // which conflicts with the implicit us-ascii charset for a text/ part.
2180        assert_eq!(
2181            msg.conformance(),
2182            MessageConformance::MISSING_DATE_HEADER
2183                | MessageConformance::MISSING_MESSAGE_ID_HEADER
2184                | MessageConformance::MISSING_MIME_VERSION
2185        );
2186
2187        // Deep check flags the invalid charset and sets NEEDS_TRANSFER_ENCODING
2188        assert_eq!(
2189            msg.deep_conformance_check(),
2190            MessageConformance::NEEDS_TRANSFER_ENCODING
2191                | MessageConformance::MISSING_DATE_HEADER
2192                | MessageConformance::MISSING_MESSAGE_ID_HEADER
2193                | MessageConformance::MISSING_MIME_VERSION
2194        );
2195        let rebuilt = msg
2196            .check_fix_conformance(
2197                MessageConformance::default(),
2198                MessageConformance::NEEDS_TRANSFER_ENCODING,
2199                CheckFixSettings {
2200                    detect_encoding: true,
2201                    include_encodings: vec!["iso-8859-1".to_string()],
2202                    ..Default::default()
2203                },
2204            )
2205            .unwrap()
2206            .unwrap();
2207
2208        eprintln!("{rebuilt:?}");
2209        assert_eq!(rebuilt.body().unwrap().to_string_lossy().trim(), "�");
2210    }
2211
2212    #[test]
2213    fn nested_multipart_mixed_related() {
2214        // Reproduces the structure: multipart/mixed -> multipart/related -> [text/html, image/png]
2215        let message = concat!(
2216            "MIME-Version: 1.0\r\n",
2217            "Content-Type: multipart/mixed;\r\n",
2218            "\tboundary=\"----=_Part_602641_1899404624.1775349148919\"\r\n",
2219            "\r\n",
2220            "------=_Part_602641_1899404624.1775349148919\r\n",
2221            "Content-Type: multipart/related;\r\n",
2222            "\tboundary=\"----=_Part_602642_1070442961.1775349148920\"\r\n",
2223            "\r\n",
2224            "------=_Part_602642_1070442961.1775349148920\r\n",
2225            "Content-Type: text/html;charset=UTF-8\r\n",
2226            "Content-Transfer-Encoding: quoted-printable\r\n",
2227            "\r\n",
2228            "<html><body>Test HTML</body></html>\r\n",
2229            "------=_Part_602642_1070442961.1775349148920\r\n",
2230            "Content-Type: image/png; name=inline\r\n",
2231            "Content-Transfer-Encoding: base64\r\n",
2232            "Content-Disposition: inline; filename=inline\r\n",
2233            "Content-ID: <dell-aiops>\r\n",
2234            "\r\n",
2235            "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==\r\n",
2236            "------=_Part_602642_1070442961.1775349148920--\r\n",
2237            "------=_Part_602641_1899404624.1775349148919--\r\n"
2238        );
2239
2240        let root = MimePart::parse(message).unwrap();
2241
2242        /// Extract content-type from part
2243        fn ct(p: &MimePart) -> String {
2244            p.headers()
2245                .content_type()
2246                .unwrap()
2247                .unwrap()
2248                .value
2249                .to_string()
2250        }
2251
2252        assert_eq!(ct(&root), "multipart/mixed");
2253
2254        // Structure check: root should have 1 part (multipart/related)
2255        let [related_part] = &root.child_parts()[..] else {
2256            panic!("root must have one child")
2257        };
2258        assert_eq!(ct(related_part), "multipart/related");
2259
2260        // multipart/related should have 2 parts (text/html and image)
2261        let [html_part, image_part] = &related_part.child_parts()[..] else {
2262            panic!("related part must have two children")
2263        };
2264
2265        // Check content types
2266        assert_eq!(ct(html_part), "text/html");
2267        assert_eq!(ct(image_part), "image/png");
2268
2269        // Verify simplified structure can be retrieved (this tests the PartRef resolution path)
2270        let simplified = root.simplified_structure().unwrap();
2271        let DecodedBody::Text(html) = html_part.body().unwrap() else {
2272            panic!("must be text")
2273        };
2274        assert_eq!(
2275            simplified,
2276            SimplifiedStructure {
2277                text: None,
2278                html: Some(html),
2279                amp_html: None,
2280                headers: &root.headers(),
2281                attachments: vec![image_part.clone()],
2282            }
2283        );
2284    }
2285}