aboutsummaryrefslogtreecommitdiff
path: root/docs/scripts/tests/test_parse_received_headers.py
blob: e12e1fb6fae1a8b9d09daaa317c876096b15291a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
"""Tests for parse_received_headers()."""

import email
import sys
import os

sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))

from conftest import make_plain_message, add_received_headers
from email.message import EmailMessage

# Import the function under test.
# The script's filename contains hyphens, so it cannot be named in a regular
# ``import`` statement; it is loaded from its file path instead.
import importlib.util
spec = importlib.util.spec_from_file_location(
    "eml_script",
    os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py')
)
eml_script = importlib.util.module_from_spec(spec)
# Run the script's top-level code so its definitions become module attributes.
spec.loader.exec_module(eml_script)

# Module-level alias so the tests below can call the function directly.
parse_received_headers = eml_script.parse_received_headers


class TestSingleHeader:
    """Message carrying exactly one Received header."""

    def test_header_with_from_and_by(self):
        """Expect both servers and both times to come from the lone header."""
        timestamp = 'Thu, 05 Feb 2026 11:36:05 -0600'
        message = EmailMessage()
        message['Received'] = (
            'from mail-sender.example.com by mx.receiver.example.com '
            'with ESMTP; ' + timestamp
        )

        parsed = parse_received_headers(message)

        # The same header is expected to supply the sent and received sides.
        expected = {
            'sent_server': 'mail-sender.example.com',
            'received_server': 'mx.receiver.example.com',
            'sent_time': timestamp,
            'received_time': timestamp,
        }
        for field, value in expected.items():
            assert parsed[field] == value


class TestMultipleHeaders:
    """Message carrying several Received headers."""

    def test_uses_first_with_both_from_and_by(self):
        """Expect the servers from the first header with both clauses."""
        # Headers are added in this order: most recent hop first.
        hops = (
            # Most recent: "by" clause only.
            ('by internal.example.com with SMTP; Thu, 05 Feb 2026 11:36:10 -0600'),
            # Next: has both "from" and "by" -- this one should be selected.
            ('from mail-sender.example.com by mx.receiver.example.com '
             'with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600'),
            # Oldest hop.
            ('from originator.example.com by relay.example.com '
             'with SMTP; Thu, 05 Feb 2026 11:35:58 -0600'),
        )
        message = EmailMessage()
        for hop in hops:
            message['Received'] = hop

        parsed = parse_received_headers(message)

        assert (parsed['sent_server'], parsed['received_server']) == (
            'mail-sender.example.com',
            'mx.receiver.example.com',
        )


class TestNoReceivedHeaders:
    """Message with no Received headers at all."""

    def test_all_values_none(self):
        """Expect every checked field of the result to be ``None``."""
        parsed = parse_received_headers(EmailMessage())
        checked_fields = (
            'sent_time',
            'sent_server',
            'received_time',
            'received_server',
        )
        for field in checked_fields:
            assert parsed[field] is None


class TestByButNoFrom:
    """Message whose only Received header has a ``by`` clause but no ``from``."""

    def test_falls_back_to_first_header(self):
        """Expect the receiving side to be filled in and the sender left unset."""
        receiver = 'internal.example.com'
        timestamp = 'Thu, 05 Feb 2026 11:36:10 -0600'
        message = EmailMessage()
        message['Received'] = f'by {receiver} with SMTP; {timestamp}'

        parsed = parse_received_headers(message)

        assert (parsed['received_server'], parsed['received_time']) == (
            receiver,
            timestamp,
        )
        # No header has a "from" clause, so sent_server is expected to stay None.
        assert parsed['sent_server'] is None


class TestMultilineFoldedHeader:
    """Received header folded across several physical lines."""

    def test_normalizes_whitespace(self):
        """Expect both servers to be extracted despite the line folding."""
        # The raw text goes through email.message_from_string because the
        # folded header cannot be assigned directly
        # (EmailMessage policy rejects embedded CRLF in set values).
        wire_lines = [
            "From: test@example.com",
            "Received: from mail-sender.example.com",
            "        by mx.receiver.example.com",
            "        with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600",
            "",  # blank line separating headers from body
            "body",
        ]
        raw_text = "".join(line + "\r\n" for line in wire_lines)
        message = email.message_from_string(raw_text)

        parsed = parse_received_headers(message)

        expected_servers = {
            'sent_server': 'mail-sender.example.com',
            'received_server': 'mx.receiver.example.com',
        }
        for field, server in expected_servers.items():
            assert parsed[field] == server


class TestMalformedTimestamp:
    """Received header that has no ``;`` followed by a timestamp."""

    def test_no_semicolon(self):
        """Expect the servers to be parsed and both times to be ``None``."""
        sender = 'sender.example.com'
        receiver = 'receiver.example.com'
        message = EmailMessage()
        message['Received'] = f'from {sender} by {receiver} with SMTP'

        parsed = parse_received_headers(message)

        assert (parsed['sent_server'], parsed['received_server']) == (sender, receiver)
        # The header carries no timestamp section, so neither time is expected.
        for time_field in ('sent_time', 'received_time'):
            assert parsed[time_field] is None