1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
|
"""Tests for parse_received_headers()."""
import email
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from conftest import make_plain_message, add_received_headers
from email.message import EmailMessage
# Import the function under test.
# The script's file name contains hyphens, so it is not a valid identifier for a
# plain `import` statement; it is loaded from its file path via importlib instead.
import importlib.util
spec = importlib.util.spec_from_file_location(
# Module name given to the loaded script.
"eml_script",
# The script is looked up one directory above the directory holding this test file.
os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py')
)
eml_script = importlib.util.module_from_spec(spec)
# Run the script's top-level code so its definitions become attributes of the module.
spec.loader.exec_module(eml_script)
# Module-level alias used by every test class below.
parse_received_headers = eml_script.parse_received_headers
class TestSingleHeader:
    """A message with exactly one Received header."""

    def test_header_with_from_and_by(self):
        """Expect servers and both times to come from the single header."""
        header_value = (
            'from mail-sender.example.com by mx.receiver.example.com '
            'with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600'
        )
        message = EmailMessage()
        message['Received'] = header_value

        parsed = parse_received_headers(message)

        assert parsed['sent_server'] == 'mail-sender.example.com'
        assert parsed['received_server'] == 'mx.receiver.example.com'
        # The same timestamp is expected for both the sent and received time.
        assert parsed['sent_time'] == 'Thu, 05 Feb 2026 11:36:05 -0600'
        assert parsed['received_time'] == 'Thu, 05 Feb 2026 11:36:05 -0600'
class TestMultipleHeaders:
    """A message carrying several Received headers."""

    def test_uses_first_with_both_from_and_by(self):
        """Expect the first header that has both ``from`` and ``by`` to win."""
        received_values = (
            # Most recent first (by only)
            'by internal.example.com with SMTP; Thu, 05 Feb 2026 11:36:10 -0600',
            # Next: has both from and by — this should be selected
            (
                'from mail-sender.example.com by mx.receiver.example.com '
                'with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600'
            ),
            # Oldest
            (
                'from originator.example.com by relay.example.com '
                'with SMTP; Thu, 05 Feb 2026 11:35:58 -0600'
            ),
        )
        message = EmailMessage()
        # Each assignment appends another Received header, preserving the order above.
        for value in received_values:
            message['Received'] = value

        parsed = parse_received_headers(message)

        assert parsed['sent_server'] == 'mail-sender.example.com'
        assert parsed['received_server'] == 'mx.receiver.example.com'
class TestNoReceivedHeaders:
    """A message that carries no Received header at all."""

    def test_all_values_none(self):
        """Expect every field of the result to be None."""
        parsed = parse_received_headers(EmailMessage())

        for field in ('sent_time', 'sent_server', 'received_time', 'received_server'):
            assert parsed[field] is None
class TestByButNoFrom:
    """A single Received header that has a ``by`` clause but no ``from`` clause."""

    def test_falls_back_to_first_header(self):
        """Expect receiver details to be filled in while the sender stays None."""
        message = EmailMessage()
        message['Received'] = 'by internal.example.com with SMTP; Thu, 05 Feb 2026 11:36:10 -0600'

        parsed = parse_received_headers(message)

        expected_server = 'internal.example.com'
        expected_time = 'Thu, 05 Feb 2026 11:36:10 -0600'
        assert parsed['received_server'] == expected_server
        assert parsed['received_time'] == expected_time
        # No from in any header, so sent_server stays None
        assert parsed['sent_server'] is None
class TestMultilineFoldedHeader:
    """A Received header folded across several physical lines."""

    def test_normalizes_whitespace(self):
        """Expect the folded header to yield the same from/by servers."""
        # Use email.message_from_string to parse raw folded headers
        # (EmailMessage policy rejects embedded CRLF in set values)
        raw_message = (
            "From: test@example.com\r\n"
            "Received: from mail-sender.example.com\r\n"
            " by mx.receiver.example.com\r\n"
            " with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600\r\n"
            "\r\n"
            "body\r\n"
        )

        parsed = parse_received_headers(email.message_from_string(raw_message))

        assert parsed['sent_server'] == 'mail-sender.example.com'
        assert parsed['received_server'] == 'mx.receiver.example.com'
class TestMalformedTimestamp:
    """A Received header that lacks the ``;`` separator before the timestamp."""

    def test_no_semicolon(self):
        """Expect servers to be extracted while both time fields stay None."""
        message = EmailMessage()
        message['Received'] = 'from sender.example.com by receiver.example.com with SMTP'

        parsed = parse_received_headers(message)

        assert parsed['sent_server'] == 'sender.example.com'
        assert parsed['received_server'] == 'receiver.example.com'
        # Without a semicolon there is no timestamp portion to report.
        for time_field in ('sent_time', 'received_time'):
            assert parsed[time_field] is None
|