diff options
Diffstat (limited to 'docs/scripts/tests/test_parse_received_headers.py')
| -rw-r--r-- | docs/scripts/tests/test_parse_received_headers.py | 105 |
1 files changed, 105 insertions, 0 deletions
diff --git a/docs/scripts/tests/test_parse_received_headers.py b/docs/scripts/tests/test_parse_received_headers.py new file mode 100644 index 0000000..e12e1fb --- /dev/null +++ b/docs/scripts/tests/test_parse_received_headers.py @@ -0,0 +1,105 @@ +"""Tests for parse_received_headers().""" + +import email +import sys +import os + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) + +from conftest import make_plain_message, add_received_headers +from email.message import EmailMessage + +# Import the function under test +import importlib.util +spec = importlib.util.spec_from_file_location( + "eml_script", + os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py') +) +eml_script = importlib.util.module_from_spec(spec) +spec.loader.exec_module(eml_script) + +parse_received_headers = eml_script.parse_received_headers + + +class TestSingleHeader: + def test_header_with_from_and_by(self): + msg = EmailMessage() + msg['Received'] = ( + 'from mail-sender.example.com by mx.receiver.example.com ' + 'with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600' + ) + result = parse_received_headers(msg) + assert result['sent_server'] == 'mail-sender.example.com' + assert result['received_server'] == 'mx.receiver.example.com' + assert result['sent_time'] == 'Thu, 05 Feb 2026 11:36:05 -0600' + assert result['received_time'] == 'Thu, 05 Feb 2026 11:36:05 -0600' + + +class TestMultipleHeaders: + def test_uses_first_with_both_from_and_by(self): + msg = EmailMessage() + # Most recent first (by only) + msg['Received'] = 'by internal.example.com with SMTP; Thu, 05 Feb 2026 11:36:10 -0600' + # Next: has both from and by — this should be selected + msg['Received'] = ( + 'from mail-sender.example.com by mx.receiver.example.com ' + 'with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600' + ) + # Oldest + msg['Received'] = ( + 'from originator.example.com by relay.example.com ' + 'with SMTP; Thu, 05 Feb 2026 11:35:58 -0600' + ) + result = parse_received_headers(msg) + assert result['sent_server'] == 'mail-sender.example.com' + assert result['received_server'] == 'mx.receiver.example.com' + + +class TestNoReceivedHeaders: + def test_all_values_none(self): + msg = EmailMessage() + result = parse_received_headers(msg) + assert result['sent_time'] is None + assert result['sent_server'] is None + assert result['received_time'] is None + assert result['received_server'] is None + + +class TestByButNoFrom: + def test_falls_back_to_first_header(self): + msg = EmailMessage() + msg['Received'] = 'by internal.example.com with SMTP; Thu, 05 Feb 2026 11:36:10 -0600' + result = parse_received_headers(msg) + assert result['received_server'] == 'internal.example.com' + assert result['received_time'] == 'Thu, 05 Feb 2026 11:36:10 -0600' + # No from in any header, so sent_server stays None + assert result['sent_server'] is None + + +class TestMultilineFoldedHeader: + def test_normalizes_whitespace(self): + # Use email.message_from_string to parse raw folded headers + # (EmailMessage policy rejects embedded CRLF in set values) + raw = ( + "From: test@example.com\r\n" + "Received: from mail-sender.example.com\r\n" + " by mx.receiver.example.com\r\n" + " with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600\r\n" + "\r\n" + "body\r\n" + ) + msg = email.message_from_string(raw) + result = parse_received_headers(msg) + assert result['sent_server'] == 'mail-sender.example.com' + assert result['received_server'] == 'mx.receiver.example.com' + + +class TestMalformedTimestamp: + def test_no_semicolon(self): + msg = EmailMessage() + msg['Received'] = 'from sender.example.com by receiver.example.com with SMTP' + result = parse_received_headers(msg) + assert result['sent_server'] == 'sender.example.com' + assert result['received_server'] == 'receiver.example.com' + assert result['sent_time'] is None + assert result['received_time'] is None |
