1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
|
"""Integration tests for process_eml() — full pipeline with --output-dir."""
import os
import shutil
import sys
# Put the parent directory of this test file at the front of the import path.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
import importlib.util
# The script's filename contains hyphens, so it is not a valid module name for
# a plain `import` statement; load it directly from its file path instead.
spec = importlib.util.spec_from_file_location(
"eml_script",
os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py')
)
eml_script = importlib.util.module_from_spec(spec)
# Execute the script's top-level code so its attributes become available.
spec.loader.exec_module(eml_script)
# Function under test, bound at module level for use by the test classes.
process_eml = eml_script.process_eml
import pytest
# Directory holding the sample .eml files, located next to this test module.
FIXTURES = os.path.join(os.path.dirname(__file__), 'fixtures')
class TestPlainTextPipeline:
    """Full-pipeline run of process_eml() on the plain-text fixture."""

    def test_creates_eml_and_txt(self, tmp_path):
        """Expect exactly an .eml and a .txt output and no leftover temp dir."""
        inbox = tmp_path / "inbox"
        inbox.mkdir()
        # Work on a copy inside tmp_path so the temp dir can be created as a
        # sibling of the message without touching the fixtures directory.
        message = inbox / "message.eml"
        shutil.copy2(os.path.join(FIXTURES, 'plain-text.eml'), message)

        result = process_eml(str(message), str(tmp_path / "output"))

        # Exactly two outputs, in order: the .eml and then the .txt.
        produced = result['files']
        assert len(produced) == 2
        eml_file, txt_file = produced

        assert eml_file['type'] == 'eml'
        assert txt_file['type'] == 'txt'
        assert eml_file['name'].endswith('.eml')
        assert txt_file['name'].endswith('.txt')

        # Both reported paths must exist on disk.
        for entry in (eml_file, txt_file):
            assert os.path.isfile(entry['path'])

        # The generated .eml filename carries these components.
        for fragment in ('Jonathan', 'EMAIL', '2026-02-05'):
            assert fragment in eml_file['name']

        # No extract-* temp directories may remain next to the message.
        leftovers = [
            entry for entry in os.listdir(str(inbox))
            if entry.startswith('extract-')
        ]
        assert not leftovers
class TestHtmlFallbackPipeline:
    """Full-pipeline run of process_eml() on the html-only.eml fixture."""

    def test_txt_contains_converted_html(self, tmp_path):
        """The generated text file must hold converted text, not raw markup."""
        inbox = tmp_path / "inbox"
        inbox.mkdir()
        message = inbox / "message.eml"
        shutil.copy2(os.path.join(FIXTURES, 'html-only.eml'), message)

        result = process_eml(str(message), str(tmp_path / "output"))

        # The second entry of result['files'] is read as the text output.
        txt_path = result['files'][1]['path']
        with open(txt_path, 'r') as handle:
            body = handle.read()

        # Should be converted, not raw HTML.
        for tag in ('<p>', '<strong>'):
            assert tag not in body
        assert 'HTML' in body
class TestAttachmentPipeline:
    """Full-pipeline run of process_eml() on the with-attachment.eml fixture."""

    def test_eml_txt_and_attachment_created(self, tmp_path):
        """Expect three outputs in order: eml, txt, then the attachment."""
        inbox = tmp_path / "inbox"
        inbox.mkdir()
        message = inbox / "message.eml"
        shutil.copy2(os.path.join(FIXTURES, 'with-attachment.eml'), message)

        result = process_eml(str(message), str(tmp_path / "output"))

        produced = result['files']
        assert len(produced) == 3
        assert [entry['type'] for entry in produced] == ['eml', 'txt', 'attach']

        # Attachment is auto-renamed but keeps its .pdf extension.
        _, _, attachment = produced
        attachment_name = attachment['name']
        assert 'ATTACH' in attachment_name
        assert attachment_name.endswith('.pdf')
        assert os.path.isfile(attachment['path'])
class TestCollisionDetection:
    """process_eml() must refuse to overwrite files from an earlier run."""

    def test_raises_on_existing_file(self, tmp_path):
        """A second run into the same output dir raises FileExistsError.

        The first run's result is checked before the second run so that the
        expected collision is known to be against files that really exist on
        disk, rather than the value being bound and silently discarded.
        """
        inbox = tmp_path / "inbox"
        inbox.mkdir()
        message = inbox / "message.eml"
        shutil.copy2(os.path.join(FIXTURES, 'plain-text.eml'), message)
        output_dir = tmp_path / "output"

        # Run once to create files, and confirm they were actually written.
        first_run = process_eml(str(message), str(output_dir))
        assert first_run['files']
        assert all(os.path.isfile(entry['path']) for entry in first_run['files'])

        # Run again with identical arguments — should raise FileExistsError.
        with pytest.raises(FileExistsError, match="Collision"):
            process_eml(str(message), str(output_dir))
class TestMissingOutputDir:
    """process_eml() given an output directory that does not exist yet."""

    def test_creates_directory(self, tmp_path):
        """A missing, nested output directory exists after the run."""
        inbox = tmp_path / "inbox"
        inbox.mkdir()
        message = inbox / "message.eml"
        shutil.copy2(os.path.join(FIXTURES, 'plain-text.eml'), message)

        # Several path levels deep, none of which exist beforehand.
        target = tmp_path / "new" / "nested" / "output"
        assert not target.exists()

        result = process_eml(str(message), str(target))

        assert target.exists()
        assert len(result['files']) == 2
|