aboutsummaryrefslogtreecommitdiff
path: root/claude-templates/.ai/scripts/tests/test_process_eml.py
blob: 612cbb162bf82b7426d10a059ebdd1024bb1cf9b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
"""Integration tests for process_eml() — full pipeline with --output-dir."""

import os
import shutil
import sys

# Put the directory above this tests/ folder first on the import path.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))

# The script's filename contains hyphens, so it cannot be named in a regular
# `import` statement; load it by file path under the module name "eml_script".
import importlib.util
spec = importlib.util.spec_from_file_location(
    "eml_script",
    os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py')
)
eml_script = importlib.util.module_from_spec(spec)
# Executes the script's top-level code so its definitions become attributes.
spec.loader.exec_module(eml_script)

# Function under test, re-exported for brevity in the test bodies.
process_eml = eml_script.process_eml

import pytest


# Directory named "fixtures" next to this test file; the sample .eml inputs
# used by the tests below are looked up here.
FIXTURES = os.path.join(os.path.dirname(__file__), 'fixtures')


class TestPlainTextPipeline:
    """Pipeline run on the plain-text fixture."""

    def test_creates_eml_and_txt(self, tmp_path):
        """A plain-text message yields exactly one .eml and one .txt entry."""
        # Work on a copy inside tmp_path so the temp dir can be created as a
        # sibling of the message without touching the fixtures directory.
        inbox = tmp_path / "inbox"
        inbox.mkdir()
        message = inbox / "message.eml"
        shutil.copy2(os.path.join(FIXTURES, 'plain-text.eml'), message)

        result = process_eml(str(message), str(tmp_path / "output"))

        # Exactly two entries, in order: the .eml followed by the .txt.
        produced = result['files']
        assert len(produced) == 2
        eml_file, txt_file = produced
        expected = ((eml_file, 'eml', '.eml'), (txt_file, 'txt', '.txt'))

        for entry, kind, _suffix in expected:
            assert entry['type'] == kind
        for entry, _kind, suffix in expected:
            assert entry['name'].endswith(suffix)

        # Both reported paths point at real files.
        for entry, _kind, _suffix in expected:
            assert os.path.isfile(entry['path'])

        # The generated filename carries the expected components.
        for fragment in ('Jonathan', 'EMAIL', '2026-02-05'):
            assert fragment in eml_file['name']

        # Temp dir cleaned up: nothing named extract-* is left in the inbox.
        leftovers = [
            entry for entry in os.listdir(str(inbox))
            if entry.startswith('extract-')
        ]
        assert not leftovers


class TestHtmlFallbackPipeline:
    """Pipeline run on the HTML-only fixture."""

    def test_txt_contains_converted_html(self, tmp_path):
        """The .txt output holds converted text rather than raw HTML tags."""
        inbox = tmp_path / "inbox"
        inbox.mkdir()
        message = inbox / "message.eml"
        shutil.copy2(os.path.join(FIXTURES, 'html-only.eml'), message)

        result = process_eml(str(message), str(tmp_path / "output"))

        # The second entry is the text rendering of the message body.
        txt_path = result['files'][1]['path']
        with open(txt_path, 'r') as handle:
            body = handle.read()

        # Markup must be gone, while the visible text survives conversion.
        for tag in ('<p>', '<strong>'):
            assert tag not in body
        assert 'HTML' in body


class TestAttachmentPipeline:
    """Pipeline run on the fixture that carries one attachment."""

    def test_eml_txt_and_attachment_created(self, tmp_path):
        """Output lists eml, txt and attach entries, in that order."""
        inbox = tmp_path / "inbox"
        inbox.mkdir()
        message = inbox / "message.eml"
        shutil.copy2(os.path.join(FIXTURES, 'with-attachment.eml'), message)

        result = process_eml(str(message), str(tmp_path / "output"))

        produced = result['files']
        assert len(produced) == 3
        assert [entry['type'] for entry in produced] == ['eml', 'txt', 'attach']

        # The attachment is auto-renamed but keeps its .pdf extension.
        attachment = produced[2]
        attachment_name = attachment['name']
        assert 'ATTACH' in attachment_name
        assert attachment_name.endswith('.pdf')
        assert os.path.isfile(attachment['path'])


class TestDuplicateAttachmentNames:
    """Outlook inlines the same signature image multiple times under one
    filename. Each part must be saved to its own file, not silently
    overwritten in temp_dir (which leaves the move step pointing at a
    missing file)."""

    def test_each_duplicate_attachment_kept_with_counter_suffix(self, tmp_path):
        """Three same-named parts produce three distinct files with their own bytes."""
        eml_src = os.path.join(FIXTURES, 'duplicate-attachment-names.eml')
        working_eml = tmp_path / "inbox" / "message.eml"
        working_eml.parent.mkdir()
        shutil.copy2(eml_src, working_eml)

        output_dir = tmp_path / "output"
        result = process_eml(str(working_eml), str(output_dir))

        # eml + txt + 3 attachments
        assert len(result['files']) == 5
        attach_files = [f for f in result['files'] if f['type'] == 'attach']
        assert len(attach_files) == 3

        # Each file must have a unique name and exist on disk with its own
        # bytes — overwriting earlier ones would leave fewer than 3 files
        # and the move step would fail.
        names = [f['name'] for f in attach_files]
        assert len(set(names)) == 3
        for f in attach_files:
            assert os.path.isfile(f['path'])

        # Bytes are preserved per part (fixture has -1, -2, -3 payloads).
        # Read each file under `with` so the handle is closed deterministically
        # instead of being left for the garbage collector (ResourceWarning).
        contents = []
        for f in attach_files:
            with open(f['path'], 'rb') as fh:
                contents.append(fh.read())
        contents.sort()
        assert contents == [b'image-content-1', b'image-content-2', b'image-content-3']


class TestCollisionDetection:
    """Re-running the pipeline into an already-populated output directory."""

    def test_raises_on_existing_file(self, tmp_path):
        """A second run against the same output dir raises FileExistsError."""
        eml_src = os.path.join(FIXTURES, 'plain-text.eml')
        working_eml = tmp_path / "inbox" / "message.eml"
        working_eml.parent.mkdir()
        shutil.copy2(eml_src, working_eml)

        output_dir = tmp_path / "output"
        # Run once to create files; the return value is not needed here.
        process_eml(str(working_eml), str(output_dir))

        # Run again — should raise FileExistsError whose message matches
        # "Collision".
        with pytest.raises(FileExistsError, match="Collision"):
            process_eml(str(working_eml), str(output_dir))


class TestMissingOutputDir:
    """Pipeline run when the requested output directory does not exist yet."""

    def test_creates_directory(self, tmp_path):
        """A nested, not-yet-existing output dir exists after the run."""
        inbox = tmp_path / "inbox"
        inbox.mkdir()
        message = inbox / "message.eml"
        shutil.copy2(os.path.join(FIXTURES, 'plain-text.eml'), message)

        # Several levels deep, none of which exist before the call.
        target = tmp_path.joinpath("new", "nested", "output")
        assert not target.exists()

        result = process_eml(str(message), str(target))

        assert target.exists()
        assert len(result['files']) == 2