aboutsummaryrefslogtreecommitdiff
path: root/docs/scripts/tests/test_process_eml.py
blob: 26c5ad5065091033525c32e090c16415826be7a4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
"""Integration tests for process_eml() — full pipeline with --output-dir."""

import os
import shutil
import sys

# Put the directory above this file's directory at the front of sys.path.
# NOTE(review): presumably so modules living next to the script under test
# can be imported — confirm against the script's own imports.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))

# The script's filename contains hyphens, so it cannot be named in a plain
# ``import`` statement; load it from its file path under the alias
# "eml_script" instead.
import importlib.util
spec = importlib.util.spec_from_file_location(
    "eml_script",
    os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py')
)
eml_script = importlib.util.module_from_spec(spec)
# Executes the script's top-level code so its functions become attributes
# of ``eml_script``.
spec.loader.exec_module(eml_script)

# Function under test, re-exported at module level for the test classes below.
process_eml = eml_script.process_eml

import pytest


# Directory holding the sample .eml files, located next to this test module.
FIXTURES = os.path.join(os.path.dirname(__file__), 'fixtures')


class TestPlainTextPipeline:
    """Run the plain-text fixture through the whole process_eml() pipeline."""

    def test_creates_eml_and_txt(self, tmp_path):
        """A plain-text message yields exactly one .eml and one .txt file."""
        # Work on a copy inside tmp_path so the temp dir can be created as
        # a sibling of the message without touching the fixtures folder.
        inbox = tmp_path / "inbox"
        inbox.mkdir()
        message = inbox / "message.eml"
        shutil.copy2(os.path.join(FIXTURES, 'plain-text.eml'), message)

        result = process_eml(str(message), str(tmp_path / "output"))

        # Exactly two entries are reported: the .eml first, then the .txt.
        produced = result['files']
        assert len(produced) == 2
        eml_file, txt_file = produced[0], produced[1]
        expectations = (
            (eml_file, 'eml', '.eml'),
            (txt_file, 'txt', '.txt'),
        )

        for entry, kind, _suffix in expectations:
            assert entry['type'] == kind
        for entry, _kind, suffix in expectations:
            assert entry['name'].endswith(suffix)

        # Both reported paths point at real files.
        for entry, _kind, _suffix in expectations:
            assert os.path.isfile(entry['path'])

        # The generated .eml name carries the expected components.
        for fragment in ('Jonathan', 'EMAIL', '2026-02-05'):
            assert fragment in eml_file['name']

        # Temp dir cleaned up: no extract-* entries are left in the inbox.
        leftovers = [
            entry for entry in os.listdir(str(inbox))
            if entry.startswith('extract-')
        ]
        assert not leftovers


class TestHtmlFallbackPipeline:
    """Run the HTML-only fixture through the process_eml() pipeline."""

    def test_txt_contains_converted_html(self, tmp_path):
        """The .txt output holds converted text rather than raw HTML markup."""
        inbox = tmp_path / "inbox"
        inbox.mkdir()
        message = inbox / "message.eml"
        shutil.copy2(os.path.join(FIXTURES, 'html-only.eml'), message)

        result = process_eml(str(message), str(tmp_path / "output"))

        # The second reported file is read back as the text rendering.
        txt_path = result['files'][1]['path']
        with open(txt_path, 'r') as handle:
            body = handle.read()

        # Markup must be gone, while the visible wording survives.
        for tag in ('<p>', '<strong>'):
            assert tag not in body
        assert 'HTML' in body


class TestAttachmentPipeline:
    """Run the fixture that carries an attachment through process_eml()."""

    def test_eml_txt_and_attachment_created(self, tmp_path):
        """The result lists the .eml, the .txt and the attachment, in that order."""
        inbox = tmp_path / "inbox"
        inbox.mkdir()
        message = inbox / "message.eml"
        shutil.copy2(os.path.join(FIXTURES, 'with-attachment.eml'), message)

        result = process_eml(str(message), str(tmp_path / "output"))

        produced = result['files']
        assert len(produced) == 3
        assert [entry['type'] for entry in produced] == ['eml', 'txt', 'attach']

        # The attachment is auto-renamed: the new name carries the ATTACH
        # marker, keeps the .pdf extension, and the file exists on disk.
        attachment = produced[2]
        renamed = attachment['name']
        assert 'ATTACH' in renamed
        assert renamed.endswith('.pdf')
        assert os.path.isfile(attachment['path'])


class TestCollisionDetection:
    """Processing the same message twice into one output directory."""

    def test_raises_on_existing_file(self, tmp_path):
        """A second run over the same message raises FileExistsError.

        The error message is expected to match the pattern "Collision".
        """
        eml_src = os.path.join(FIXTURES, 'plain-text.eml')
        working_eml = tmp_path / "inbox" / "message.eml"
        working_eml.parent.mkdir()
        shutil.copy2(eml_src, working_eml)

        output_dir = tmp_path / "output"
        # First run only populates the output directory; its return value
        # is not needed for this test.
        process_eml(str(working_eml), str(output_dir))

        # Run again — should raise FileExistsError
        with pytest.raises(FileExistsError, match="Collision"):
            process_eml(str(working_eml), str(output_dir))


class TestMissingOutputDir:
    """process_eml() pointed at an output directory that does not exist yet."""

    def test_creates_directory(self, tmp_path):
        """A missing, nested output directory exists after the run."""
        inbox = tmp_path / "inbox"
        inbox.mkdir()
        message = inbox / "message.eml"
        shutil.copy2(os.path.join(FIXTURES, 'plain-text.eml'), message)

        # Several path levels are absent before the call.
        target = tmp_path / "new" / "nested" / "output"
        assert not target.exists()

        result = process_eml(str(message), str(target))

        assert target.exists()
        assert len(result['files']) == 2