File size: 10,402 Bytes
2652f92
e3c68ad
acfdf18
eae1eaa
184ba46
eae1eaa
 
184ba46
2652f92
eae1eaa
 
2652f92
d05b9c8
2652f92
 
 
 
d05b9c8
 
 
2652f92
 
d05b9c8
 
 
 
81eb6d8
eae1eaa
2652f92
 
d05b9c8
 
 
2652f92
 
dd82ad4
2652f92
d05b9c8
81eb6d8
2652f92
 
 
81eb6d8
2652f92
 
 
81eb6d8
2652f92
d05b9c8
81eb6d8
2652f92
 
 
81eb6d8
2652f92
 
 
d05b9c8
81eb6d8
2652f92
 
acfdf18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c418aa6
 
 
 
 
 
 
dd82ad4
c418aa6
 
 
 
 
 
 
 
 
 
dd82ad4
c418aa6
 
 
 
 
 
 
 
 
 
dd82ad4
c418aa6
 
 
 
 
 
 
 
 
 
dd82ad4
c418aa6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
"""Unit tests for ContentExtractor class."""

from unittest.mock import MagicMock, patch

from yomitalk.components.content_extractor import ContentExtractor


class TestContentExtractor:
    """Exercise ContentExtractor through its class-level attributes and methods."""

    def setup_method(self):
        """Per-test setup hook; intentionally a no-op."""
        # Every test below calls ContentExtractor directly on the class,
        # so there is no instance or shared fixture to prepare here.

    def test_initialization(self):
        """The three extension constants are lists and the combined one covers the others."""
        # Each constant must be a plain list.
        for constant_name in (
            "SUPPORTED_TEXT_EXTENSIONS",
            "SUPPORTED_PDF_EXTENSIONS",
            "SUPPORTED_EXTENSIONS",
        ):
            assert isinstance(getattr(ContentExtractor, constant_name), list)

        # Every text and PDF suffix must also appear in the combined list.
        for group in (
            ContentExtractor.SUPPORTED_TEXT_EXTENSIONS,
            ContentExtractor.SUPPORTED_PDF_EXTENSIONS,
        ):
            for suffix in group:
                assert suffix in ContentExtractor.SUPPORTED_EXTENSIONS

    def test_supported_extensions(self):
        """Well-known suffixes are registered and propagate to the combined list."""
        # Spot-check the suffixes users are most likely to upload.
        for text_suffix in (".txt", ".md"):
            assert text_suffix in ContentExtractor.SUPPORTED_TEXT_EXTENSIONS
        assert ".pdf" in ContentExtractor.SUPPORTED_PDF_EXTENSIONS

        # The concatenation of both specific lists must be contained in the combined list.
        expected_members = ContentExtractor.SUPPORTED_TEXT_EXTENSIONS + ContentExtractor.SUPPORTED_PDF_EXTENSIONS
        for suffix in expected_members:
            assert suffix in ContentExtractor.SUPPORTED_EXTENSIONS

    def test_extract_file_content(self):
        """extract_file_content yields the suffix and the raw bytes of a file-like object."""
        payload = b"This is test content."

        # Stand-in upload: read() hands back the payload, tell() reports offset 0.
        fake_upload = MagicMock(**{"read.return_value": payload, "tell.return_value": 0})
        fake_upload.name = "test.txt"

        suffix, raw = ContentExtractor.extract_file_content(fake_upload)

        assert suffix == ".txt"
        assert raw == payload

    def test_extract_text(self):
        """extract_text answers a missing upload with a prompt message."""
        message = ContentExtractor.extract_text(None)
        assert message == "Please upload a file."

        # Only the missing-file path is covered; extraction from a real
        # file object is not exercised by this test.

    def test_is_url_valid_urls(self):
        """is_url returns True for well-formed http(s) URLs."""
        accepted = (
            "https://www.example.com",
            "http://example.com",
            "https://youtube.com/watch?v=dQw4w9WgXcQ",
            "https://en.wikipedia.org/wiki/Test",
            "https://feeds.feedburner.com/example",
            "https://www.bing.com/search?q=test",
        )

        for candidate in accepted:
            assert ContentExtractor.is_url(candidate) is True

    def test_is_url_invalid_urls(self):
        """is_url returns False for strings that are not http(s) URLs."""
        rejected = (
            "",
            "not a url",
            "example.com",  # no scheme at all
            "file://local/path",  # file scheme
            "ftp://example.com",  # scheme other than http/https
            "https://",  # scheme without a host
            "://example.com",  # empty scheme
        )

        for candidate in rejected:
            assert ContentExtractor.is_url(candidate) is False

    def test_is_url_edge_cases(self):
        """is_url accepts a URL padded with whitespace and rejects None."""
        padded = "  https://example.com  "
        assert ContentExtractor.is_url(padded) is True

        assert ContentExtractor.is_url(None) is False

    @patch("yomitalk.components.content_extractor._markdown_converter")
    def test_extract_from_url_success(self, mock_converter):
        """extract_from_url returns the converter's text_content for a valid URL."""
        target = "https://example.com/article"
        # The patched converter reports a fixed piece of text.
        mock_converter.convert.return_value = MagicMock(text_content="Extracted content from URL")

        extracted = ContentExtractor.extract_from_url(target)

        assert extracted == "Extracted content from URL"
        mock_converter.convert.assert_called_once_with(target)

    @patch("yomitalk.components.content_extractor._markdown_converter")
    def test_extract_from_url_empty_content(self, mock_converter):
        """A converter result whose text_content is None is returned as an empty string."""
        target = "https://example.com/empty"
        mock_converter.convert.return_value = MagicMock(text_content=None)

        extracted = ContentExtractor.extract_from_url(target)

        assert extracted == ""
        mock_converter.convert.assert_called_once_with(target)

    @patch("yomitalk.components.content_extractor._markdown_converter")
    def test_extract_from_url_conversion_error(self, mock_converter):
        """An exception raised by the converter is reported in the returned text."""
        target = "https://example.com/error"
        # Make the patched converter fail when asked to convert.
        mock_converter.convert.side_effect = Exception("Connection error")

        extracted = ContentExtractor.extract_from_url(target)

        assert "URL conversion error: Connection error" in extracted
        mock_converter.convert.assert_called_once_with(target)

    def test_extract_from_url_invalid_url(self):
        """A string that is not a URL yields the invalid-format message."""
        outcome = ContentExtractor.extract_from_url("not a url")

        assert outcome == "Invalid URL format."

    @patch("yomitalk.components.content_extractor._markdown_converter")
    def test_extract_from_url_youtube(self, mock_converter):
        """A YouTube watch URL is passed to the converter and its text is returned."""
        target = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
        mock_converter.convert.return_value = MagicMock(text_content="YouTube video transcript: How to code")

        extracted = ContentExtractor.extract_from_url(target)

        assert extracted == "YouTube video transcript: How to code"
        mock_converter.convert.assert_called_once_with(target)

    @patch("yomitalk.components.content_extractor._markdown_converter")
    def test_extract_from_url_wikipedia(self, mock_converter):
        """A Wikipedia article URL is passed to the converter and its text is returned."""
        target = "https://en.wikipedia.org/wiki/Machine_learning"
        mock_converter.convert.return_value = MagicMock(text_content="Wikipedia article about machine learning...")

        extracted = ContentExtractor.extract_from_url(target)

        assert extracted == "Wikipedia article about machine learning..."
        mock_converter.convert.assert_called_once_with(target)

    @patch("yomitalk.components.content_extractor._markdown_converter")
    def test_extract_from_url_rss_feed(self, mock_converter):
        """A feed URL is passed to the converter and its text is returned."""
        target = "https://feeds.feedburner.com/example"
        mock_converter.convert.return_value = MagicMock(text_content="RSS feed content: Latest news articles...")

        extracted = ContentExtractor.extract_from_url(target)

        assert extracted == "RSS feed content: Latest news articles..."
        mock_converter.convert.assert_called_once_with(target)

    def test_append_text_with_source_no_separator(self):
        """With add_separator=False the new text follows the old after a blank line."""
        combined = ContentExtractor.append_text_with_source(
            "Existing content",
            "New content",
            "test.txt",
            add_separator=False,
        )

        assert combined == "Existing content\n\nNew content"

    def test_append_text_with_source_with_separator(self):
        """With add_separator=True a rule and a source header precede the new text."""
        combined = ContentExtractor.append_text_with_source(
            "Existing content",
            "New content",
            "test.txt",
            add_separator=True,
        )

        assert combined == "Existing content\n\n---\n**Source: test.txt**\n\nNew content"

    def test_append_text_with_source_empty_existing(self):
        """Appending to empty text produces the source header and the new text only."""
        combined = ContentExtractor.append_text_with_source(
            "",
            "New content",
            "test.txt",
            add_separator=True,
        )

        assert combined == "**Source: test.txt**\n\nNew content"

    def test_append_text_with_source_empty_new_text(self):
        """Appending empty text hands back the existing text as it was."""
        original_text = "Existing content"

        combined = ContentExtractor.append_text_with_source(
            original_text,
            "",
            "test.txt",
            add_separator=True,
        )

        # Nothing to add, so the existing text must come back unchanged.
        assert combined == original_text

    def test_get_source_name_from_file(self):
        """The source name of a file object is the final component of its name path."""
        upload = MagicMock()
        upload.name = "/path/to/document.pdf"

        assert ContentExtractor.get_source_name_from_file(upload) == "document.pdf"

    def test_get_source_name_from_file_none(self):
        """A None file object maps to the 'Unknown File' label."""
        label = ContentExtractor.get_source_name_from_file(None)
        assert label == "Unknown File"

    def test_get_source_name_from_file_no_name(self):
        """A file object lacking a name attribute maps to the 'Uploaded File' label."""
        nameless = MagicMock()
        # Deleting the attribute makes later access raise AttributeError
        # instead of auto-creating a child mock.
        del nameless.name

        label = ContentExtractor.get_source_name_from_file(nameless)
        assert label == "Uploaded File"