"""Unit tests for ContentExtractor class."""
from unittest.mock import MagicMock, patch
from yomitalk.components.content_extractor import ContentExtractor
class TestContentExtractor:
    """Test class for ContentExtractor.

    Every test calls ContentExtractor through the class itself, so no
    instance is created. URL extraction tests replace the module-level
    ``_markdown_converter`` object with a mock so no network access occurs.
    """

    def setup_method(self):
        """Set up test fixtures before each test method is run."""
        # No need to create instance since all methods are now classmethods

    def test_initialization(self):
        """Check the extension lists are lists and are mutually consistent."""
        # Check that supported extensions are properly defined
        assert isinstance(ContentExtractor.SUPPORTED_TEXT_EXTENSIONS, list)
        assert isinstance(ContentExtractor.SUPPORTED_PDF_EXTENSIONS, list)
        assert isinstance(ContentExtractor.SUPPORTED_EXTENSIONS, list)

        # Check that text and PDF extensions are included in supported extensions
        for ext in ContentExtractor.SUPPORTED_TEXT_EXTENSIONS:
            assert ext in ContentExtractor.SUPPORTED_EXTENSIONS, (
                f"text extension {ext!r} missing from SUPPORTED_EXTENSIONS"
            )
        for ext in ContentExtractor.SUPPORTED_PDF_EXTENSIONS:
            assert ext in ContentExtractor.SUPPORTED_EXTENSIONS, (
                f"PDF extension {ext!r} missing from SUPPORTED_EXTENSIONS"
            )

    def test_supported_extensions(self):
        """Check that common extensions appear in the expected lists."""
        # Test that common extensions are included
        assert ".txt" in ContentExtractor.SUPPORTED_TEXT_EXTENSIONS
        assert ".md" in ContentExtractor.SUPPORTED_TEXT_EXTENSIONS
        assert ".pdf" in ContentExtractor.SUPPORTED_PDF_EXTENSIONS

        # Check the combined list
        all_extensions = (
            ContentExtractor.SUPPORTED_TEXT_EXTENSIONS
            + ContentExtractor.SUPPORTED_PDF_EXTENSIONS
        )
        for ext in all_extensions:
            assert ext in ContentExtractor.SUPPORTED_EXTENSIONS, (
                f"extension {ext!r} missing from SUPPORTED_EXTENSIONS"
            )

    def test_extract_file_content(self):
        """Test extracting the extension and raw bytes from a file object."""
        # Mock a file object
        mock_file = MagicMock()
        mock_file.name = "test.txt"
        mock_file.read.return_value = b"This is test content."
        mock_file.tell.return_value = 0

        # Test with the mock file
        extension, content = ContentExtractor.extract_file_content(mock_file)

        # Verify results
        assert extension == ".txt"
        assert content == b"This is test content."

    def test_extract_text(self):
        """Test that extract_text returns the upload prompt for None input."""
        # Test with None input
        assert ContentExtractor.extract_text(None) == "Please upload a file."
        # Mock a valid file object for later implementation
        # of more comprehensive tests as needed

    def test_is_url_valid_urls(self):
        """Test is_url method with valid URLs."""
        valid_urls = [
            "https://www.example.com",
            "http://example.com",
            "https://youtube.com/watch?v=dQw4w9WgXcQ",
            "https://en.wikipedia.org/wiki/Test",
            "https://feeds.feedburner.com/example",
            "https://www.bing.com/search?q=test",
        ]
        for url in valid_urls:
            # The message identifies which entry failed; a bare assert in a
            # loop would not.
            assert ContentExtractor.is_url(url) is True, (
                f"expected {url!r} to be accepted as a URL"
            )

    def test_is_url_invalid_urls(self):
        """Test is_url method with invalid URLs."""
        invalid_urls = [
            "",
            "not a url",
            "example.com",  # Missing scheme
            "file://local/path",  # Local file path
            "ftp://example.com",  # Non-HTTP scheme
            "https://",  # Missing netloc
            "://example.com",  # Missing scheme
        ]
        for url in invalid_urls:
            assert ContentExtractor.is_url(url) is False, (
                f"expected {url!r} to be rejected as a URL"
            )

    def test_is_url_edge_cases(self):
        """Test is_url method with edge cases."""
        # Test with whitespace
        assert ContentExtractor.is_url(" https://example.com ") is True
        # Test with None input
        assert ContentExtractor.is_url(None) is False

    @patch("yomitalk.components.content_extractor._markdown_converter")
    def test_extract_from_url_success(self, mock_converter):
        """Test successful URL text extraction."""
        # Mock the converter response
        mock_result = MagicMock()
        mock_result.text_content = "Extracted content from URL"
        mock_converter.convert.return_value = mock_result

        url = "https://example.com/article"
        result = ContentExtractor.extract_from_url(url)

        assert result == "Extracted content from URL"
        mock_converter.convert.assert_called_once_with(url)

    @patch("yomitalk.components.content_extractor._markdown_converter")
    def test_extract_from_url_empty_content(self, mock_converter):
        """Test that a None text_content from the converter yields ""."""
        # Mock the converter response with empty content
        mock_result = MagicMock()
        mock_result.text_content = None
        mock_converter.convert.return_value = mock_result

        url = "https://example.com/empty"
        result = ContentExtractor.extract_from_url(url)

        assert result == ""
        mock_converter.convert.assert_called_once_with(url)

    @patch("yomitalk.components.content_extractor._markdown_converter")
    def test_extract_from_url_conversion_error(self, mock_converter):
        """Test that a converter exception is reported in the returned text."""
        # Mock the converter to raise an exception
        mock_converter.convert.side_effect = Exception("Connection error")

        url = "https://example.com/error"
        result = ContentExtractor.extract_from_url(url)

        assert "URL conversion error: Connection error" in result
        mock_converter.convert.assert_called_once_with(url)

    def test_extract_from_url_invalid_url(self):
        """Test URL extraction with invalid URL."""
        invalid_url = "not a url"
        result = ContentExtractor.extract_from_url(invalid_url)
        assert result == "Invalid URL format."

    @patch("yomitalk.components.content_extractor._markdown_converter")
    def test_extract_from_url_youtube(self, mock_converter):
        """Test URL extraction from YouTube."""
        # Mock the converter response for YouTube
        mock_result = MagicMock()
        mock_result.text_content = "YouTube video transcript: How to code"
        mock_converter.convert.return_value = mock_result

        youtube_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
        result = ContentExtractor.extract_from_url(youtube_url)

        assert result == "YouTube video transcript: How to code"
        mock_converter.convert.assert_called_once_with(youtube_url)

    @patch("yomitalk.components.content_extractor._markdown_converter")
    def test_extract_from_url_wikipedia(self, mock_converter):
        """Test URL extraction from Wikipedia."""
        # Mock the converter response for Wikipedia
        mock_result = MagicMock()
        mock_result.text_content = "Wikipedia article about machine learning..."
        mock_converter.convert.return_value = mock_result

        wikipedia_url = "https://en.wikipedia.org/wiki/Machine_learning"
        result = ContentExtractor.extract_from_url(wikipedia_url)

        assert result == "Wikipedia article about machine learning..."
        mock_converter.convert.assert_called_once_with(wikipedia_url)

    @patch("yomitalk.components.content_extractor._markdown_converter")
    def test_extract_from_url_rss_feed(self, mock_converter):
        """Test URL extraction from RSS feed."""
        # Mock the converter response for RSS feed
        mock_result = MagicMock()
        mock_result.text_content = "RSS feed content: Latest news articles..."
        mock_converter.convert.return_value = mock_result

        rss_url = "https://feeds.feedburner.com/example"
        result = ContentExtractor.extract_from_url(rss_url)

        assert result == "RSS feed content: Latest news articles..."
        mock_converter.convert.assert_called_once_with(rss_url)

    def test_append_text_with_source_no_separator(self):
        """Test appending text without separator."""
        existing_text = "Existing content"
        new_text = "New content"
        source_name = "test.txt"

        result = ContentExtractor.append_text_with_source(
            existing_text, new_text, source_name, add_separator=False
        )

        expected = "Existing content\n\nNew content"
        assert result == expected

    def test_append_text_with_source_with_separator(self):
        """Test appending text with separator."""
        existing_text = "Existing content"
        new_text = "New content"
        source_name = "test.txt"

        result = ContentExtractor.append_text_with_source(
            existing_text, new_text, source_name, add_separator=True
        )

        expected = "Existing content\n\n---\n**Source: test.txt**\n\nNew content"
        assert result == expected

    def test_append_text_with_source_empty_existing(self):
        """Test appending to empty existing text."""
        existing_text = ""
        new_text = "New content"
        source_name = "test.txt"

        result = ContentExtractor.append_text_with_source(
            existing_text, new_text, source_name, add_separator=True
        )

        expected = "**Source: test.txt**\n\nNew content"
        assert result == expected

    def test_append_text_with_source_empty_new_text(self):
        """Test appending empty new text."""
        existing_text = "Existing content"
        new_text = ""
        source_name = "test.txt"

        result = ContentExtractor.append_text_with_source(
            existing_text, new_text, source_name, add_separator=True
        )

        # Should return existing text unchanged when new text is empty
        assert result == existing_text

    def test_get_source_name_from_file(self):
        """Test extracting source name from file object."""
        # Mock file object with name attribute
        mock_file = MagicMock()
        mock_file.name = "/path/to/document.pdf"

        result = ContentExtractor.get_source_name_from_file(mock_file)
        assert result == "document.pdf"

    def test_get_source_name_from_file_none(self):
        """Test extracting source name from None file object."""
        result = ContentExtractor.get_source_name_from_file(None)
        assert result == "Unknown File"

    def test_get_source_name_from_file_no_name(self):
        """Test extracting source name from file object without name."""
        mock_file = MagicMock()
        # Deleting an attribute on a mock makes later access raise
        # AttributeError, which simulates a file object with no name.
        del mock_file.name  # Remove name attribute

        result = ContentExtractor.get_source_name_from_file(mock_file)
        assert result == "Uploaded File"
|