| """Unit tests for ContentExtractor class.""" |
|
|
| from unittest.mock import MagicMock, patch |
|
|
| from yomitalk.components.content_extractor import ContentExtractor |
|
|
|
|
class TestContentExtractor:
    """Behavioural checks for the ``ContentExtractor`` helper.

    Every test goes through attributes and callables looked up on the
    ``ContentExtractor`` class itself; no instance is created in this suite.
    URL-conversion tests replace the module-level ``_markdown_converter``
    with a mock so that nothing is fetched over the network.
    """

    def setup_method(self):
        """Per-test setup hook; there is currently nothing to prepare."""

    def test_initialization(self):
        """The extension constants are lists and the combined one covers both groups."""
        text_exts = ContentExtractor.SUPPORTED_TEXT_EXTENSIONS
        pdf_exts = ContentExtractor.SUPPORTED_PDF_EXTENSIONS
        combined = ContentExtractor.SUPPORTED_EXTENSIONS

        assert isinstance(text_exts, list)
        assert isinstance(pdf_exts, list)
        assert isinstance(combined, list)

        # Each text extension, then each PDF extension, must appear in the
        # combined list.
        for group in (text_exts, pdf_exts):
            for ext in group:
                assert ext in combined

    def test_supported_extensions(self):
        """Specific well-known extensions are registered in the expected group."""
        text_exts = ContentExtractor.SUPPORTED_TEXT_EXTENSIONS
        pdf_exts = ContentExtractor.SUPPORTED_PDF_EXTENSIONS

        assert ".txt" in text_exts
        assert ".md" in text_exts
        assert ".pdf" in pdf_exts

        # Anything listed in either group must also be in the combined list.
        missing = [
            ext
            for ext in text_exts + pdf_exts
            if ext not in ContentExtractor.SUPPORTED_EXTENSIONS
        ]
        assert not missing

    def test_extract_file_content(self):
        """``extract_file_content`` yields the extension and raw bytes of a file object."""
        payload = b"This is test content."
        upload = MagicMock(
            **{
                "read.return_value": payload,
                "tell.return_value": 0,
            }
        )
        # ``name`` has to be assigned after construction: as a constructor
        # keyword it would name the mock itself instead of setting the attribute.
        upload.name = "test.txt"

        extension, content = ContentExtractor.extract_file_content(upload)

        assert extension == ".txt"
        assert content == b"This is test content."

    def test_extract_text(self):
        """``extract_text`` answers with an upload prompt when given ``None``."""
        message = ContentExtractor.extract_text(None)
        assert message == "Please upload a file."

    def test_is_url_valid_urls(self):
        """``is_url`` returns ``True`` for a range of http/https URLs."""
        accepted = (
            "https://www.example.com",
            "http://example.com",
            "https://youtube.com/watch?v=dQw4w9WgXcQ",
            "https://en.wikipedia.org/wiki/Test",
            "https://feeds.feedburner.com/example",
            "https://www.bing.com/search?q=test",
        )

        assert all(ContentExtractor.is_url(candidate) is True for candidate in accepted)

    def test_is_url_invalid_urls(self):
        """``is_url`` returns ``False`` for strings that are not http/https URLs."""
        rejected = (
            "",
            "not a url",
            "example.com",
            "file://local/path",
            "ftp://example.com",
            "https://",
            "://example.com",
        )

        assert all(ContentExtractor.is_url(candidate) is False for candidate in rejected)

    def test_is_url_edge_cases(self):
        """``is_url`` accepts a whitespace-padded URL and rejects ``None``."""
        padded = " https://example.com "
        assert ContentExtractor.is_url(padded) is True

        missing = None
        assert ContentExtractor.is_url(missing) is False

    @patch("yomitalk.components.content_extractor._markdown_converter")
    def test_extract_from_url_success(self, mock_converter):
        """The converter's ``text_content`` is returned for a valid URL."""
        target = "https://example.com/article"
        mock_converter.convert.return_value = MagicMock(
            text_content="Extracted content from URL"
        )

        extracted = ContentExtractor.extract_from_url(target)

        assert extracted == "Extracted content from URL"
        mock_converter.convert.assert_called_once_with(target)

    @patch("yomitalk.components.content_extractor._markdown_converter")
    def test_extract_from_url_empty_content(self, mock_converter):
        """A ``None`` ``text_content`` from the converter becomes an empty string."""
        target = "https://example.com/empty"
        mock_converter.convert.return_value = MagicMock(text_content=None)

        extracted = ContentExtractor.extract_from_url(target)

        assert extracted == ""
        mock_converter.convert.assert_called_once_with(target)

    @patch("yomitalk.components.content_extractor._markdown_converter")
    def test_extract_from_url_conversion_error(self, mock_converter):
        """An exception raised by the converter is reported in the returned text."""
        target = "https://example.com/error"
        mock_converter.convert.side_effect = Exception("Connection error")

        extracted = ContentExtractor.extract_from_url(target)

        assert "URL conversion error: Connection error" in extracted
        mock_converter.convert.assert_called_once_with(target)

    def test_extract_from_url_invalid_url(self):
        """A string that is not a URL produces the invalid-format message."""
        extracted = ContentExtractor.extract_from_url("not a url")

        assert extracted == "Invalid URL format."

    @patch("yomitalk.components.content_extractor._markdown_converter")
    def test_extract_from_url_youtube(self, mock_converter):
        """A YouTube URL is handed to the converter and its text is returned."""
        target = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
        mock_converter.convert.return_value = MagicMock(
            text_content="YouTube video transcript: How to code"
        )

        extracted = ContentExtractor.extract_from_url(target)

        assert extracted == "YouTube video transcript: How to code"
        mock_converter.convert.assert_called_once_with(target)

    @patch("yomitalk.components.content_extractor._markdown_converter")
    def test_extract_from_url_wikipedia(self, mock_converter):
        """A Wikipedia URL is handed to the converter and its text is returned."""
        target = "https://en.wikipedia.org/wiki/Machine_learning"
        mock_converter.convert.return_value = MagicMock(
            text_content="Wikipedia article about machine learning..."
        )

        extracted = ContentExtractor.extract_from_url(target)

        assert extracted == "Wikipedia article about machine learning..."
        mock_converter.convert.assert_called_once_with(target)

    @patch("yomitalk.components.content_extractor._markdown_converter")
    def test_extract_from_url_rss_feed(self, mock_converter):
        """An RSS feed URL is handed to the converter and its text is returned."""
        target = "https://feeds.feedburner.com/example"
        mock_converter.convert.return_value = MagicMock(
            text_content="RSS feed content: Latest news articles..."
        )

        extracted = ContentExtractor.extract_from_url(target)

        assert extracted == "RSS feed content: Latest news articles..."
        mock_converter.convert.assert_called_once_with(target)

    def test_append_text_with_source_no_separator(self):
        """With ``add_separator=False`` the texts are joined by a blank line only."""
        combined = ContentExtractor.append_text_with_source(
            "Existing content",
            "New content",
            "test.txt",
            add_separator=False,
        )

        assert combined == "Existing content\n\nNew content"

    def test_append_text_with_source_with_separator(self):
        """With ``add_separator=True`` a rule and a source header precede the new text."""
        combined = ContentExtractor.append_text_with_source(
            "Existing content",
            "New content",
            "test.txt",
            add_separator=True,
        )

        assert combined == "Existing content\n\n---\n**Source: test.txt**\n\nNew content"

    def test_append_text_with_source_empty_existing(self):
        """Appending to empty text gives the source header and new text, with no rule."""
        combined = ContentExtractor.append_text_with_source(
            "",
            "New content",
            "test.txt",
            add_separator=True,
        )

        assert combined == "**Source: test.txt**\n\nNew content"

    def test_append_text_with_source_empty_new_text(self):
        """Appending empty new text leaves the existing text unchanged."""
        original = "Existing content"

        combined = ContentExtractor.append_text_with_source(
            original,
            "",
            "test.txt",
            add_separator=True,
        )

        assert combined == original

    def test_get_source_name_from_file(self):
        """Only the final path component of the file's ``name`` is reported."""
        upload = MagicMock()
        upload.name = "/path/to/document.pdf"

        assert ContentExtractor.get_source_name_from_file(upload) == "document.pdf"

    def test_get_source_name_from_file_none(self):
        """``None`` in place of a file object maps to ``"Unknown File"``."""
        assert ContentExtractor.get_source_name_from_file(None) == "Unknown File"

    def test_get_source_name_from_file_no_name(self):
        """A file object lacking a ``name`` attribute maps to ``"Uploaded File"``."""
        upload = MagicMock()
        # Deleting the attribute makes later access on the mock raise
        # AttributeError, which simulates an object with no ``name``.
        del upload.name

        assert ContentExtractor.get_source_name_from_file(upload) == "Uploaded File"
|
|