# yomitalk
#
# Sleeping
#
# File size: 10,402 Bytes

"""Unit tests for ContentExtractor class."""

from unittest.mock import MagicMock, patch

from yomitalk.components.content_extractor import ContentExtractor


class TestContentExtractor:
    """Unit tests covering the class-level API of ContentExtractor."""

    @staticmethod
    def _stub_conversion(converter, text):
        """Make the patched converter return a result whose ``text_content`` is ``text``."""
        converter.convert.return_value = MagicMock(text_content=text)

    def setup_method(self):
        """Per-test setup hook; intentionally a no-op.

        ContentExtractor is exercised through the class itself (its methods
        are classmethods), so there is no instance to build here.
        """

    def test_initialization(self):
        """The extension constants are lists and the combined list covers both kinds."""
        text_exts = ContentExtractor.SUPPORTED_TEXT_EXTENSIONS
        pdf_exts = ContentExtractor.SUPPORTED_PDF_EXTENSIONS
        every_ext = ContentExtractor.SUPPORTED_EXTENSIONS

        # Each constant must be a list.
        for constant in (text_exts, pdf_exts, every_ext):
            assert isinstance(constant, list)

        # Both the text and the PDF extensions must appear in the combined list.
        assert all(ext in every_ext for ext in text_exts)
        assert all(ext in every_ext for ext in pdf_exts)

    def test_supported_extensions(self):
        """Common extensions are registered and all show up in the combined list."""
        text_exts = ContentExtractor.SUPPORTED_TEXT_EXTENSIONS
        pdf_exts = ContentExtractor.SUPPORTED_PDF_EXTENSIONS

        # Well-known extensions are present in their respective lists.
        for common in (".txt", ".md"):
            assert common in text_exts
        assert ".pdf" in pdf_exts

        # Concatenating both lists yields only entries of the combined list.
        combined = text_exts + pdf_exts
        assert all(ext in ContentExtractor.SUPPORTED_EXTENSIONS for ext in combined)

    def test_extract_file_content(self):
        """extract_file_content returns the extension and raw bytes of a file object."""
        payload = b"This is test content."

        # ``name`` cannot be passed to the MagicMock constructor (it would name
        # the mock itself), so all attributes are set through configure_mock.
        upload = MagicMock()
        upload.configure_mock(
            **{
                "name": "test.txt",
                "read.return_value": payload,
                "tell.return_value": 0,
            }
        )

        suffix, data = ContentExtractor.extract_file_content(upload)

        assert suffix == ".txt"
        assert data == b"This is test content."

    def test_extract_text(self):
        """extract_text asks for an upload when it is given None."""
        # Only the missing-file case is covered so far; cases using a real
        # file object can be added here as needed.
        assert ContentExtractor.extract_text(None) == "Please upload a file."

    def test_is_url_valid_urls(self):
        """is_url returns True for well-formed http(s) URLs."""
        accepted = (
            "https://www.example.com",
            "http://example.com",
            "https://youtube.com/watch?v=dQw4w9WgXcQ",
            "https://en.wikipedia.org/wiki/Test",
            "https://feeds.feedburner.com/example",
            "https://www.bing.com/search?q=test",
        )

        for candidate in accepted:
            assert ContentExtractor.is_url(candidate) is True

    def test_is_url_invalid_urls(self):
        """is_url returns False for strings that are not http(s) URLs."""
        rejected = (
            "",
            "not a url",
            "example.com",  # Missing scheme
            "file://local/path",  # Local file path
            "ftp://example.com",  # Non-HTTP scheme
            "https://",  # Missing netloc
            "://example.com",  # Missing scheme
        )

        for candidate in rejected:
            assert ContentExtractor.is_url(candidate) is False

    def test_is_url_edge_cases(self):
        """is_url accepts a URL padded with whitespace and rejects None."""
        padded = "  https://example.com  "
        assert ContentExtractor.is_url(padded) is True

        assert ContentExtractor.is_url(None) is False

    @patch("yomitalk.components.content_extractor._markdown_converter")
    def test_extract_from_url_success(self, mock_converter):
        """extract_from_url returns the converter's text for a valid URL."""
        self._stub_conversion(mock_converter, "Extracted content from URL")
        target = "https://example.com/article"

        extracted = ContentExtractor.extract_from_url(target)

        assert extracted == "Extracted content from URL"
        mock_converter.convert.assert_called_once_with(target)

    @patch("yomitalk.components.content_extractor._markdown_converter")
    def test_extract_from_url_empty_content(self, mock_converter):
        """A converter result with text_content None is reported as an empty string."""
        self._stub_conversion(mock_converter, None)
        target = "https://example.com/empty"

        extracted = ContentExtractor.extract_from_url(target)

        assert extracted == ""
        mock_converter.convert.assert_called_once_with(target)

    @patch("yomitalk.components.content_extractor._markdown_converter")
    def test_extract_from_url_conversion_error(self, mock_converter):
        """An exception raised by the converter is reported in the returned text."""
        mock_converter.convert.side_effect = Exception("Connection error")
        target = "https://example.com/error"

        extracted = ContentExtractor.extract_from_url(target)

        assert "URL conversion error: Connection error" in extracted
        mock_converter.convert.assert_called_once_with(target)

    def test_extract_from_url_invalid_url(self):
        """A string that is not a URL yields the invalid-format message."""
        extracted = ContentExtractor.extract_from_url("not a url")

        assert extracted == "Invalid URL format."

    @patch("yomitalk.components.content_extractor._markdown_converter")
    def test_extract_from_url_youtube(self, mock_converter):
        """extract_from_url passes a YouTube URL to the converter and returns its text."""
        self._stub_conversion(mock_converter, "YouTube video transcript: How to code")
        target = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"

        extracted = ContentExtractor.extract_from_url(target)

        assert extracted == "YouTube video transcript: How to code"
        mock_converter.convert.assert_called_once_with(target)

    @patch("yomitalk.components.content_extractor._markdown_converter")
    def test_extract_from_url_wikipedia(self, mock_converter):
        """extract_from_url passes a Wikipedia URL to the converter and returns its text."""
        self._stub_conversion(mock_converter, "Wikipedia article about machine learning...")
        target = "https://en.wikipedia.org/wiki/Machine_learning"

        extracted = ContentExtractor.extract_from_url(target)

        assert extracted == "Wikipedia article about machine learning..."
        mock_converter.convert.assert_called_once_with(target)

    @patch("yomitalk.components.content_extractor._markdown_converter")
    def test_extract_from_url_rss_feed(self, mock_converter):
        """extract_from_url passes an RSS feed URL to the converter and returns its text."""
        self._stub_conversion(mock_converter, "RSS feed content: Latest news articles...")
        target = "https://feeds.feedburner.com/example"

        extracted = ContentExtractor.extract_from_url(target)

        assert extracted == "RSS feed content: Latest news articles..."
        mock_converter.convert.assert_called_once_with(target)

    def test_append_text_with_source_no_separator(self):
        """Without a separator the new text follows the old one after a blank line."""
        combined = ContentExtractor.append_text_with_source(
            "Existing content", "New content", "test.txt", add_separator=False
        )

        assert combined == "Existing content\n\nNew content"

    def test_append_text_with_source_with_separator(self):
        """With a separator a rule and a source header precede the new text."""
        combined = ContentExtractor.append_text_with_source(
            "Existing content", "New content", "test.txt", add_separator=True
        )

        assert combined == "Existing content\n\n---\n**Source: test.txt**\n\nNew content"

    def test_append_text_with_source_empty_existing(self):
        """Appending to empty text yields only the source header and the new text."""
        combined = ContentExtractor.append_text_with_source(
            "", "New content", "test.txt", add_separator=True
        )

        assert combined == "**Source: test.txt**\n\nNew content"

    def test_append_text_with_source_empty_new_text(self):
        """Appending empty text leaves the existing text untouched."""
        original = "Existing content"

        combined = ContentExtractor.append_text_with_source(
            original, "", "test.txt", add_separator=True
        )

        # Nothing to add, so the existing text must come back unchanged.
        assert combined == original

    def test_get_source_name_from_file(self):
        """The source name is the base name of the file object's path."""
        upload = MagicMock()
        upload.name = "/path/to/document.pdf"

        assert ContentExtractor.get_source_name_from_file(upload) == "document.pdf"

    def test_get_source_name_from_file_none(self):
        """None as file object yields the unknown-file placeholder."""
        assert ContentExtractor.get_source_name_from_file(None) == "Unknown File"

    def test_get_source_name_from_file_no_name(self):
        """A file object lacking a name attribute yields the uploaded-file placeholder."""
        nameless = MagicMock()
        # Deleting the attribute makes later access raise AttributeError
        # instead of auto-creating a child mock.
        del nameless.name

        assert ContentExtractor.get_source_name_from_file(nameless) == "Uploaded File"