| from pathlib import Path |
|
|
| import pytest |
|
|
|
|
@pytest.mark.asyncio
async def test_file_processor_pdf_magic_mode_uses_mineru_client(monkeypatch, tmp_path: Path):
    """Check that ``magic_pdf`` mode routes PDF extraction through the Mineru client.

    The DB config service and ``MineruAPIClient`` are both replaced with
    in-test fakes. The test passes when ``FileProcessor.process_file``
    returns the fake client's markdown and the fake recorded an
    ``extract_markdown`` call.
    """
    from landppt.services.file_processor import FileProcessor

    # Every interaction with the fake Mineru client is appended here as a
    # 3-tuple whose first element is the name of the method that was hit.
    recorded = []

    class FakeDBConfigService:
        """Stand-in config service that only knows the two Mineru settings."""

        async def get_config_value(self, key: str, user_id=None):
            known_values = {
                "mineru_api_key": "abc",
                "mineru_base_url": "https://mineru.net/api/v4",
            }
            # Any other key resolves to None.
            return known_values.get(key)

    def _make_fake_db_config_service():
        return FakeDBConfigService()

    import landppt.services.db_config_service as db_mod

    monkeypatch.setattr(
        db_mod,
        "get_db_config_service",
        _make_fake_db_config_service,
        raising=True,
    )

    class FakeMineruAPIClient:
        """Stand-in Mineru client that logs calls instead of doing network I/O."""

        def __init__(self, api_key=None, base_url=None, timeout=60.0):
            recorded.append(("init", api_key, base_url))
            self.api_key = api_key
            self.base_url = base_url

        @property
        def is_available(self):
            return True

        async def extract_markdown(self, file_path=None, pdf_url=None, **kwargs):
            recorded.append(("extract_markdown", file_path, pdf_url))
            return "MAGIC_CONTENT", {}

        async def close(self):
            recorded.append(("close", None, None))

    import summeryanyfile.core.mineru_api_client as mineru_mod

    monkeypatch.setattr(
        mineru_mod,
        "MineruAPIClient",
        FakeMineruAPIClient,
        raising=True,
    )

    from landppt.auth.request_context import current_user_id

    # Run the processor with user id 1 set in the request context; the
    # finally clause restores the previous value even if an assertion fails.
    user_token = current_user_id.set(1)
    try:
        sample_pdf = tmp_path / "demo.pdf"
        sample_pdf.write_bytes(b"%PDF-1.4\n%fake\n")

        processor = FileProcessor()
        outcome = await processor.process_file(
            str(sample_pdf),
            "demo.pdf",
            file_processing_mode="magic_pdf",
        )

        assert outcome.processed_content == "MAGIC_CONTENT"
        invoked_methods = {entry[0] for entry in recorded}
        assert "extract_markdown" in invoked_methods
    finally:
        current_user_id.reset(user_token)
|
|
|
|
def test_document_processor_passes_explicit_mineru_config(monkeypatch, tmp_path: Path):
    """Check that ``DocumentProcessor`` forwards its Mineru settings to the converter.

    ``MarkItDownConverter`` is replaced with a fake that records the
    ``mineru_api_key``, ``mineru_base_url`` and ``use_magic_pdf`` keyword
    arguments it was constructed with. The test passes when those match
    the values given to ``DocumentProcessor`` and the loaded document
    carries the fake converter's content.
    """
    from summeryanyfile.core.document_processor import DocumentProcessor

    # Log of fake-converter activity; each entry starts with the method name.
    recorded = []

    class FakeMarkItDownConverter:
        """Stand-in converter that records how it was built and used."""

        def __init__(self, **kwargs):
            tracked_keys = ("mineru_api_key", "mineru_base_url", "use_magic_pdf")
            # Missing keyword arguments are recorded as None.
            recorded.append(("init", *map(kwargs.get, tracked_keys)))

        def convert_file(self, file_path: str):
            recorded.append(("convert_file", file_path))
            return "MAGIC_CONTENT", "utf-8"

        def clean_markdown_content(self, content: str):
            return content

    import summeryanyfile.core.document_processor as document_processor_mod

    monkeypatch.setattr(
        document_processor_mod,
        "MarkItDownConverter",
        FakeMarkItDownConverter,
        raising=True,
    )

    sample_pdf = tmp_path / "demo.pdf"
    sample_pdf.write_bytes(b"%PDF-1.4\n%fake\n")

    doc_processor = DocumentProcessor(
        use_magic_pdf=True,
        enable_cache=False,
        mineru_api_key="abc",
        mineru_base_url="https://mineru.net/api/v4",
    )
    loaded = doc_processor.load_document(str(sample_pdf))

    assert loaded.content == "MAGIC_CONTENT"

    expected_init = ("init", "abc", "https://mineru.net/api/v4", True)
    assert expected_init in recorded

    invoked_methods = {entry[0] for entry in recorded}
    assert "convert_file" in invoked_methods
|
|