ppt-web / tests /test_file_processor_pdf_magic_mode.py
26fwyzpz6f-max
Clean deploy without binary files
6aecb2e
Raw
History Blame Contribute Delete
3.33 kB
from pathlib import Path
import pytest
@pytest.mark.asyncio
async def test_file_processor_pdf_magic_mode_uses_mineru_client(monkeypatch, tmp_path: Path):
from landppt.services.file_processor import FileProcessor
calls = []
class FakeDBConfigService:
async def get_config_value(self, key: str, user_id=None):
if key == "mineru_api_key":
return "abc"
if key == "mineru_base_url":
return "https://mineru.net/api/v4"
return None
import landppt.services.db_config_service as db_mod
monkeypatch.setattr(db_mod, "get_db_config_service", lambda: FakeDBConfigService(), raising=True)
class FakeMineruAPIClient:
def __init__(self, api_key=None, base_url=None, timeout=60.0):
calls.append(("init", api_key, base_url))
self.api_key = api_key
self.base_url = base_url
@property
def is_available(self):
return True
async def extract_markdown(self, file_path=None, pdf_url=None, **kwargs):
calls.append(("extract_markdown", file_path, pdf_url))
return "MAGIC_CONTENT", {}
async def close(self):
calls.append(("close", None, None))
import summeryanyfile.core.mineru_api_client as mineru_mod
monkeypatch.setattr(mineru_mod, "MineruAPIClient", FakeMineruAPIClient, raising=True)
from landppt.auth.request_context import current_user_id
token = current_user_id.set(1)
try:
pdf_path = tmp_path / "demo.pdf"
pdf_path.write_bytes(b"%PDF-1.4\n%fake\n")
fp = FileProcessor()
result = await fp.process_file(str(pdf_path), "demo.pdf", file_processing_mode="magic_pdf")
assert result.processed_content == "MAGIC_CONTENT"
assert any(c[0] == "extract_markdown" for c in calls)
finally:
current_user_id.reset(token)
def test_document_processor_passes_explicit_mineru_config(monkeypatch, tmp_path: Path):
from summeryanyfile.core.document_processor import DocumentProcessor
calls = []
class FakeMarkItDownConverter:
def __init__(self, **kwargs):
calls.append(
(
"init",
kwargs.get("mineru_api_key"),
kwargs.get("mineru_base_url"),
kwargs.get("use_magic_pdf"),
)
)
def convert_file(self, file_path: str):
calls.append(("convert_file", file_path))
return "MAGIC_CONTENT", "utf-8"
def clean_markdown_content(self, content: str):
return content
import summeryanyfile.core.document_processor as document_processor_mod
monkeypatch.setattr(document_processor_mod, "MarkItDownConverter", FakeMarkItDownConverter, raising=True)
pdf_path = tmp_path / "demo.pdf"
pdf_path.write_bytes(b"%PDF-1.4\n%fake\n")
processor = DocumentProcessor(
use_magic_pdf=True,
enable_cache=False,
mineru_api_key="abc",
mineru_base_url="https://mineru.net/api/v4",
)
document = processor.load_document(str(pdf_path))
assert document.content == "MAGIC_CONTENT"
assert ("init", "abc", "https://mineru.net/api/v4", True) in calls
assert any(call[0] == "convert_file" for call in calls)