Spaces:
Sleeping
Sleeping
File size: 2,548 Bytes
6a8a839 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 | from pathlib import Path
import fitz
from PIL import Image
from app.config import Settings
from app.services.image_processor import ImageProcessor
from app.services.pdf_processor import PdfProcessor
def make_settings(tmp_path: Path) -> Settings:
    """Return a ``Settings`` instance rooted at *tmp_path* for use in tests.

    The upload and output directories are placed at ``tmp_path / "uploads"``
    and ``tmp_path / "outputs"``; every other field is a fixed literal.

    Args:
        tmp_path: Base directory under which the upload/output paths are built.

    Returns:
        A ``Settings`` object constructed from the values below.
    """
    # Collect the constructor arguments first so the values are easy to scan;
    # they are passed to Settings as keyword arguments in this same order.
    field_values = {
        "app_name": "BitCheck Document Verification API",
        "version": "1.0.0",
        "upload_dir": tmp_path / "uploads",
        "output_dir": tmp_path / "outputs",
        "max_upload_mb": 20,
        "max_pdf_pages": 5,
        # No API key is supplied in this configuration.
        "deepseek_api_key": None,
        "deepseek_base_url": "https://api.deepseek.com",
        "deepseek_model": "deepseek-chat",
        "log_level": "INFO",
    }
    return Settings(**field_values)
def create_pdf(path: Path, pages: int = 1) -> Path:
    """Write a small text-bearing PDF to *path* and return *path*.

    A document is opened via ``fitz.open()`` with no arguments, one page is
    added per requested page, and each page gets the line
    ``BitCheck test page <n>`` (1-based) inserted at point ``(72, 72)``.

    Args:
        path: Destination file for the generated PDF.
        pages: Number of pages to add before saving.

    Returns:
        The same *path* that was passed in, for convenient chaining.
    """
    document = fitz.open()
    try:
        for page_number in range(1, pages + 1):
            page = document.new_page()
            page.insert_text((72, 72), f"BitCheck test page {page_number}")
        document.save(path)
    finally:
        # Always release the document, even if page creation or saving raises,
        # so a failing test does not leave the handle open.
        document.close()
    return path
def test_pdf_processing(tmp_path: Path) -> None:
    """Process a generated one-page PDF and check the reported analysis fields."""
    source_pdf = create_pdf(tmp_path / "sample.pdf")
    processor = PdfProcessor(make_settings(tmp_path))

    result = processor.process(source_pdf, max_pages=5)

    # Classification flags reported for the document.
    assert result.checked is True
    assert result.is_pdf is True
    assert result.is_encrypted is False
    assert result.has_text_layer is True
    assert result.image_only_pdf is False

    # Page accounting: one page exists and one page was processed.
    assert result.page_count == 1
    assert result.pages_processed == 1

    # The text written by create_pdf must appear in the extracted text.
    assert "BitCheck test page 1" in result.pdf_text

    # Exactly one rendered page is reported and its path exists on disk.
    assert len(result.rendered_pages) == 1
    assert Path(result.rendered_pages[0]).exists()
def test_image_processing(tmp_path: Path) -> None:
    """Process a 1000x1000 RGBA PNG and check the reported analysis fields."""
    source_png = tmp_path / "sample.png"
    opaque_white = (255, 255, 255, 255)
    canvas = Image.new("RGBA", (1000, 1000), opaque_white)
    canvas.save(source_png)

    processor = ImageProcessor(make_settings(tmp_path))
    report = processor.process(source_png)

    assert report.checked is True
    assert report.is_image is True

    # Dimensions and format match the image that was written above.
    assert report.width == 1000
    assert report.height == 1000
    assert report.format == "PNG"

    # The input was saved as RGBA; the analysis is expected to report RGB.
    assert report.mode == "RGB"

    # The normalized image exists and is the only entry in page_images.
    assert Path(report.normalized_image).exists()
    assert report.page_images == [report.normalized_image]
def test_max_pages_truncation_creates_warning(tmp_path: Path) -> None:
    """Process a three-page PDF with ``max_pages=1`` and check flag and warning."""
    source_pdf = create_pdf(tmp_path / "multi-page.pdf", pages=3)
    processor = PdfProcessor(make_settings(tmp_path))

    outcome = processor.process(source_pdf, max_pages=1)

    # All three pages are counted, but only one is processed.
    assert outcome.page_count == 3
    assert outcome.pages_processed == 1

    # The truncation is reported as exactly one flag and one warning.
    expected_flags = ["max_pages_truncated"]
    expected_warnings = ["PDF page processing truncated from 3 to 1 page(s)."]
    assert outcome.flags == expected_flags
    assert outcome.warnings == expected_warnings
|