SciPeerAI-API / tests /test_figure_forensics.py
Abu-Sameer-66
deploy: SciPeerAI v2.3.0 β€” 24 modules, 209 tests, Phase 6 complete
a53c25d
# tests/test_figure_forensics.py
#
# Testing figure forensics without real PDFs.
# We test the core algorithms directly β€” hash comparison,
# ELA computation, brightness analysis.
import io
import pytest
import numpy as np
from PIL import Image
from src.scipeerai.modules.figure_forensics import (
FigureForensicsEngine,
ExtractedFigure,
FigureForensicsResult,
)
@pytest.fixture
def engine():
return FigureForensicsEngine()
def make_figure(color=(128, 64, 32), size=(100, 100), page=1, idx=0):
"""Helper β€” create a fake ExtractedFigure with a solid color image."""
img = Image.new("RGB", size, color=color)
return ExtractedFigure(
page_number=page,
figure_index=idx,
width=size[0],
height=size[1],
image=img,
)
# ── duplicate detection ───────────────────────────────────────────────────────
def test_identical_images_flagged_as_duplicates(engine):
# same image twice β€” must be caught
fig_a = make_figure(color=(200, 100, 50), idx=0, page=1)
fig_b = make_figure(color=(200, 100, 50), idx=1, page=3)
flags, pairs = engine._check_duplicates([fig_a, fig_b])
assert len(pairs) == 1
assert any(f.flag_type == "duplicate_figures" for f in flags)
def test_different_images_not_flagged(engine):
# solid colors fool phash β€” real figures have texture and detail
# so we test with noise-based images, which represent actual paper figures
import numpy as np
rng = np.random.default_rng(seed=42)
arr_a = rng.integers(0, 255, (100, 100, 3), dtype=np.uint8)
arr_b = rng.integers(0, 128, (100, 100, 3), dtype=np.uint8)
arr_b[:, :, 0] = 255 - arr_b[:, :, 0] # invert red channel β€” maximally different
img_a = Image.fromarray(arr_a, "RGB")
img_b = Image.fromarray(arr_b, "RGB")
fig_a = ExtractedFigure(1, 0, 100, 100, img_a)
fig_b = ExtractedFigure(2, 1, 100, 100, img_b)
flags, pairs = engine._check_duplicates([fig_a, fig_b])
assert len(pairs) == 0
# ── brightness uniformity ──────────────────────────────────────────────────────
def test_flat_image_high_uniformity(engine):
# solid color = perfectly uniform = suspicious
fig = make_figure(color=(128, 128, 128))
score = engine._compute_brightness_uniformity(fig.image)
assert score > 0.90
def test_noisy_image_low_uniformity(engine):
# random noise = natural-looking variation
noise_array = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)
noisy_img = Image.fromarray(noise_array, "RGB")
fig = ExtractedFigure(1, 0, 100, 100, noisy_img)
score = engine._compute_brightness_uniformity(fig.image)
assert score < 0.70
# ── ela computation ────────────────────────────────────────────────────────────
def test_ela_returns_float(engine):
fig = make_figure()
score = engine._compute_ela_score(fig.image)
assert isinstance(score, float)
assert score >= 0.0
# ── result structure ───────────────────────────────────────────────────────────
def test_no_figures_returns_clean_result(engine, tmp_path):
# create minimal valid PDF with no images
import fitz
pdf_path = tmp_path / "empty.pdf"
doc = fitz.open()
page = doc.new_page()
page.insert_text((50, 50), "This paper has no figures.")
doc.save(str(pdf_path))
doc.close()
result = engine.analyze(str(pdf_path))
assert isinstance(result, FigureForensicsResult)
assert result.figures_found == 0
assert result.risk_level == "low"
assert result.risk_score == 0.0