Spaces:
Runtime error
Runtime error
Sync from GitHub via hub-sync
Browse files
- api/v1/analyze.py +55 -16
- api/v1/history.py +13 -3
- tests/test_api_regressions.py +110 -0
api/v1/analyze.py
CHANGED
|
@@ -98,6 +98,13 @@ def _compute_llm_summary(resp, *, record_id: int, user, media_kind: str, exclude
|
|
| 98 |
return None
|
| 99 |
|
| 100 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
@router.post("/image", response_model=ImageAnalysisResponse)
|
| 102 |
@limiter.limit(ANON_ANALYZE, exempt_when=is_authed)
|
| 103 |
@limiter.limit(AUTH_ANALYZE, exempt_when=is_anon)
|
|
@@ -288,6 +295,8 @@ async def analyze_image(
|
|
| 288 |
except Exception as e: # noqa: BLE001
|
| 289 |
logger.warning(f"VLM breakdown failed, continuing: {e}")
|
| 290 |
|
|
|
|
|
|
|
| 291 |
return resp
|
| 292 |
|
| 293 |
|
|
@@ -476,6 +485,8 @@ async def analyze_video_endpoint(
|
|
| 476 |
if llm:
|
| 477 |
resp.llm_summary = llm
|
| 478 |
|
|
|
|
|
|
|
| 479 |
return resp
|
| 480 |
|
| 481 |
|
|
@@ -616,6 +627,8 @@ async def analyze_text_endpoint(
|
|
| 616 |
if llm:
|
| 617 |
resp.llm_summary = llm
|
| 618 |
|
|
|
|
|
|
|
| 619 |
return resp
|
| 620 |
|
| 621 |
|
|
@@ -799,6 +812,8 @@ async def analyze_screenshot_endpoint(
|
|
| 799 |
if llm:
|
| 800 |
resp.llm_summary = llm
|
| 801 |
|
|
|
|
|
|
|
| 802 |
return resp
|
| 803 |
|
| 804 |
|
|
@@ -967,6 +982,7 @@ async def analyze_video_async(
|
|
| 967 |
|
| 968 |
|
| 969 |
@router.post("/audio", response_model=AudioAnalysisResponse)
|
|
|
|
| 970 |
@limiter.limit(AUTH_ANALYZE, exempt_when=is_anon)
|
| 971 |
async def analyze_audio_endpoint(
|
| 972 |
request: Request,
|
|
@@ -1042,34 +1058,57 @@ async def analyze_audio_endpoint(
|
|
| 1042 |
ml_prob = ml_score["fake_probability"]
|
| 1043 |
final_prob = 0.5 * heuristics_prob + 0.5 * ml_prob
|
| 1044 |
|
| 1045 |
-
|
| 1046 |
-
|
| 1047 |
-
elif final_prob > 0.45:
|
| 1048 |
-
verdict = Verdict.SUSPICIOUS
|
| 1049 |
-
else:
|
| 1050 |
-
verdict = Verdict.LIKELY_REAL
|
| 1051 |
|
| 1052 |
resp = AudioAnalysisResponse(
|
| 1053 |
analysis_id=str(uuid.uuid4()),
|
| 1054 |
timestamp=datetime.now(timezone.utc).isoformat(),
|
| 1055 |
-
verdict=
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1056 |
explainability=audio_expl,
|
| 1057 |
processing_summary=ProcessingSummary(
|
| 1058 |
-
|
| 1059 |
-
|
|
|
|
|
|
|
|
|
|
| 1060 |
)
|
| 1061 |
)
|
| 1062 |
|
| 1063 |
-
|
| 1064 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1065 |
|
| 1066 |
-
|
| 1067 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1068 |
)
|
| 1069 |
-
|
| 1070 |
-
|
| 1071 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1072 |
|
|
|
|
|
|
|
| 1073 |
return resp
|
| 1074 |
|
| 1075 |
|
|
|
|
| 98 |
return None
|
| 99 |
|
| 100 |
|
| 101 |
+
def _persist_response_payload(db: Session, record: AnalysisRecord, resp) -> None:
    """Keep reloaded/history responses aligned with the fresh API response.

    Re-serializes ``resp`` into ``record.result_json`` and commits, so that
    anything set on the response object after the row was first written is
    stored with the record as well.

    Args:
        db: Open SQLAlchemy session used to write ``record``.
        record: Row whose ``result_json`` column is overwritten.
        resp: Response model exposing Pydantic v2 ``model_dump``.

    Raises:
        Exception: Whatever ``db.commit()`` raises; the session is rolled
            back before the error is re-raised.
    """
    # mode="json" asks Pydantic for JSON-safe primitives, so json.dumps does
    # not raise TypeError on datetime/Enum/UUID field values.
    record.result_json = json.dumps(resp.model_dump(mode="json"))
    db.add(record)
    try:
        db.commit()
    except Exception:
        # Leave the session usable for the caller instead of stuck in a
        # failed transaction, then surface the original error unchanged.
        db.rollback()
        raise
|
| 106 |
+
|
| 107 |
+
|
| 108 |
@router.post("/image", response_model=ImageAnalysisResponse)
|
| 109 |
@limiter.limit(ANON_ANALYZE, exempt_when=is_authed)
|
| 110 |
@limiter.limit(AUTH_ANALYZE, exempt_when=is_anon)
|
|
|
|
| 295 |
except Exception as e: # noqa: BLE001
|
| 296 |
logger.warning(f"VLM breakdown failed, continuing: {e}")
|
| 297 |
|
| 298 |
+
resp.processing_summary.stages_completed = stages
|
| 299 |
+
_persist_response_payload(db, record, resp)
|
| 300 |
return resp
|
| 301 |
|
| 302 |
|
|
|
|
| 485 |
if llm:
|
| 486 |
resp.llm_summary = llm
|
| 487 |
|
| 488 |
+
resp.processing_summary.stages_completed = stages
|
| 489 |
+
_persist_response_payload(db, record, resp)
|
| 490 |
return resp
|
| 491 |
|
| 492 |
|
|
|
|
| 627 |
if llm:
|
| 628 |
resp.llm_summary = llm
|
| 629 |
|
| 630 |
+
resp.processing_summary.stages_completed = stages
|
| 631 |
+
_persist_response_payload(db, record, resp)
|
| 632 |
return resp
|
| 633 |
|
| 634 |
|
|
|
|
| 812 |
if llm:
|
| 813 |
resp.llm_summary = llm
|
| 814 |
|
| 815 |
+
resp.processing_summary.stages_completed = stages
|
| 816 |
+
_persist_response_payload(db, record, resp)
|
| 817 |
return resp
|
| 818 |
|
| 819 |
|
|
|
|
| 982 |
|
| 983 |
|
| 984 |
@router.post("/audio", response_model=AudioAnalysisResponse)
|
| 985 |
+
@limiter.limit(ANON_ANALYZE, exempt_when=is_authed)
|
| 986 |
@limiter.limit(AUTH_ANALYZE, exempt_when=is_anon)
|
| 987 |
async def analyze_audio_endpoint(
|
| 988 |
request: Request,
|
|
|
|
| 1058 |
ml_prob = ml_score["fake_probability"]
|
| 1059 |
final_prob = 0.5 * heuristics_prob + 0.5 * ml_prob
|
| 1060 |
|
| 1061 |
+
score = int(round(max(0.0, min(100.0, (1.0 - final_prob) * 100.0))))
|
| 1062 |
+
label, severity = get_verdict_label(score)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1063 |
|
| 1064 |
resp = AudioAnalysisResponse(
|
| 1065 |
analysis_id=str(uuid.uuid4()),
|
| 1066 |
timestamp=datetime.now(timezone.utc).isoformat(),
|
| 1067 |
+
verdict=Verdict(
|
| 1068 |
+
label=label,
|
| 1069 |
+
severity=severity,
|
| 1070 |
+
authenticity_score=score,
|
| 1071 |
+
model_confidence=final_prob,
|
| 1072 |
+
model_label="Deepfake-audio-detection-V2"
|
| 1073 |
+
),
|
| 1074 |
explainability=audio_expl,
|
| 1075 |
processing_summary=ProcessingSummary(
|
| 1076 |
+
stages_completed=stages,
|
| 1077 |
+
total_duration_ms=int((time.perf_counter() - start) * 1000),
|
| 1078 |
+
model_used="MelodyMachine/Deepfake-audio-detection-V2",
|
| 1079 |
+
models_used=["MelodyMachine/Deepfake-audio-detection-V2", "audio-signal-heuristics"],
|
| 1080 |
+
calibrator_applied=False,
|
| 1081 |
)
|
| 1082 |
)
|
| 1083 |
|
| 1084 |
+
ext = (mime.split("/")[-1] if mime else "mp3").replace("mpeg", "mp3").replace("x-wav", "wav")
|
| 1085 |
+
try:
|
| 1086 |
+
media_path = save_bytes(raw, media_hash, ext)
|
| 1087 |
+
except Exception as e: # noqa: BLE001
|
| 1088 |
+
logger.warning(f"audio media save failed: {e}")
|
| 1089 |
+
media_path = None
|
| 1090 |
|
| 1091 |
+
record = AnalysisRecord(
|
| 1092 |
+
user_id=user.id if user else None,
|
| 1093 |
+
media_type="audio",
|
| 1094 |
+
verdict=label,
|
| 1095 |
+
authenticity_score=float(score),
|
| 1096 |
+
result_json=json.dumps(resp.model_dump()),
|
| 1097 |
+
media_hash=media_hash,
|
| 1098 |
+
media_path=media_path,
|
| 1099 |
)
|
| 1100 |
+
db.add(record)
|
| 1101 |
+
db.commit()
|
| 1102 |
+
db.refresh(record)
|
| 1103 |
+
resp.record_id = record.id
|
| 1104 |
+
|
| 1105 |
+
llm = _compute_llm_summary(resp, record_id=record.id, user=user, media_kind="audio_deepfake_analysis")
|
| 1106 |
+
if llm:
|
| 1107 |
+
resp.llm_summary = llm
|
| 1108 |
+
stages.append("llm_summary")
|
| 1109 |
|
| 1110 |
+
resp.processing_summary.stages_completed = stages
|
| 1111 |
+
_persist_response_payload(db, record, resp)
|
| 1112 |
return resp
|
| 1113 |
|
| 1114 |
|
api/v1/history.py
CHANGED
|
@@ -7,7 +7,7 @@ from fastapi import APIRouter, Depends, HTTPException, Query, status
|
|
| 7 |
from pydantic import BaseModel
|
| 8 |
from sqlalchemy.orm import Session
|
| 9 |
|
| 10 |
-
from api.deps import get_current_user
|
| 11 |
from db.database import get_db
|
| 12 |
from db.models import AnalysisRecord, User
|
| 13 |
|
|
@@ -55,12 +55,22 @@ def list_history(
|
|
| 55 |
@router.get("/{record_id}")
|
| 56 |
def get_history_detail(
|
| 57 |
record_id: int,
|
| 58 |
-
|
|
|
|
| 59 |
db: Session = Depends(get_db),
|
| 60 |
):
|
| 61 |
r = db.query(AnalysisRecord).filter(AnalysisRecord.id == record_id).first()
|
| 62 |
-
if not r
|
| 63 |
raise HTTPException(status.HTTP_404_NOT_FOUND, "Analysis not found")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
try:
|
| 65 |
payload = json.loads(r.result_json)
|
| 66 |
# Inject storage fields from DB columns so the frontend can display full-size media
|
|
|
|
| 7 |
from pydantic import BaseModel
|
| 8 |
from sqlalchemy.orm import Session
|
| 9 |
|
| 10 |
+
from api.deps import get_current_user, optional_current_user
|
| 11 |
from db.database import get_db
|
| 12 |
from db.models import AnalysisRecord, User
|
| 13 |
|
|
|
|
| 55 |
@router.get("/{record_id}")
|
| 56 |
def get_history_detail(
|
| 57 |
record_id: int,
|
| 58 |
+
token: str | None = Query(None),
|
| 59 |
+
user: User | None = Depends(optional_current_user),
|
| 60 |
db: Session = Depends(get_db),
|
| 61 |
):
|
| 62 |
r = db.query(AnalysisRecord).filter(AnalysisRecord.id == record_id).first()
|
| 63 |
+
if not r:
|
| 64 |
raise HTTPException(status.HTTP_404_NOT_FOUND, "Analysis not found")
|
| 65 |
+
if user is None or r.user_id != user.id:
|
| 66 |
+
if r.user_id is not None or not token:
|
| 67 |
+
raise HTTPException(status.HTTP_404_NOT_FOUND, "Analysis not found")
|
| 68 |
+
try:
|
| 69 |
+
token_payload = json.loads(r.result_json)
|
| 70 |
+
except Exception:
|
| 71 |
+
raise HTTPException(status.HTTP_500_INTERNAL_SERVER_ERROR, "Corrupt result payload")
|
| 72 |
+
if token_payload.get("analysis_id") != token:
|
| 73 |
+
raise HTTPException(status.HTTP_404_NOT_FOUND, "Analysis not found")
|
| 74 |
try:
|
| 75 |
payload = json.loads(r.result_json)
|
| 76 |
# Inject storage fields from DB columns so the frontend can display full-size media
|
tests/test_api_regressions.py
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
import os
|
| 5 |
+
from datetime import datetime, timezone
|
| 6 |
+
|
| 7 |
+
import pytest
|
| 8 |
+
from sqlalchemy import create_engine
|
| 9 |
+
from sqlalchemy.orm import sessionmaker
|
| 10 |
+
|
| 11 |
+
os.environ["DEBUG"] = "false"
|
| 12 |
+
|
| 13 |
+
from api.v1.analyze import _persist_response_payload
|
| 14 |
+
from api.v1.history import get_history_detail
|
| 15 |
+
from db.models import AnalysisRecord
|
| 16 |
+
from db.database import Base
|
| 17 |
+
from schemas.analyze import TextAnalysisResponse, TextExplainability
|
| 18 |
+
from schemas.common import LLMExplainabilitySummary, ProcessingSummary, Verdict
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
@pytest.fixture()
def db_session():
    """Yield a session bound to a fresh in-memory SQLite database.

    The schema from ``Base.metadata`` is created before the test runs and
    dropped afterwards; the session is closed and the engine's connection
    pool is released on teardown.
    """
    engine = create_engine("sqlite:///:memory:", connect_args={"check_same_thread": False})
    Base.metadata.create_all(bind=engine)
    # Named session_factory (not Session) to avoid shadowing the SQLAlchemy class name.
    session_factory = sessionmaker(bind=engine)
    db = session_factory()
    try:
        yield db
    finally:
        db.close()
        Base.metadata.drop_all(bind=engine)
        # Release pooled connections so they do not linger until garbage collection.
        engine.dispose()
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def test_anonymous_history_detail_accepts_matching_analysis_token(db_session):
    """An ownerless record is returned when ``token`` equals its stored ``analysis_id``."""
    verdict_block = {"label": "Likely Real", "authenticity_score": 80}
    stored_payload = {
        "analysis_id": "public-token",
        "media_type": "text",
        "verdict": verdict_block,
    }
    anon_record = AnalysisRecord(
        user_id=None,
        media_type="text",
        verdict="Likely Real",
        authenticity_score=80,
        result_json=json.dumps(stored_payload),
    )
    db_session.add(anon_record)
    db_session.commit()
    db_session.refresh(anon_record)

    # Call the route function directly, passing the values its dependencies would supply.
    detail = get_history_detail(
        anon_record.id,
        token="public-token",
        user=None,
        db=db_session,
    )

    assert detail["analysis_id"] == "public-token"
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def test_anonymous_history_detail_rejects_missing_analysis_token(db_session):
    """An ownerless record is not served to an anonymous caller without a token."""
    record = AnalysisRecord(
        user_id=None,
        media_type="text",
        verdict="Likely Real",
        authenticity_score=80,
        result_json=json.dumps({"analysis_id": "public-token"}),
    )
    db_session.add(record)
    db_session.commit()
    db_session.refresh(record)

    with pytest.raises(Exception) as excinfo:
        get_history_detail(record.id, token=None, user=None, db=db_session)

    # Pin the rejection to the endpoint's 404 so an unrelated failure
    # (e.g. a TypeError from a changed signature) cannot satisfy this test.
    assert getattr(excinfo.value, "status_code", None) == 404
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def test_persist_response_payload_keeps_postprocessing_fields_for_reload(db_session):
    """``_persist_response_payload`` stores record id, LLM summary and stages in ``result_json``."""
    expected_stages = ["classification", "llm_explanation"]

    # Start from a row with an empty payload so every asserted field must
    # come from the persist call below.
    row = AnalysisRecord(
        user_id=1,
        media_type="text",
        verdict="Likely Real",
        authenticity_score=80,
        result_json="{}",
    )
    db_session.add(row)
    db_session.commit()
    db_session.refresh(row)

    verdict = Verdict(
        label="Likely Real",
        severity="positive",
        authenticity_score=80,
        model_confidence=0.2,
        model_label="real",
    )
    summary = ProcessingSummary(
        stages_completed=list(expected_stages),
        total_duration_ms=12,
        model_used="test-model",
    )
    response = TextAnalysisResponse(
        analysis_id="analysis-1",
        record_id=row.id,
        timestamp=datetime.now(timezone.utc).isoformat(),
        verdict=verdict,
        explainability=TextExplainability(fake_probability=0.2, top_label="real"),
        llm_summary=LLMExplainabilitySummary(paragraph="Persisted explanation"),
        processing_summary=summary,
    )

    _persist_response_payload(db_session, row, response)

    # Reload from the database so the assertions see what was actually stored.
    db_session.refresh(row)
    reloaded = json.loads(row.result_json)
    assert reloaded["record_id"] == row.id
    assert reloaded["llm_summary"]["paragraph"] == "Persisted explanation"
    assert reloaded["processing_summary"]["stages_completed"] == expected_stages
|