Spaces:

ar07xd
/

deepshield

Runtime error

App Files Files Community

Spyderzz committed on Apr 26

Commit

4c9e797

1 Parent(s): e2bfbf6

fix analyze cache and language options

Browse files

Files changed (1) hide show

api/v1/analyze.py +463 -84

api/v1/analyze.py CHANGED Viewed

@@ -1,12 +1,10 @@
-from __future__ import annotations
 import json
 import os
 import time
 import uuid
 from datetime import datetime, timezone
-from fastapi import APIRouter, Body, Depends, File, UploadFile
 from pydantic import BaseModel
 from loguru import logger
 from sqlalchemy.orm import Session
@@ -40,11 +38,10 @@ from services.screenshot_service import (
 )
 from services.ela_service import generate_ela_base64
 from services.exif_service import extract_exif
-from services.image_service import load_image_from_bytes
 from services.llm_explainer import generate_llm_summary
 from schemas.common import ProcessingSummary, Verdict
 from services.artifact_detector import scan_artifacts
-from services.image_service import preprocess_and_classify
 from services.news_lookup import search_news_full
 from services.vlm_breakdown import generate_vlm_breakdown
 from services.text_service import (
@@ -55,8 +52,22 @@ from services.text_service import (
     score_sensationalism,
 )
 from services.video_service import analyze_video
 from utils.file_handler import read_upload_bytes, save_upload_to_tempfile
-from utils.scoring import compute_authenticity_score, get_verdict_label
 router = APIRouter(prefix="/analyze", tags=["analyze"])
@@ -64,9 +75,34 @@ IMAGE_MAX_MB = 20
 VIDEO_MAX_MB = 100
 VIDEO_NUM_FRAMES = 16
 @router.post("/image", response_model=ImageAnalysisResponse)
 async def analyze_image(
     file: UploadFile = File(...),
     db: Session = Depends(get_db),
     user: User | None = Depends(optional_current_user),
@@ -79,8 +115,16 @@ async def analyze_image(
     )
     stages.append("validation")
-    pil, clf = preprocess_and_classify(raw)
-    stages.append("classification")
     indicators = scan_artifacts(pil, raw)
     stages.append("artifact_scanning")
@@ -89,7 +133,10 @@ async def analyze_image(
     heatmap_status = "success"
     heatmap = ""
     try:
-        heatmap = generate_heatmap_base64(pil)
         stages.append("heatmap_generation")
     except Exception as e:  # noqa: BLE001
         logger.warning(f"Heatmap generation failed, continuing: {e}")
@@ -119,18 +166,37 @@ async def analyze_image(
     except Exception as e:  # noqa: BLE001
         logger.warning(f"EXIF extraction failed, continuing: {e}")
     score = compute_authenticity_score(clf.confidence, clf.label)
-    # Apply EXIF trust adjustment to the score
-    if exif_summary and exif_summary.trust_adjustment != 0:
-        score = int(round(max(0, min(100, score + exif_summary.trust_adjustment))))
     label, severity = get_verdict_label(score)
     duration_ms = int((time.perf_counter() - start) * 1000)
-    analysis_id = str(uuid.uuid4())
-    response = ImageAnalysisResponse(
         analysis_id=analysis_id,
         media_type="image",
         timestamp=datetime.now(timezone.utc).isoformat(),
@@ -148,6 +214,8 @@ async def analyze_image(
             heatmap_status=heatmap_status,
             artifact_indicators=indicators,
             exif=exif_summary,
         ),
         trusted_sources=[],
         contradicting_evidence=[],
@@ -155,51 +223,81 @@ async def analyze_image(
             stages_completed=stages,
             total_duration_ms=duration_ms,
             model_used=settings.IMAGE_MODEL_ID,
         ),
     )
     record = AnalysisRecord(
         user_id=user.id if user else None,
         media_type="image",
         verdict=label,
         authenticity_score=float(score),
-        result_json=json.dumps(response.model_dump(
             exclude={"explainability": {"heatmap_base64", "ela_base64", "boxes_base64"}}
         )),
     )
     db.add(record)
     db.commit()
     db.refresh(record)
-    response.record_id = record.id
     logger.info(f"Saved AnalysisRecord id={record.id} score={score} verdict={label}")
-    # ── Phase 12: LLM explainability card (runs after DB save so we have record_id) ──
-    try:
-        llm_summary = generate_llm_summary(
-            payload=response.model_dump(
-                exclude={"explainability": {"heatmap_base64", "ela_base64", "boxes_base64"}}
-            ),
-            record_id=str(record.id),
-        )
-        response.explainability.llm_summary = llm_summary
         stages.append("llm_explanation")
-    except Exception as e:  # noqa: BLE001
-        logger.warning(f"LLM explainer failed, continuing: {e}")
-    # ── Phase 14: VLM detailed breakdown (vision LLM scores 6 perceptual components) ──
-    try:
-        vlm_bd = generate_vlm_breakdown(pil, record_id=str(record.id))
-        if vlm_bd:
-            response.explainability.vlm_breakdown = vlm_bd
-            stages.append("vlm_breakdown")
-    except Exception as e:  # noqa: BLE001
-        logger.warning(f"VLM breakdown failed, continuing: {e}")
-    return response
 @router.post("/video", response_model=VideoAnalysisResponse)
 async def analyze_video_endpoint(
     file: UploadFile = File(...),
     db: Session = Depends(get_db),
     user: User | None = Depends(optional_current_user),
@@ -213,27 +311,67 @@ async def analyze_video_endpoint(
     )
     stages.append("validation")
     try:
         agg = analyze_video(path, num_frames=VIDEO_NUM_FRAMES)
         stages.append("frame_extraction")
         stages.append("frame_classification")
         stages.append("aggregation")
-    finally:
         try:
             os.unlink(path)
         except OSError:
             pass
-    if agg.insufficient_faces:
-        score = 50
-        label = "Insufficient face content"
-        severity = "warning"
-    else:
-        score = int(round(max(0.0, min(100.0, (1.0 - agg.mean_suspicious_prob) * 100.0))))
-        label, severity = get_verdict_label(score)
     duration_ms = int((time.perf_counter() - start) * 1000)
-    response = VideoAnalysisResponse(
         analysis_id=str(uuid.uuid4()),
         media_type="video",
         timestamp=datetime.now(timezone.utc).isoformat(),
@@ -266,47 +404,86 @@ async def analyze_video_endpoint(
                 )
                 for f in agg.frames
             ],
         ),
         processing_summary=ProcessingSummary(
             stages_completed=stages,
             total_duration_ms=duration_ms,
             model_used=settings.IMAGE_MODEL_ID,
         ),
     )
     record = AnalysisRecord(
         user_id=user.id if user else None,
         media_type="video",
         verdict=label,
         authenticity_score=float(score),
-        result_json=json.dumps(response.model_dump()),
     )
     db.add(record)
     db.commit()
     db.refresh(record)
-    response.record_id = record.id
     logger.info(
         f"Saved AnalysisRecord id={record.id} video score={score} verdict={label} "
         f"frames={agg.num_frames_sampled} susp={agg.num_suspicious_frames}"
     )
-    # Phase 12: LLM explainability card
     try:
-        response.llm_summary = generate_llm_summary(
-            payload=response.model_dump(), record_id=str(record.id),
         )
     except Exception as e:  # noqa: BLE001
-        logger.warning(f"LLM explainer failed for video: {e}")
-    return response
 class TextAnalyzeBody(BaseModel):
     text: str
 @router.post("/text", response_model=TextAnalysisResponse)
 async def analyze_text_endpoint(
     body: TextAnalyzeBody = Body(...),
     db: Session = Depends(get_db),
     user: User | None = Depends(optional_current_user),
@@ -315,7 +492,7 @@ async def analyze_text_endpoint(
     stages: list[str] = []
     # Phase 13: language detection — routes to multilang model when non-English
-    lang = detect_language(body.text)
     stages.append("language_detection")
     clf = classify_text(body.text, language=lang)
@@ -345,10 +522,12 @@ async def analyze_text_endpoint(
         effective_fake_prob = news.truth_override.fake_prob_after
         stages.append("truth_override_applied")
-    # Weighted score: 70% classifier + 20% inverse sensationalism + 10% manipulation penalty
     manip_penalty = min(len(manip) * 5, 30)
     raw_score = (1.0 - effective_fake_prob) * 100.0
-    weighted = raw_score * 0.70 + max(0, 100 - sens.score) * 0.20 + max(0, 100 - manip_penalty) * 0.10
     score = int(round(max(0.0, min(100.0, weighted))))
     label, severity = get_verdict_label(score)
     duration_ms = int((time.perf_counter() - start) * 1000)
@@ -358,7 +537,7 @@ async def analyze_text_endpoint(
         else settings.TEXT_MODEL_ID
     )
-    response = TextAnalysisResponse(
         analysis_id=str(uuid.uuid4()),
         media_type="text",
         timestamp=datetime.now(timezone.utc).isoformat(),
@@ -403,6 +582,7 @@ async def analyze_text_endpoint(
             stages_completed=stages,
             total_duration_ms=duration_ms,
             model_used=model_used,
         ),
     )
@@ -411,27 +591,30 @@ async def analyze_text_endpoint(
         media_type="text",
         verdict=label,
         authenticity_score=float(score),
-        result_json=json.dumps(response.model_dump()),
     )
     db.add(record)
     db.commit()
     db.refresh(record)
-    response.record_id = record.id
     logger.info(f"Saved AnalysisRecord id={record.id} text score={score} verdict={label}")
-    # Phase 12: LLM explainability card
-    try:
-        response.llm_summary = generate_llm_summary(
-            payload=response.model_dump(), record_id=str(record.id),
-        )
-    except Exception as e:  # noqa: BLE001
-        logger.warning(f"LLM explainer failed for text: {e}")
-    return response
 @router.post("/screenshot", response_model=ScreenshotAnalysisResponse)
 async def analyze_screenshot_endpoint(
     file: UploadFile = File(...),
     db: Session = Depends(get_db),
     user: User | None = Depends(optional_current_user),
@@ -444,6 +627,15 @@ async def analyze_screenshot_endpoint(
     )
     stages.append("validation")
     pil = load_image_from_bytes(raw)
     ocr_boxes = run_ocr(pil)
     stages.append("ocr")
@@ -451,7 +643,7 @@ async def analyze_screenshot_endpoint(
     full_text = extract_full_text(ocr_boxes)
     # Phase 13: language detection on extracted OCR text
-    lang = detect_language(full_text) if full_text else "en"
     stages.append("language_detection")
     clf = classify_text(full_text, language=lang) if full_text else None
@@ -493,12 +685,12 @@ async def analyze_screenshot_endpoint(
     manip_penalty = min(len(manip) * 5, 30)
     layout_penalty = min(len(layout) * 5, 15)
     raw_score = (1.0 - effective_fake_prob) * 100.0
-    weighted = (
-        raw_score * 0.65
-        + max(0, 100 - sens.score) * 0.20
-        + max(0, 100 - manip_penalty) * 0.10
-        + max(0, 100 - layout_penalty) * 0.05
     )
     if not full_text.strip():
         weighted = 50
     score = int(round(max(0.0, min(100.0, weighted))))
@@ -511,7 +703,7 @@ async def analyze_screenshot_endpoint(
         else f"{settings.TEXT_MODEL_ID} + EasyOCR"
     )
-    response = ScreenshotAnalysisResponse(
         analysis_id=str(uuid.uuid4()),
         media_type="screenshot",
         timestamp=datetime.now(timezone.utc).isoformat(),
@@ -554,28 +746,215 @@ async def analyze_screenshot_endpoint(
             stages_completed=stages,
             total_duration_ms=duration_ms,
             model_used=model_used_str,
         ),
     )
     record = AnalysisRecord(
         user_id=user.id if user else None,
         media_type="screenshot",
         verdict=label,
         authenticity_score=float(score),
-        result_json=json.dumps(response.model_dump()),
     )
     db.add(record)
     db.commit()
     db.refresh(record)
-    response.record_id = record.id
     logger.info(f"Saved AnalysisRecord id={record.id} screenshot score={score} verdict={label}")
-    # Phase 12: LLM explainability card
-    try:
-        response.llm_summary = generate_llm_summary(
-            payload=response.model_dump(), record_id=str(record.id),
-        )
-    except Exception as e:  # noqa: BLE001
-        logger.warning(f"LLM explainer failed for screenshot: {e}")
-    return response

 import json
 import os
 import time
 import uuid
 from datetime import datetime, timezone
+from fastapi import APIRouter, BackgroundTasks, Body, Depends, File, HTTPException, Query, Request, Response, UploadFile, status
 from pydantic import BaseModel
 from loguru import logger
 from sqlalchemy.orm import Session
 )
 from services.ela_service import generate_ela_base64
 from services.exif_service import extract_exif
+from services.image_service import classify_image, load_image_from_bytes
 from services.llm_explainer import generate_llm_summary
 from schemas.common import ProcessingSummary, Verdict
 from services.artifact_detector import scan_artifacts
 from services.news_lookup import search_news_full
 from services.vlm_breakdown import generate_vlm_breakdown
 from services.text_service import (
     score_sensationalism,
 )
 from services.video_service import analyze_video
+from services.audio_service import analyze_audio, AudioAnalysis
+from services.metadata_writer import write_verdict_metadata
+from services.rate_limit import ANON_ANALYZE, AUTH_ANALYZE, is_anon, is_authed, limiter
+from services.dedup_cache import lookup_cached, cached_payload
+from services.storage import (
+    make_image_thumbnail,
+    make_video_thumbnail,
+    save_bytes,
+    save_file,
+    save_overlay,
+    sha256_bytes,
+    sha256_file,
+)
+from services.job_queue import registry as job_registry, run_job
 from utils.file_handler import read_upload_bytes, save_upload_to_tempfile
+from utils.scoring import compute_authenticity_score, compute_video_authenticity_score, get_verdict_label
 router = APIRouter(prefix="/analyze", tags=["analyze"])
 VIDEO_MAX_MB = 100
 VIDEO_NUM_FRAMES = 16
+# model_dump() exclude spec naming the three base64 overlay fields under
+# `explainability`; passed as `exclude=` when building the image LLM-summary payload.
+_IMAGE_EXCLUDE = {"explainability": {"heatmap_base64", "ela_base64", "boxes_base64"}}
+def _resolve_language_hint(text: str, language_hint: str | None) -> str:
+    """Choose the language code to use for `text`.
+    An explicit hint other than "auto" wins and is returned trimmed and
+    lower-cased; it is not validated here. A missing, empty, whitespace-only
+    or "auto" hint falls back to `detect_language(text)`.
+    """
+    # None and "" collapse to "auto"; a whitespace-only hint strips to "" and
+    # is rejected by the truthiness check below.
+    hint = (language_hint or "auto").strip().lower()
+    if hint and hint != "auto":
+        return hint
+    return detect_language(text)
+def _compute_llm_summary(resp, *, record_id: int, user, media_kind: str, exclude: dict | None = None):
+    """Generate the LLM summary for `resp`, returning None if generation fails.
+    `resp` is dumped with `model_dump()` (omitting the `exclude` fields when a
+    non-empty spec is given) and passed to `generate_llm_summary` together with
+    the stringified `record_id`. Any exception is logged as a warning tagged
+    with `media_kind` and swallowed, so a provider failure never propagates.
+    NOTE(review): `user` is accepted but never read in this body, although call
+    sites describe the card as "authed users only" - confirm whether gating on
+    `user` was intended here.
+    """
+    try:
+        # A falsy `exclude` (None or an empty dict) dumps the full response.
+        payload = resp.model_dump(exclude=exclude) if exclude else resp.model_dump()
+        return generate_llm_summary(payload=payload, record_id=str(record_id))
+    except Exception as e:  # noqa: BLE001
+        # Best-effort: log and report "no summary" instead of raising.
+        logger.warning(f"LLM explainer failed for {media_kind}: {e}")
+        return None
 @router.post("/image", response_model=ImageAnalysisResponse)
+@limiter.limit(ANON_ANALYZE, exempt_when=is_authed)
+@limiter.limit(AUTH_ANALYZE, exempt_when=is_anon)
 async def analyze_image(
+    request: Request,
+    response: Response,
+    cache: bool = Query(default=True),
+    language_hint: str = Query(default="auto"),
     file: UploadFile = File(...),
     db: Session = Depends(get_db),
     user: User | None = Depends(optional_current_user),
     )
     stages.append("validation")
+    # Phase 19.1 — SHA-256 dedup cache
+    media_hash = sha256_bytes(raw)
+    cached = lookup_cached(db, media_hash=media_hash, media_type="image", user_id=user.id if user else None) if cache else None
+    if cached is not None:
+        payload = cached_payload(cached)
+        if payload is not None:
+            logger.info(f"cache hit image sha={media_hash[:12]} record={cached.id}")
+            return ImageAnalysisResponse.model_validate(payload)
+    pil = load_image_from_bytes(raw)
     indicators = scan_artifacts(pil, raw)
     stages.append("artifact_scanning")
     heatmap_status = "success"
     heatmap = ""
     try:
+        model_family = "efficientnet" if settings.ENSEMBLE_MODE else "vit"
+        heatmap, heatmap_source = generate_heatmap_base64(pil, model_family=model_family)
+        if not heatmap:
+            heatmap_status = heatmap_source  # "none" or "fallback"
         stages.append("heatmap_generation")
     except Exception as e:  # noqa: BLE001
         logger.warning(f"Heatmap generation failed, continuing: {e}")
     except Exception as e:  # noqa: BLE001
         logger.warning(f"EXIF extraction failed, continuing: {e}")
+    clf = classify_image(pil, artifact_indicators=indicators, exif=exif_summary)
+    stages.append("classification")
+    analysis_id = str(uuid.uuid4())
+    vlm_bd = None
+    if user is not None and clf.no_face_analysis is not None:
+        try:
+            vlm_bd = generate_vlm_breakdown(pil, record_id=analysis_id)
+            if vlm_bd:
+                clf = classify_image(
+                    pil,
+                    artifact_indicators=indicators,
+                    exif=exif_summary,
+                    vlm_breakdown=vlm_bd,
+                )
+                stages.append("vlm_no_face_fusion")
+        except Exception as e:  # noqa: BLE001
+            logger.warning(f"VLM no-face fusion failed, continuing: {e}")
     score = compute_authenticity_score(clf.confidence, clf.label)
+    # Apply EXIF trust adjustment.
+    # trust_adjustment convention: negative = more real → subtract to RAISE authenticity score.
+    # positive = more fake → subtract to LOWER authenticity score.
+    if clf.no_face_analysis is None and exif_summary and exif_summary.trust_adjustment != 0:
+        score = int(round(max(0, min(100, score - exif_summary.trust_adjustment))))
     label, severity = get_verdict_label(score)
     duration_ms = int((time.perf_counter() - start) * 1000)
+    resp = ImageAnalysisResponse(
         analysis_id=analysis_id,
         media_type="image",
         timestamp=datetime.now(timezone.utc).isoformat(),
             heatmap_status=heatmap_status,
             artifact_indicators=indicators,
             exif=exif_summary,
+            no_face_analysis=clf.no_face_analysis,
+            vlm_breakdown=vlm_bd,
         ),
         trusted_sources=[],
         contradicting_evidence=[],
             stages_completed=stages,
             total_duration_ms=duration_ms,
             model_used=settings.IMAGE_MODEL_ID,
+            models_used=clf.models_used,
+            calibrator_applied=clf.calibrator_applied,
         ),
     )
+    # Phase 19.2 — persist original bytes + thumbnail under content-address
+    ext = (mime.split("/")[-1] if mime else "jpg").replace("jpeg", "jpg")
+    try:
+        media_path = save_bytes(raw, media_hash, ext)
+    except Exception as e:  # noqa: BLE001
+        logger.warning(f"media save failed: {e}")
+        media_path = None
+    thumbnail_url = make_image_thumbnail(pil, media_hash)
+    resp.thumbnail_url = thumbnail_url
+    if media_path:
+        resp.media_path = media_path
+    # Persist overlay images so they survive page reloads (base64 excluded from DB)
+    if heatmap:
+        url = save_overlay(heatmap, media_hash, "heatmap")
+        if url:
+            resp.explainability.heatmap_url = url
+    if ela_b64:
+        url = save_overlay(ela_b64, media_hash, "ela")
+        if url:
+            resp.explainability.ela_url = url
+    if boxes_b64:
+        url = save_overlay(boxes_b64, media_hash, "boxes")
+        if url:
+            resp.explainability.boxes_url = url
     record = AnalysisRecord(
         user_id=user.id if user else None,
         media_type="image",
         verdict=label,
         authenticity_score=float(score),
+        result_json=json.dumps(resp.model_dump(
             exclude={"explainability": {"heatmap_base64", "ela_base64", "boxes_base64"}}
         )),
+        media_hash=media_hash,
+        media_path=media_path,
+        thumbnail_url=thumbnail_url,
     )
     db.add(record)
     db.commit()
     db.refresh(record)
+    resp.record_id = record.id
     logger.info(f"Saved AnalysisRecord id={record.id} score={score} verdict={label}")
+    # ── Phase 12+14: LLM + VLM cards (authed users only — conserves LLM quota) ──
+    llm_summary = _compute_llm_summary(resp, record_id=record.id, user=user, media_kind="image", exclude=_IMAGE_EXCLUDE)
+    if llm_summary:
+        resp.explainability.llm_summary = llm_summary
         stages.append("llm_explanation")
+    if user is not None and vlm_bd is None:
+        try:
+            vlm_bd = generate_vlm_breakdown(pil, record_id=str(record.id))
+            if vlm_bd:
+                resp.explainability.vlm_breakdown = vlm_bd
+                stages.append("vlm_breakdown")
+        except Exception as e:  # noqa: BLE001
+            logger.warning(f"VLM breakdown failed, continuing: {e}")
+    return resp
 @router.post("/video", response_model=VideoAnalysisResponse)
+@limiter.limit(ANON_ANALYZE, exempt_when=is_authed)
+@limiter.limit(AUTH_ANALYZE, exempt_when=is_anon)
 async def analyze_video_endpoint(
+    request: Request,
+    response: Response,
+    cache: bool = Query(default=True),
+    language_hint: str = Query(default="auto"),
     file: UploadFile = File(...),
     db: Session = Depends(get_db),
     user: User | None = Depends(optional_current_user),
     )
     stages.append("validation")
+    # Phase 19.1 — dedup cache (hash temp file before running pipeline)
+    media_hash = sha256_file(path)
+    cached = lookup_cached(db, media_hash=media_hash, media_type="video", user_id=user.id if user else None) if cache else None
+    if cached is not None:
+        payload = cached_payload(cached)
+        if payload is not None:
+            try:
+                os.unlink(path)
+            except OSError:
+                pass
+            logger.info(f"cache hit video sha={media_hash[:12]} record={cached.id}")
+            return VideoAnalysisResponse.model_validate(payload)
     try:
         agg = analyze_video(path, num_frames=VIDEO_NUM_FRAMES)
         stages.append("frame_extraction")
         stages.append("frame_classification")
         stages.append("aggregation")
+        if agg.temporal:
+            stages.append("temporal_analysis")
+    except Exception:
         try:
             os.unlink(path)
         except OSError:
             pass
+        raise
+    # Phase 17.2 — audio analysis (needs file path, runs before cleanup)
+    audio_result: AudioAnalysis | None = None
+    try:
+        audio_result = analyze_audio(path)
+        if audio_result:
+            stages.append("audio_analysis")
+    except Exception as _ae:  # noqa: BLE001
+        logger.warning(f"Audio analysis failed, continuing: {_ae}")
+    # Phase 17.3 — combined verdict formula
+    score, label, severity = compute_video_authenticity_score(
+        mean_suspicious_prob=agg.mean_suspicious_prob,
+        insufficient_faces=agg.insufficient_faces,
+        temporal_score=agg.temporal.temporal_score if agg.temporal else None,
+        audio_authenticity_score=audio_result.audio_authenticity_score if audio_result else None,
+        has_audio=bool(audio_result and audio_result.has_audio),
+    )
     duration_ms = int((time.perf_counter() - start) * 1000)
+    from schemas.analyze import AudioExplainability
+    audio_ex = None
+    if audio_result:
+        audio_ex = AudioExplainability(
+            audio_authenticity_score=audio_result.audio_authenticity_score,
+            has_audio=audio_result.has_audio,
+            duration_s=audio_result.duration_s,
+            silence_ratio=audio_result.silence_ratio,
+            spectral_variance=audio_result.spectral_variance,
+            rms_consistency=audio_result.rms_consistency,
+            notes=audio_result.notes,
+        )
+    resp = VideoAnalysisResponse(
         analysis_id=str(uuid.uuid4()),
         media_type="video",
         timestamp=datetime.now(timezone.utc).isoformat(),
                 )
                 for f in agg.frames
             ],
+            temporal_score=agg.temporal.temporal_score if agg.temporal else None,
+            optical_flow_variance=agg.temporal.optical_flow_variance if agg.temporal else None,
+            flicker_score=agg.temporal.flicker_score if agg.temporal else None,
+            blink_rate_anomaly=agg.temporal.blink_rate_anomaly if agg.temporal else None,
+            audio=audio_ex,
         ),
         processing_summary=ProcessingSummary(
             stages_completed=stages,
             total_duration_ms=duration_ms,
             model_used=settings.IMAGE_MODEL_ID,
+            models_used=agg.models_used,
+            calibrator_applied=agg.calibrator_applied,
         ),
     )
+    # Phase 19.2 — persist video + thumbnail frame
+    try:
+        media_path = save_file(path, media_hash, suffix.lstrip("."))
+    except Exception as e:  # noqa: BLE001
+        logger.warning(f"video media save failed: {e}")
+        media_path = None
+    thumbnail_url = make_video_thumbnail(path, media_hash)
+    resp.thumbnail_url = thumbnail_url
     record = AnalysisRecord(
         user_id=user.id if user else None,
         media_type="video",
         verdict=label,
         authenticity_score=float(score),
+        result_json=json.dumps(resp.model_dump()),
+        media_hash=media_hash,
+        media_path=media_path,
+        thumbnail_url=thumbnail_url,
     )
     db.add(record)
     db.commit()
     db.refresh(record)
+    resp.record_id = record.id
     logger.info(
         f"Saved AnalysisRecord id={record.id} video score={score} verdict={label} "
         f"frames={agg.num_frames_sampled} susp={agg.num_suspicious_frames}"
     )
+    # Write verdict into video metadata (ExifTool, optional — gated by EXIFTOOL_PATH).
     try:
+        write_verdict_metadata(
+            file_path=path,
+            verdict=label,
+            authenticity_score=score,
+            models_used=agg.models_used,
+            analysis_id=str(record.id),
         )
     except Exception as e:  # noqa: BLE001
+        logger.warning(f"Metadata write failed: {e}")
+    finally:
+        try:
+            os.unlink(path)
+        except OSError:
+            pass
+    # Phase 12: LLM explainability card (authed users only — conserves LLM quota)
+    llm = _compute_llm_summary(resp, record_id=record.id, user=user, media_kind="video")
+    if llm:
+        resp.llm_summary = llm
+    return resp
 class TextAnalyzeBody(BaseModel):
+    """Request body for the POST /analyze/text endpoint."""
+    # Raw text to analyze.
     text: str
+    # Defaults to True. Presumably toggles the dedup-cache lookup like the
+    # `cache` query flag on the file endpoints; its use is not visible in this
+    # view - TODO confirm.
+    cache: bool = True
+    # Language override fed to _resolve_language_hint; "auto" means detect
+    # the language from the text.
+    language_hint: str = "auto"
 @router.post("/text", response_model=TextAnalysisResponse)
+@limiter.limit(ANON_ANALYZE, exempt_when=is_authed)
+@limiter.limit(AUTH_ANALYZE, exempt_when=is_anon)
 async def analyze_text_endpoint(
+    request: Request,
+    response: Response,
     body: TextAnalyzeBody = Body(...),
     db: Session = Depends(get_db),
     user: User | None = Depends(optional_current_user),
     stages: list[str] = []
     # Phase 13: language detection — routes to multilang model when non-English
+    lang = _resolve_language_hint(body.text, body.language_hint)
     stages.append("language_detection")
     clf = classify_text(body.text, language=lang)
         effective_fake_prob = news.truth_override.fake_prob_after
         stages.append("truth_override_applied")
+    # Weighted score: keep classifier authoritative. Linguistic heuristics can
+    # lower confidence, but should not give a high floor when classifier is very fake.
     manip_penalty = min(len(manip) * 5, 30)
     raw_score = (1.0 - effective_fake_prob) * 100.0
+    heuristic_score = max(0, 100 - sens.score) * 0.60 + max(0, 100 - manip_penalty) * 0.40
+    weighted = raw_score * 0.90 + heuristic_score * 0.10
     score = int(round(max(0.0, min(100.0, weighted))))
     label, severity = get_verdict_label(score)
     duration_ms = int((time.perf_counter() - start) * 1000)
         else settings.TEXT_MODEL_ID
     )
+    resp = TextAnalysisResponse(
         analysis_id=str(uuid.uuid4()),
         media_type="text",
         timestamp=datetime.now(timezone.utc).isoformat(),
             stages_completed=stages,
             total_duration_ms=duration_ms,
             model_used=model_used,
+            calibrator_applied=False,
         ),
     )
         media_type="text",
         verdict=label,
         authenticity_score=float(score),
+        result_json=json.dumps(resp.model_dump()),
     )
     db.add(record)
     db.commit()
     db.refresh(record)
+    resp.record_id = record.id
     logger.info(f"Saved AnalysisRecord id={record.id} text score={score} verdict={label}")
+    # Phase 12: LLM explainability card (authed users only — conserves LLM quota)
+    llm = _compute_llm_summary(resp, record_id=record.id, user=user, media_kind="text")
+    if llm:
+        resp.llm_summary = llm
+    return resp
 @router.post("/screenshot", response_model=ScreenshotAnalysisResponse)
+@limiter.limit(ANON_ANALYZE, exempt_when=is_authed)
+@limiter.limit(AUTH_ANALYZE, exempt_when=is_anon)
 async def analyze_screenshot_endpoint(
+    request: Request,
+    response: Response,
+    cache: bool = Query(default=True),
+    language_hint: str = Query(default="auto"),
     file: UploadFile = File(...),
     db: Session = Depends(get_db),
     user: User | None = Depends(optional_current_user),
     )
     stages.append("validation")
+    # Phase 19.1 — dedup cache
+    media_hash = sha256_bytes(raw)
+    cached = lookup_cached(db, media_hash=media_hash, media_type="screenshot", user_id=user.id if user else None) if cache else None
+    if cached is not None:
+        payload = cached_payload(cached)
+        if payload is not None:
+            logger.info(f"cache hit screenshot sha={media_hash[:12]} record={cached.id}")
+            return ScreenshotAnalysisResponse.model_validate(payload)
     pil = load_image_from_bytes(raw)
     ocr_boxes = run_ocr(pil)
     stages.append("ocr")
     full_text = extract_full_text(ocr_boxes)
     # Phase 13: language detection on extracted OCR text
+    lang = _resolve_language_hint(full_text, language_hint) if full_text else "en"
     stages.append("language_detection")
     clf = classify_text(full_text, language=lang) if full_text else None
     manip_penalty = min(len(manip) * 5, 30)
     layout_penalty = min(len(layout) * 5, 15)
     raw_score = (1.0 - effective_fake_prob) * 100.0
+    heuristic_score = (
+        max(0, 100 - sens.score) * 0.45
+        + max(0, 100 - manip_penalty) * 0.35
+        + max(0, 100 - layout_penalty) * 0.20
     )
+    weighted = raw_score * 0.90 + heuristic_score * 0.10
     if not full_text.strip():
         weighted = 50
     score = int(round(max(0.0, min(100.0, weighted))))
         else f"{settings.TEXT_MODEL_ID} + EasyOCR"
     )
+    resp = ScreenshotAnalysisResponse(
         analysis_id=str(uuid.uuid4()),
         media_type="screenshot",
         timestamp=datetime.now(timezone.utc).isoformat(),
             stages_completed=stages,
             total_duration_ms=duration_ms,
             model_used=model_used_str,
+            calibrator_applied=False,
         ),
     )
+    # Phase 19.2 — object storage + thumbnail
+    ext = (mime.split("/")[-1] if mime else "jpg").replace("jpeg", "jpg")
+    try:
+        media_path = save_bytes(raw, media_hash, ext)
+    except Exception as e:  # noqa: BLE001
+        logger.warning(f"screenshot media save failed: {e}")
+        media_path = None
+    thumbnail_url = make_image_thumbnail(pil, media_hash)
+    resp.thumbnail_url = thumbnail_url
     record = AnalysisRecord(
         user_id=user.id if user else None,
         media_type="screenshot",
         verdict=label,
         authenticity_score=float(score),
+        result_json=json.dumps(resp.model_dump()),
+        media_hash=media_hash,
+        media_path=media_path,
+        thumbnail_url=thumbnail_url,
     )
     db.add(record)
     db.commit()
     db.refresh(record)
+    resp.record_id = record.id
     logger.info(f"Saved AnalysisRecord id={record.id} screenshot score={score} verdict={label}")
+    # Phase 12: LLM explainability card (authed users only — conserves LLM quota)
+    llm = _compute_llm_summary(resp, record_id=record.id, user=user, media_kind="screenshot")
+    if llm:
+        resp.llm_summary = llm
+    return resp
+# ───────────────────────── Phase 19.3 — async video + jobs ─────────────────────────
+@router.post("/video/async", status_code=status.HTTP_202_ACCEPTED)
+@limiter.limit(ANON_ANALYZE, exempt_when=is_authed)
+@limiter.limit(AUTH_ANALYZE, exempt_when=is_anon)
+async def analyze_video_async(
+    request: Request,
+    response: Response,
+    background: BackgroundTasks,
+    cache: bool = Query(default=True),
+    language_hint: str = Query(default="auto"),
+    file: UploadFile = File(...),
+    db: Session = Depends(get_db),
+    user: User | None = Depends(optional_current_user),
+):
+    """Queue a video analysis and return a job_id. Poll GET /api/v1/jobs/{job_id}.
+    Used by the PipelineVisualizer so it can read real backend stage/progress
+    instead of guessing timing.
+    """
+    suffix = os.path.splitext(file.filename or "")[1].lower() or ".mp4"
+    path, _mime = await save_upload_to_tempfile(
+        file, settings.ALLOWED_VIDEO_TYPES, max_size_mb=VIDEO_MAX_MB, suffix=suffix
+    )
+    # Quick cache probe so callers don't wait for queue dispatch on repeats.
+    media_hash = sha256_file(path)
+    cached = lookup_cached(db, media_hash=media_hash, media_type="video", user_id=user.id if user else None) if cache else None
+    if cached is not None:
+        payload = cached_payload(cached)
+        try:
+            os.unlink(path)
+        except OSError:
+            pass
+        if payload is not None:
+            job = job_registry.create()
+            job_registry.update(job.id, status="done", stage="done", progress=100, result=payload)
+            return {"job_id": job.id, "status": "done", "cached": True}
+    user_id = user.id if user else None
+    job = job_registry.create()
+    def _work(progress):
+        from db.database import SessionLocal
+        local_db = SessionLocal()
+        try:
+            progress("frame_extraction", 15)
+            agg = analyze_video(path, num_frames=VIDEO_NUM_FRAMES)
+            progress("aggregation", 60)
+            audio_result = None
+            try:
+                audio_result = analyze_audio(path)
+            except Exception as _ae:  # noqa: BLE001
+                logger.warning(f"Audio analysis failed, continuing: {_ae}")
+            progress("audio_analysis", 75)
+            score_val, label_val, sev = compute_video_authenticity_score(
+                mean_suspicious_prob=agg.mean_suspicious_prob,
+                insufficient_faces=agg.insufficient_faces,
+                temporal_score=agg.temporal.temporal_score if agg.temporal else None,
+                audio_authenticity_score=audio_result.audio_authenticity_score if audio_result else None,
+                has_audio=bool(audio_result and audio_result.has_audio),
+            )
+            from schemas.analyze import AudioExplainability
+            audio_ex = None
+            if audio_result:
+                audio_ex = AudioExplainability(
+                    audio_authenticity_score=audio_result.audio_authenticity_score,
+                    has_audio=audio_result.has_audio,
+                    duration_s=audio_result.duration_s,
+                    silence_ratio=audio_result.silence_ratio,
+                    spectral_variance=audio_result.spectral_variance,
+                    rms_consistency=audio_result.rms_consistency,
+                    notes=audio_result.notes,
+                )
+            resp = VideoAnalysisResponse(
+                analysis_id=str(uuid.uuid4()),
+                media_type="video",
+                timestamp=datetime.now(timezone.utc).isoformat(),
+                verdict=Verdict(
+                    label=label_val, severity=sev,
+                    authenticity_score=score_val,
+                    model_confidence=float(agg.mean_suspicious_prob),
+                    model_label="suspicious_mean" if not agg.insufficient_faces else "no_faces",
+                ),
+                explainability=VideoExplainability(
+                    num_frames_sampled=agg.num_frames_sampled,
+                    num_face_frames=agg.num_face_frames,
+                    num_suspicious_frames=agg.num_suspicious_frames,
+                    mean_suspicious_prob=agg.mean_suspicious_prob,
+                    max_suspicious_prob=agg.max_suspicious_prob,
+                    suspicious_ratio=agg.suspicious_ratio,
+                    insufficient_faces=agg.insufficient_faces,
+                    suspicious_timestamps=agg.suspicious_timestamps,
+                    frames=[
+                        FrameAnalysisOut(
+                            index=f.index, timestamp_s=f.timestamp_s,
+                            label=f.label, confidence=f.confidence,
+                            suspicious_prob=f.suspicious_prob, is_suspicious=f.is_suspicious,
+                            has_face=f.has_face, scored=f.scored,
+                        ) for f in agg.frames
+                    ],
+                    temporal_score=agg.temporal.temporal_score if agg.temporal else None,
+                    optical_flow_variance=agg.temporal.optical_flow_variance if agg.temporal else None,
+                    flicker_score=agg.temporal.flicker_score if agg.temporal else None,
+                    blink_rate_anomaly=agg.temporal.blink_rate_anomaly if agg.temporal else None,
+                    audio=audio_ex,
+                ),
+                processing_summary=ProcessingSummary(
+                    stages_completed=["frame_extraction", "classification", "aggregation"],
+                    total_duration_ms=0,
+                    model_used=settings.IMAGE_MODEL_ID,
+                    models_used=agg.models_used,
+                    calibrator_applied=agg.calibrator_applied,
+                ),
+            )
+            progress("storage", 85)
+            try:
+                media_path = save_file(path, media_hash, suffix.lstrip("."))
+            except Exception as e:  # noqa: BLE001
+                logger.warning(f"async video media save failed: {e}")
+                media_path = None
+            thumb = make_video_thumbnail(path, media_hash)
+            resp.thumbnail_url = thumb
+            rec = AnalysisRecord(
+                user_id=user_id,
+                media_type="video",
+                verdict=label_val,
+                authenticity_score=float(score_val),
+                result_json=json.dumps(resp.model_dump()),
+                media_hash=media_hash,
+                media_path=media_path,
+                thumbnail_url=thumb,
+            )
+            local_db.add(rec)
+            local_db.commit()
+            local_db.refresh(rec)
+            resp.record_id = rec.id
+            progress("persist", 95)
+            return resp.model_dump()
+        finally:
+            local_db.close()
+            try:
+                os.unlink(path)
+            except OSError:
+                pass
+    stages = ["queued", "frame_extraction", "aggregation", "audio_analysis", "storage", "persist", "done"]
+    background.add_task(run_job, job.id, stages, _work)
+    return {"job_id": job.id, "status": "queued", "cached": False}
+jobs_router = APIRouter(prefix="/jobs", tags=["jobs"])
+@jobs_router.get("/{job_id}")
+def get_job(job_id: str):
+    j = job_registry.get(job_id)
+    if not j:
+        raise HTTPException(status_code=404, detail="job not found")
+    return {
+        "id": j.id,
+        "status": j.status,
+        "stage": j.stage,
+        "progress": j.progress,
+        "error": j.error,
+        "result": j.result if j.status == "done" else None,
+    }