Spaces:
Running
Running
| """Phase 19.1 — SHA-256 media dedup cache. | |
| Looks up a prior AnalysisRecord by content hash within CACHE_TTL_DAYS, and | |
| returns the cached payload so repeated uploads of the same file skip the | |
| expensive analyzer pipelines. | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import os | |
| from datetime import datetime, timedelta | |
| from loguru import logger | |
| from sqlalchemy.orm import Session | |
| from db.models import AnalysisRecord | |
| from schemas.common import ANALYSIS_CACHE_VERSION | |
| CACHE_TTL_DAYS = int(os.environ.get("CACHE_TTL_DAYS", "30")) | |
| def lookup_cached( | |
| db: Session, | |
| *, | |
| media_hash: str, | |
| media_type: str, | |
| user_id: int | None, | |
| ) -> AnalysisRecord | None: | |
| """Return a cached AnalysisRecord for this hash+type if within TTL. | |
| We scope the cache by user when the user is signed in (their own history | |
| should return their own cached record) and globally when anonymous. | |
| """ | |
| if not media_hash: | |
| return None | |
| cutoff = datetime.utcnow() - timedelta(days=CACHE_TTL_DAYS) | |
| q = ( | |
| db.query(AnalysisRecord) | |
| .filter( | |
| AnalysisRecord.media_hash == media_hash, | |
| AnalysisRecord.media_type == media_type, | |
| AnalysisRecord.created_at >= cutoff, | |
| ) | |
| .order_by(AnalysisRecord.created_at.desc()) | |
| ) | |
| if user_id is not None: | |
| return q.filter(AnalysisRecord.user_id == user_id).first() | |
| return q.filter(AnalysisRecord.user_id.is_(None)).first() | |
| def cached_payload(record: AnalysisRecord) -> dict | None: | |
| """Decode stored result_json and stamp the cached flag.""" | |
| try: | |
| payload = json.loads(record.result_json) | |
| except Exception as e: # noqa: BLE001 | |
| logger.warning(f"cached payload decode failed for record {record.id}: {e}") | |
| return None | |
| summary = payload.get("processing_summary") or {} | |
| if summary.get("analysis_version") != ANALYSIS_CACHE_VERSION: | |
| logger.info(f"cache stale for record {record.id}: analysis_version mismatch") | |
| return None | |
| payload["cached"] = True | |
| payload["record_id"] = record.id | |
| return payload | |