Spaces:
Running
Running
| import json | |
| import secrets | |
| from urllib.parse import parse_qsl, urlencode | |
| from typing import Any | |
| from pydantic import field_validator, model_validator | |
| from pydantic_settings import BaseSettings, SettingsConfigDict | |
| def _parse_list_env(value: Any, default: list[str]) -> list[str]: | |
| """Accept list env values as JSON, CSV, single-value string, or native list.""" | |
| if value is None: | |
| return default | |
| if isinstance(value, str): | |
| raw = value.strip() | |
| if not raw: | |
| return default | |
| if raw.startswith("["): | |
| try: | |
| parsed = json.loads(raw) | |
| if isinstance(parsed, list): | |
| cleaned = [str(i).strip() for i in parsed if str(i).strip()] | |
| return cleaned or default | |
| except json.JSONDecodeError: | |
| # Fall back to CSV parsing if JSON is malformed. | |
| pass | |
| cleaned = [i.strip() for i in raw.split(",") if i.strip()] | |
| return cleaned or default | |
| if isinstance(value, list): | |
| cleaned = [str(i).strip() for i in value if str(i).strip()] | |
| return cleaned or default | |
| return default | |
| def _normalize_origin(origin: str) -> str: | |
| """Normalize CORS origin values to avoid strict mismatch (e.g. trailing slash).""" | |
| cleaned = origin.strip() | |
| if cleaned.startswith(("http://", "https://")): | |
| cleaned = cleaned.rstrip("/") | |
| return cleaned | |
| def _fix_postgres_url(raw: str) -> str: | |
| """Normalize common Postgres URL mistakes from deployment envs. | |
| - Converts postgres:// to postgresql:// | |
| - Encodes stray '@' in credentials (usually from unescaped passwords) | |
| - Ensures sslmode=require for Supabase URLs when missing | |
| """ | |
| url = raw.strip() | |
| if url.startswith("postgres://"): | |
| url = "postgresql://" + url[len("postgres://") :] | |
| if not url.startswith("postgresql://"): | |
| return url | |
| # Split scheme + authority/path safely without full URL parsing. | |
| rest = url[len("postgresql://") :] | |
| if "@" in rest: | |
| userinfo, remainder = rest.rsplit("@", 1) | |
| # Any '@' left in userinfo belongs to credentials and must be percent-encoded. | |
| userinfo = userinfo.replace("@", "%40") | |
| url = "postgresql://" + userinfo + "@" + remainder | |
| if "supabase.co" in url: | |
| if "?" in url: | |
| base, query = url.split("?", 1) | |
| params = dict(parse_qsl(query, keep_blank_values=True)) | |
| if "sslmode" not in params: | |
| params["sslmode"] = "require" | |
| url = base + "?" + urlencode(params) | |
| else: | |
| url = url + "?sslmode=require" | |
| return url | |
class Settings(BaseSettings):
    """Application configuration loaded from environment variables / .env.

    Validators make the settings tolerant of deployment-env quirks: blank
    env vars fall back to defaults, list values accept JSON or CSV strings,
    and Postgres URLs are normalized before use.

    Fix: ``field_validator`` / ``model_validator`` were imported but the
    validator methods carried no decorators, so they never ran; the
    decorators (and ``@classmethod``, required by pydantic v2 for
    class-level validators) are restored here.
    """

    # Server
    APP_HOST: str = "0.0.0.0"
    APP_PORT: int = 8000
    DEBUG: bool = False
    # Declared as Any so the raw env string reaches the "before" validator
    # intact; assemble_cors_origins always returns a list[str].
    CORS_ORIGINS: Any = ["http://localhost:5173"]

    @model_validator(mode="before")
    @classmethod
    def drop_blank_values(cls, data: Any) -> Any:
        """Treat blank env vars as unset so defaults apply instead of parse errors."""
        if isinstance(data, dict):
            return {
                k: v
                for k, v in data.items()
                if not (isinstance(v, str) and not v.strip())
            }
        return data

    @field_validator("CORS_ORIGINS", mode="before")
    @classmethod
    def assemble_cors_origins(cls, v: Any) -> list[str]:
        """Parse CORS_ORIGINS from string (JSON or comma-separated) into a list."""
        origins = _parse_list_env(v, default=["http://localhost:5173"])
        normalized = [_normalize_origin(i) for i in origins if _normalize_origin(i)]
        return normalized or ["http://localhost:5173"]

    # Database
    DATABASE_URL: str = "sqlite:///./deepshield.db"

    @field_validator("DATABASE_URL", mode="before")
    @classmethod
    def normalize_database_url(cls, v: Any) -> str:
        """Support common HF-style postgres URL aliases and blank values."""
        if v is None:
            return "sqlite:///./deepshield.db"
        if isinstance(v, str):
            raw = v.strip()
            if not raw:
                return "sqlite:///./deepshield.db"
            return _fix_postgres_url(raw)
        return str(v)

    # File Upload
    MAX_UPLOAD_SIZE_MB: int = 100
    UPLOAD_DIR: str = "/data/uploads"
    ALLOWED_IMAGE_TYPES: list[str] = ["image/jpeg", "image/png", "image/webp"]
    ALLOWED_VIDEO_TYPES: list[str] = ["video/mp4", "video/avi", "video/mov", "video/webm"]
    FILE_RETENTION_SECONDS: int = 300

    # AI Models
    IMAGE_MODEL_ID: str = "prithivMLmods/Deep-Fake-Detector-v2-Model"
    GENERAL_IMAGE_MODEL_ID: str = "umm-maybe/AI-image-detector"
    # Phase C1/C2: second AI-image head specialised on diffusion/GAN output.
    # Ensembled with the general detector before feeding face-present fusion.
    # Set to "" to disable (falls back to general detector only).
    DIFFUSION_IMAGE_MODEL_ID: str = "haywoodsloan/ai-image-detector-deploy"
    DIFFUSION_MODEL_ENABLED: bool = True
    # Blend weights for the two-head general ensemble (must sum ≤ 1.0).
    # When only one head is available the available head gets full weight.
    GENERAL_AI_WEIGHT: float = 0.45
    DIFFUSION_AI_WEIGHT: float = 0.55
    # Temperature scaling for each detector head (> 1.0 = softer probabilities,
    # < 1.0 = sharper). 1.0 = no scaling. Tune after running run_image_eval.py.
    GENERAL_MODEL_TEMPERATURE: float = 1.0
    DIFFUSION_MODEL_TEMPERATURE: float = 1.0
    TEXT_MODEL_ID: str = "jy46604790/Fake-News-Bert-Detect"
    # Multilingual text model for non-English (Hindi etc.). Leave empty to fall back to TEXT_MODEL_ID.
    TEXT_MULTILANG_MODEL_ID: str = ""
    DEVICE: str = "cpu"
    PRELOAD_MODELS: bool = True  # preload models at startup
    # Phase 13: OCR language list (comma-separated ISO codes, e.g. "en,hi")
    OCR_LANGS: str = "en,hi"

    # News API
    NEWS_API_KEY: str = ""
    NEWS_API_BASE_URL: str = "https://newsdata.io/api/1/latest"
    NEWS_API_ARCHIVE_BASE_URL: str = "https://newsdata.io/api/1/archive"
    NEWS_API_LANGUAGES: str = "en,hi"
    NEWS_API_RECENT_TIMEFRAME: str = "1"
    NEWS_API_OLDER_DAYS: int = 7
    NEWS_API_PAGE_SIZE: int = 10
    NEWS_API_PRIMARY_COUNTRY: str = "in"

    # Reports
    REPORT_DIR: str = "/data/reports"
    REPORT_TTL_SECONDS: int = 3600  # 1h expiry
    PUBLIC_APP_URL: str = ""
    # Public backend origin or API base URL used for third-party OAuth callbacks.
    # Examples: http://localhost:8000, https://api.example.com, https://api.example.com/api/v1
    PUBLIC_API_URL: str = ""

    # Phase 19 — dedup cache + object storage
    CACHE_TTL_DAYS: int = 30
    MEDIA_ROOT: str = "/data/media"
    MEDIA_SIGNED_URL_TTL_SECONDS: int = 3600

    # LLM Explainability (Phase 12)
    LLM_PROVIDER: str = "gemini"  # "gemini" | "openai"
    LLM_API_KEY: str = ""
    LLM_MODEL: str = "gemini-2.0-flash"  # 2.0-flash: fastest response, no thinking overhead, best for real-time summaries.
    # LLM fallback — Groq (Llama 3.3 70B by default). Used automatically when the
    # primary provider returns 429/quota exceeded. Leave empty to disable fallback.
    GROQ_API_KEY: str = ""
    GROQ_MODEL: str = "llama-3.3-70b-versatile"

    # EfficientNet (ICPR2020 / DeepShield1 merge)
    EFFICIENTNET_MODEL: str = "EfficientNetAutoAttB4"
    EFFICIENTNET_TRAIN_DB: str = "DFDC"
    ENSEMBLE_MODE: bool = True  # run both ViT + EfficientNet and average scores

    # Phase 11.3: FFPP-fine-tuned ViT. Path is resolved relative to the repo root.
    # The checkpoint lives at <repo_root>/trained_models/ (the `trained_models/` dir
    # at the project root, alongside `backend/` and `frontend/`).
    FFPP_MODEL_PATH: str = "trained_models"
    # Optional: pull FFPP checkpoint from Hugging Face Hub when local checkpoint
    # is missing (keeps large model files out of GitHub source repo).
    FFPP_MODEL_REPO_ID: str = ""
    FFPP_MODEL_REVISION: str = "main"
    FFPP_BASE_PROCESSOR_ID: str = "google/vit-base-patch16-224-in21k"
    FFPP_ENABLED: bool = True

    # DenseNet121 face-GAN specialist (in-house trained on 140k Kaggle dataset).
    # Loaded from a TF-free PyTorch checkpoint converted via convert_densenet_keras_to_pt.py.
    DENSENET_ENABLED: bool = True
    # Path to .pt checkpoint, resolved relative to repo root (or absolute).
    DENSENET_MODEL_PATH: str = "backend/trained_models/densenet121_faces.pt"
    DENSENET_META_PATH: str = "backend/trained_models/densenet121_faces_meta.json"
    # HF Space fallback when local checkpoint is absent.
    DENSENET_HF_REPO_ID: str = "ar07xd/deepshield"
    DENSENET_HF_REVISION: str = "main"

    # Ensemble weights — DenseNet leads because it is trained on still-image GAN
    # faces (the dominant upload type). FFPP / EffNet are stronger on video frames.
    # Face-stack internal weights (sum = 1.0).
    DENSENET_WEIGHT_FACE: float = 0.45
    FFPP_WEIGHT_FACE: float = 0.25
    VIT_WEIGHT_FACE: float = 0.15
    EFFNET_WEIGHT_FACE: float = 0.15
    # Video-frame path: FFPP leads since FFPP is trained on FF++ video frames.
    DENSENET_VIDEO_WEIGHT: float = 0.10
    VIDEO_FFPP_WEIGHT_FACE: float = 0.50
    VIDEO_EFFNET_WEIGHT_FACE: float = 0.30
    VIDEO_VIT_WEIGHT_FACE: float = 0.10
    FFPP_WEIGHT_NOFACE: float = 0.35
    VIT_WEIGHT_NOFACE: float = 0.65

    # Face-present unified evidence weights (Phase A2/A3).
    # face_stack = composite of FFPP+ViT+EffNet (all face-swap models).
    # general = generic AI-image detector (diffusion/GAN whole-image cues).
    # forensics = artifact scanner output.
    # exif = camera-metadata trust signal.
    # vlm = VLM consistency breakdown (optional).
    FACE_STACK_WEIGHT_FACE: float = 0.40
    GENERAL_WEIGHT_FACE: float = 0.40
    FORENSICS_WEIGHT_FACE: float = 0.10
    EXIF_WEIGHT_FACE: float = 0.05
    VLM_WEIGHT_FACE: float = 0.05
    # No-face evidence weights (existing behavior preserved).
    NOFACE_GENERAL_WEIGHT: float = 0.60
    NOFACE_FORENSICS_WEIGHT: float = 0.20
    NOFACE_EXIF_WEIGHT: float = 0.10
    NOFACE_VLM_WEIGHT: float = 0.10

    # Hard gating thresholds (Phase A4). When the general detector is highly
    # confident the image is synthetic, or strong GAN artifacts are present,
    # the final fake probability is floored at GATING_FAKE_FLOOR (0.50 maps to
    # authenticity score 50, i.e. cannot land in "Likely Real" or above).
    GENERAL_FAKE_GATING_THRESHOLD: float = 0.80
    GAN_ARTIFACT_GATING_THRESHOLD: float = 0.70
    GATING_FAKE_FLOOR: float = 0.50
    # Synthetic still-image overrides. FaceForensics/DFDC models are trained for
    # manipulated video faces, so they should not veto a strong still-image AI
    # detector on generated portraits.
    SYNTHETIC_STILL_HIGH_THRESHOLD: float = 0.80
    SYNTHETIC_STILL_HIGH_FLOOR: float = 0.80
    SYNTHETIC_STILL_VERY_HIGH_THRESHOLD: float = 0.90
    SYNTHETIC_STILL_VERY_HIGH_FLOOR: float = 0.90

    # Video-frame weight overrides. When an image is detected as a low-res
    # video frame (face-swap deepfakes are extracted from video), the general
    # AI-image detector is unreliable (it's trained on synthesised stills, not
    # video face-swaps). We shift weight strongly toward the face-swap-trained
    # models (FFPP / EfficientNet) in that case.
    VIDEO_FRAME_FACE_STACK_WEIGHT: float = 0.55
    VIDEO_FRAME_GENERAL_WEIGHT: float = 0.30
    VIDEO_FRAME_FORENSICS_WEIGHT: float = 0.10
    VIDEO_FRAME_EXIF_WEIGHT: float = 0.05
    # Per-frame video detector blend. FFPP ViT is trained on FaceForensics++
    # face forgery frames, so it is the dominant signal for video analysis.
    VIDEO_FFPP_WEIGHT: float = 0.70
    VIDEO_EFFNET_WEIGHT: float = 0.30
    VIDEO_SAMPLE_FRAMES: int = 32  # frames to sample per video for inference
    EXIFTOOL_PATH: str = ""  # full path to ExifTool binary; empty = metadata write disabled

    # Auth
    JWT_SECRET_KEY: str = ""
    JWT_SECRET_KEY_GENERATED: bool = False
    JWT_ALGORITHM: str = "HS256"
    JWT_EXPIRATION_MINUTES: int = 1440
    GOOGLE_CLIENT_ID: str = ""
    GOOGLE_CLIENT_SECRET: str = ""
    GITHUB_CLIENT_ID: str = ""
    GITHUB_CLIENT_SECRET: str = ""

    @model_validator(mode="after")
    def ensure_jwt_secret(self) -> "Settings":
        """Generate an ephemeral JWT secret when none is configured.

        The previous DEBUG / non-DEBUG branches were byte-identical, so they
        are collapsed into one. JWT_SECRET_KEY_GENERATED records that the
        secret is per-process: issued tokens will not survive a restart.
        """
        if self.JWT_SECRET_KEY:
            self.JWT_SECRET_KEY_GENERATED = False
        else:
            self.JWT_SECRET_KEY = secrets.token_urlsafe(48)
            self.JWT_SECRET_KEY_GENERATED = True
        return self

    @field_validator("ALLOWED_IMAGE_TYPES", mode="before")
    @classmethod
    def assemble_allowed_image_types(cls, v: Any) -> list[str]:
        """Parse ALLOWED_IMAGE_TYPES env value (JSON/CSV/list) into a list."""
        return _parse_list_env(v, default=["image/jpeg", "image/png", "image/webp"])

    @field_validator("ALLOWED_VIDEO_TYPES", mode="before")
    @classmethod
    def assemble_allowed_video_types(cls, v: Any) -> list[str]:
        """Parse ALLOWED_VIDEO_TYPES env value (JSON/CSV/list) into a list."""
        return _parse_list_env(v, default=["video/mp4", "video/avi", "video/mov", "video/webm"])

    model_config = SettingsConfigDict(env_file=".env", extra="ignore")
| settings = Settings() | |