Spaces:
Running
Running
| from pathlib import Path | |
| from typing import Optional, List | |
| import tempfile | |
| import threading | |
| from fastapi import FastAPI, UploadFile, File, Form, HTTPException | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from pydantic import BaseModel, Field | |
| from PIL import Image | |
| import torch | |
| from sentence_transformers import SentenceTransformer, util | |
# Identifier of the embedding model passed to SentenceTransformer by the
# verifier service and echoed back in API responses.
MODEL_NAME = "Qwen/Qwen3-VL-Embedding-2B"

app = FastAPI(
    title="Visual Evidence Verification API",
    description=(
        "Verifies whether an uploaded image supports a multilingual citizen "
        "complaint using Qwen3-VL multimodal embeddings."
    ),
    version="1.0.0",
)

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # later replace with your Vercel frontend URL
    # Credentials stay disabled while the origin list is a wildcard: the
    # FastAPI CORS documentation states that allow_origins, allow_methods and
    # allow_headers must all be explicit when allow_credentials is True.
    # Re-enable this only together with an explicit origin list.
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
| # ========================= | |
| # API Schemas | |
| # ========================= | |
# Response body describing the outcome of one image/complaint verification.
# (Documented with comments rather than a class docstring so the generated
# schema description is left unchanged.)
class VerificationResponse(BaseModel):
    # The complaint text that was scored.
    complaint_text: str
    # Similarity score between the complaint text and the image.
    image_match_score: float
    # Textual label for the score bucket.
    verification_status: str
    # Whether the image is considered to support the complaint.
    image_supports_complaint: bool
    # Score at or above which a match is reported as strong.
    strong_threshold: float
    # Score at or above which a match is reported as partial.
    partial_threshold: float
    # Name of the scoring method that produced the result.
    method: str
    # Name of the embedding model that produced the result.
    model: str
# Response body for the status-style endpoints.
# (Documented with comments rather than a class docstring so the generated
# schema description is left unchanged.)
class HealthResponse(BaseModel):
    # Short service state label.
    status: str
    # Name of the configured embedding model.
    model_name: str
    # True once the embedding model has been loaded into memory.
    model_loaded: bool
    # Device the model runs on.
    device: str
| # ========================= | |
| # Service | |
| # ========================= | |
class VisualEvidenceVerifier:
    """
    Multilingual image-text verification using Qwen3-VL embeddings.

    Logic:
    - Encode complaint text
    - Encode uploaded image
    - Compare embeddings using cosine similarity
    - Return match/partial/weak verification result
    """

    def __init__(
        self,
        model_name: str = MODEL_NAME,
        strong_threshold: float = 0.55,
        partial_threshold: float = 0.35,
    ):
        """
        Store configuration only; the model is not loaded here.

        Args:
            model_name: Model identifier passed to SentenceTransformer.
            strong_threshold: Minimum score reported as "strong_match".
            partial_threshold: Minimum score reported as "partial_match".
        """
        self.model_name = model_name
        self.strong_threshold = strong_threshold
        self.partial_threshold = partial_threshold
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model: Optional[SentenceTransformer] = None
        # Guards the first load so concurrent callers create a single model.
        self._lock = threading.Lock()

    def load_model(self) -> SentenceTransformer:
        """
        Lazy model loading.

        This prevents the Space from failing during startup if loading is slow.
        First /verify request will load the model.

        Returns:
            The loaded SentenceTransformer instance (created on first call,
            reused afterwards).
        """
        if self.model is None:
            with self._lock:
                # Re-check under the lock: another thread may have finished
                # loading while this one was waiting.
                if self.model is None:
                    self.model = SentenceTransformer(
                        self.model_name,
                        device=self.device,
                    )
        return self.model

    def _load_image(self, image_path: Path) -> Image.Image:
        """
        Open an image file and return it converted to RGB.

        Raises:
            ValueError: If the file cannot be opened or converted.
        """
        try:
            # The context manager releases the underlying file handle even
            # when decoding fails; convert() returns an independent image.
            with Image.open(image_path) as raw_image:
                return raw_image.convert("RGB")
        except Exception as error:
            # Chain the cause so the original failure stays in the traceback.
            raise ValueError(f"Invalid image file: {error}") from error

    def _decide_status(self, score: float):
        """
        Map a similarity score to a ``(status, supports)`` pair.

        Returns:
            ("strong_match", True) at or above the strong threshold,
            ("partial_match", True) at or above the partial threshold,
            otherwise ("weak_match", False).
        """
        if score >= self.strong_threshold:
            return "strong_match", True
        if score >= self.partial_threshold:
            return "partial_match", True
        return "weak_match", False

    def verify(
        self,
        complaint_text: str,
        image_path: Path,
    ) -> VerificationResponse:
        """
        Score how well the image at ``image_path`` matches ``complaint_text``.

        Args:
            complaint_text: Complaint text; must contain at least 3
                non-whitespace-trimmed characters.
            image_path: Path to the image file on disk.

        Returns:
            A VerificationResponse with the rounded cosine similarity and the
            status derived from the configured thresholds.

        Raises:
            ValueError: If the text is too short or the image is invalid.
            FileNotFoundError: If ``image_path`` does not exist.
        """
        if not complaint_text or len(complaint_text.strip()) < 3:
            raise ValueError("Complaint text is too short.")
        if not image_path.exists():
            raise FileNotFoundError(f"Image not found: {image_path}")

        # Decode the image before touching the model so that a corrupt upload
        # fails fast instead of first paying for the (slow) initial model load.
        image = self._load_image(image_path)
        model = self.load_model()

        text_embedding = model.encode(
            [complaint_text],
            convert_to_tensor=True,
            normalize_embeddings=True,
        )
        image_embedding = model.encode(
            [image],
            convert_to_tensor=True,
            normalize_embeddings=True,
        )

        # cos_sim returns a matrix; one text against one image gives [0][0].
        score = float(util.cos_sim(text_embedding, image_embedding)[0][0])
        status, supports = self._decide_status(score)

        return VerificationResponse(
            complaint_text=complaint_text,
            image_match_score=round(score, 4),
            verification_status=status,
            image_supports_complaint=supports,
            strong_threshold=self.strong_threshold,
            partial_threshold=self.partial_threshold,
            method="qwen3_vl_embedding_image_text_similarity",
            model=self.model_name,
        )
# Module-level verifier instance used by the handlers in this file.
# Constructing it does not load the embedding model; that happens on the
# first load_model() call.
verifier = VisualEvidenceVerifier()
| # ========================= | |
| # Routes | |
| # ========================= | |
def home():
    """Return a "running" status together with the model and device state."""
    # NOTE(review): no route decorator is visible on this function in this
    # view; confirm that it is registered on `app`.
    payload = {
        "status": "running",
        "model_name": MODEL_NAME,
        "model_loaded": verifier.model is not None,
        "device": verifier.device,
    }
    return HealthResponse(**payload)
def health():
    """Return an "ok" status together with the model and device state."""
    # NOTE(review): no route decorator is visible on this function in this
    # view; confirm that it is registered on `app`.
    is_loaded = verifier.model is not None
    current_device = verifier.device
    return HealthResponse(
        status="ok",
        model_name=MODEL_NAME,
        model_loaded=is_loaded,
        device=current_device,
    )
def load_model():
    """
    Warm up the model ahead of a demo.

    Triggers the verifier's lazy load and reports the model name and device.
    The first call may take a while.
    """
    # NOTE(review): no route decorator is visible on this function in this
    # view; confirm that it is registered on `app`.
    verifier.load_model()
    return dict(
        status="loaded",
        model=MODEL_NAME,
        device=verifier.device,
    )
async def verify_image_evidence(
    complaint_text: str = Form(...),
    file: UploadFile = File(...),
):
    """
    Verify whether an uploaded image supports the given complaint text.

    The upload is written to a temporary file, scored by the shared verifier,
    and the temporary file is always removed afterwards.

    Args:
        complaint_text: Complaint text submitted as a form field.
        file: Uploaded image; only jpg, jpeg, png and webp are accepted.

    Returns:
        The verifier's VerificationResponse.

    Raises:
        HTTPException: 400 for an unsupported extension or invalid input
            (text too short, unreadable image); 500 for any other failure.
    """
    # NOTE(review): no route decorator is visible on this function in this
    # view; confirm that it is registered on `app`.

    # Imported locally so this handler does not depend on a file-level import.
    from fastapi.concurrency import run_in_threadpool

    allowed_extensions = {".jpg", ".jpeg", ".png", ".webp"}
    # The filename can be missing on an upload; treat that as "no extension"
    # so the client gets the 400 below instead of a TypeError.
    suffix = Path(file.filename or "").suffix.lower()
    if suffix not in allowed_extensions:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported image type '{suffix}'. Use jpg, jpeg, png, or webp.",
        )

    temp_path: Optional[Path] = None
    try:
        # Created inside the try block so the file is cleaned up even when
        # reading or writing the upload fails.
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            temp_path = Path(temp_file.name)
            temp_file.write(await file.read())

        # Inference is blocking; run it in the threadpool so the event loop
        # keeps serving other requests meanwhile.
        return await run_in_threadpool(
            verifier.verify,
            complaint_text=complaint_text,
            image_path=temp_path,
        )
    except ValueError as error:
        # The verifier raises ValueError for bad client input (text too
        # short, invalid image), which is a client error, not a server fault.
        raise HTTPException(status_code=400, detail=str(error)) from error
    except Exception as error:
        raise HTTPException(status_code=500, detail=str(error)) from error
    finally:
        if temp_path is not None and temp_path.exists():
            temp_path.unlink()
def debug_compare_texts(
    text_a: str = Form(...),
    text_b: str = Form(...),
):
    """
    Debug helper: cosine similarity between the embeddings of two texts.

    Handy for checking that the model produces sensible scores before
    testing image uploads.
    """
    # NOTE(review): no route decorator is visible on this function in this
    # view; confirm that it is registered on `app`.
    encoder = verifier.load_model()
    text_pair = [text_a, text_b]
    vectors = encoder.encode(
        text_pair,
        convert_to_tensor=True,
        normalize_embeddings=True,
    )
    first_vector, second_vector = vectors[0], vectors[1]
    similarity = float(util.cos_sim(first_vector, second_vector))
    return dict(
        text_a=text_a,
        text_b=text_b,
        similarity_score=round(similarity, 4),
        model=MODEL_NAME,
    )