Spaces:

kanhacoderx
/

Image-Text-verify

Running

App Files Files Community

kanhacoderx committed 17 days ago

Commit

eafadff

verified ·

1 Parent(s): e4c9f32

Upload 4 files

Browse files

Files changed (4) hide show

Dockerfile +16 -0
README.md +13 -10
app.py +259 -0
requirements.txt +10 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,16 @@

+FROM python:3.11-slim
+RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*
+# Hugging Face Docker Spaces run the container as UID 1000. Create that user
+# and point HOME / HF_HOME at a directory it owns so the model download that
+# happens on the first request has a writable cache location.
+RUN useradd -m -u 1000 user
+ENV HOME=/home/user \
+    HF_HOME=/home/user/.cache/huggingface
+WORKDIR /app
+# Dependencies are installed system-wide as root before dropping privileges,
+# so the uvicorn entry point stays on the default PATH.
+COPY requirements.txt .
+RUN pip install --no-cache-dir --upgrade pip
+RUN pip install --no-cache-dir -r requirements.txt
+COPY --chown=user app.py .
+# Run the server as the unprivileged user from here on.
+USER user
+EXPOSE 7860
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

README.md CHANGED Viewed

@@ -1,10 +1,13 @@
----
-title: Image Text Verify
-emoji: 🚀
-colorFrom: purple
-colorTo: gray
-sdk: docker
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+---
+title: Visual Evidence Verification API
+emoji: 🖼️
+colorFrom: blue
+colorTo: green
+sdk: docker
+app_port: 7860
+pinned: false
+---
+# Visual Evidence Verification API
+FastAPI backend that verifies whether an uploaded image supports a multilingual citizen complaint.

app.py ADDED Viewed

	@@ -0,0 +1,259 @@

+from pathlib import Path
+from typing import Optional, List
+import tempfile
+import threading
+from fastapi import FastAPI, UploadFile, File, Form, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel, Field
+from PIL import Image
+import torch
+from sentence_transformers import SentenceTransformer, util
+# Model identifier handed to SentenceTransformer; the same model encodes both
+# the complaint text and the uploaded image.
+MODEL_NAME = "Qwen/Qwen3-VL-Embedding-2B"
+# ASGI application object; the Dockerfile's uvicorn command targets it as "app:app".
+app = FastAPI(
+    title="Visual Evidence Verification API",
+    description=(
+        "Verifies whether an uploaded image supports a multilingual citizen "
+        "complaint using Qwen3-VL multimodal embeddings."
+    ),
+    version="1.0.0",
+)
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # later replace with your Vercel frontend URL
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# =========================
+# API Schemas
+# =========================
+# Response body of POST /verify-image-evidence.
+# (Kept as comments rather than a class docstring so the generated API schema
+# description is unchanged.)
+class VerificationResponse(BaseModel):
+    # Complaint text exactly as received from the client.
+    complaint_text: str
+    # Cosine similarity between text and image embeddings, rounded to 4 decimals.
+    image_match_score: float
+    # One of "strong_match", "partial_match" or "weak_match".
+    verification_status: str
+    # True for strong and partial matches, False for weak matches.
+    image_supports_complaint: bool
+    # Thresholds that were applied to produce verification_status.
+    strong_threshold: float
+    partial_threshold: float
+    # Fixed label naming the scoring method.
+    method: str
+    # Name of the embedding model that produced the score.
+    model: str
+# Response body shared by GET / and GET /health.
+class HealthResponse(BaseModel):
+    # Short service state label ("running" on /, "ok" on /health).
+    status: str
+    # Configured embedding model identifier.
+    model_name: str
+    # Whether the lazily loaded model is already in memory.
+    model_loaded: bool
+    # "cuda" or "cpu", as chosen when the verifier was constructed.
+    device: str
+# =========================
+# Service
+# =========================
+class VisualEvidenceVerifier:
+    """
+    Multilingual image-text verification using Qwen3-VL embeddings.
+    Logic:
+    - Encode complaint text
+    - Encode uploaded image
+    - Compare embeddings using cosine similarity
+    - Return match/partial/weak verification result
+    """
+    def __init__(
+        self,
+        model_name: str = MODEL_NAME,
+        strong_threshold: float = 0.55,
+        partial_threshold: float = 0.35,
+    ):
+        self.model_name = model_name
+        self.strong_threshold = strong_threshold
+        self.partial_threshold = partial_threshold
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        self.model: Optional[SentenceTransformer] = None
+        self._lock = threading.Lock()
+    def load_model(self):
+        """
+        Lazy model loading.
+        This prevents the Space from failing during startup if loading is slow.
+        First /verify request will load the model.
+        """
+        if self.model is None:
+            with self._lock:
+                if self.model is None:
+                    self.model = SentenceTransformer(
+                        self.model_name,
+                        device=self.device,
+                    )
+        return self.model
+    def _load_image(self, image_path: Path) -> Image.Image:
+        try:
+            return Image.open(image_path).convert("RGB")
+        except Exception as error:
+            raise ValueError(f"Invalid image file: {error}")
+    def _decide_status(self, score: float):
+        if score >= self.strong_threshold:
+            return "strong_match", True
+        if score >= self.partial_threshold:
+            return "partial_match", True
+        return "weak_match", False
+    def verify(
+        self,
+        complaint_text: str,
+        image_path: Path,
+    ) -> VerificationResponse:
+        if not complaint_text or len(complaint_text.strip()) < 3:
+            raise ValueError("Complaint text is too short.")
+        if not image_path.exists():
+            raise FileNotFoundError(f"Image not found: {image_path}")
+        model = self.load_model()
+        image = self._load_image(image_path)
+        text_embedding = model.encode(
+            [complaint_text],
+            convert_to_tensor=True,
+            normalize_embeddings=True,
+        )
+        image_embedding = model.encode(
+            [image],
+            convert_to_tensor=True,
+            normalize_embeddings=True,
+        )
+        score = float(util.cos_sim(text_embedding, image_embedding)[0][0])
+        status, supports = self._decide_status(score)
+        return VerificationResponse(
+            complaint_text=complaint_text,
+            image_match_score=round(score, 4),
+            verification_status=status,
+            image_supports_complaint=supports,
+            strong_threshold=self.strong_threshold,
+            partial_threshold=self.partial_threshold,
+            method="qwen3_vl_embedding_image_text_similarity",
+            model=self.model_name,
+        )
+# Module-level instance shared by every route; constructing it does not load
+# the model (that happens on the first load_model() call).
+verifier = VisualEvidenceVerifier()
+# =========================
+# Routes
+# =========================
+@app.get("/", response_model=HealthResponse)
+def home():
+    return HealthResponse(
+        status="running",
+        model_name=MODEL_NAME,
+        model_loaded=verifier.model is not None,
+        device=verifier.device,
+    )
+@app.get("/health", response_model=HealthResponse)
+def health():
+    return HealthResponse(
+        status="ok",
+        model_name=MODEL_NAME,
+        model_loaded=verifier.model is not None,
+        device=verifier.device,
+    )
+@app.post("/load-model")
+def load_model():
+    """
+    Optional endpoint to warm up the model before demo.
+    First call may take time.
+    """
+    verifier.load_model()
+    return {
+        "status": "loaded",
+        "model": MODEL_NAME,
+        "device": verifier.device,
+    }
+@app.post("/verify-image-evidence", response_model=VerificationResponse)
+async def verify_image_evidence(
+    complaint_text: str = Form(...),
+    file: UploadFile = File(...),
+):
+    allowed_extensions = {".jpg", ".jpeg", ".png", ".webp"}
+    suffix = Path(file.filename).suffix.lower()
+    if suffix not in allowed_extensions:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Unsupported image type '{suffix}'. Use jpg, jpeg, png, or webp.",
+        )
+    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
+        temp_path = Path(temp_file.name)
+        temp_file.write(await file.read())
+    try:
+        return verifier.verify(
+            complaint_text=complaint_text,
+            image_path=temp_path,
+        )
+    except Exception as error:
+        raise HTTPException(status_code=500, detail=str(error))
+    finally:
+        if temp_path.exists():
+            temp_path.unlink()
+@app.post("/debug-compare-texts")
+def debug_compare_texts(
+    text_a: str = Form(...),
+    text_b: str = Form(...),
+):
+    """
+    Debug endpoint to verify model embedding similarity for two texts.
+    Useful before testing image upload.
+    """
+    model = verifier.load_model()
+    embeddings = model.encode(
+        [text_a, text_b],
+        convert_to_tensor=True,
+        normalize_embeddings=True,
+    )
+    score = float(util.cos_sim(embeddings[0], embeddings[1]))
+    return {
+        "text_a": text_a,
+        "text_b": text_b,
+        "similarity_score": round(score, 4),
+        "model": MODEL_NAME,
+    }

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+# Web framework, ASGI server and multipart form parsing (exact pins)
+fastapi==0.121.0
+uvicorn[standard]==0.38.0
+python-multipart==0.0.20
+# Image decoding (exact pin)
+pillow==11.3.0
+# Model stack: torch, einops and timm are unpinned; the others only set a
+# minimum version, so builds at different times may resolve differently.
+torch
+sentence-transformers>=5.1.0
+transformers>=4.57.0
+accelerate>=1.10.0
+einops
+timm