Spaces:

kanhacoderx
/

Image-Text-verify

Running

App Files Files Community

Image-Text-verify / app.py

kanhacoderx

Upload 4 files

eafadff verified 17 days ago

raw

history blame contribute delete

7.02 kB

	from pathlib import Path
	from typing import Optional, List
	import tempfile
	import threading

	from fastapi import FastAPI, UploadFile, File, Form, HTTPException
	from fastapi.middleware.cors import CORSMiddleware
	from pydantic import BaseModel, Field
	from PIL import Image

	import torch
	from sentence_transformers import SentenceTransformer, util


	# Model name handed to SentenceTransformer by VisualEvidenceVerifier (it is
	# the default for its `model_name` argument) and echoed back in API responses.
	MODEL_NAME = "Qwen/Qwen3-VL-Embedding-2B"


	# FastAPI application object; the route decorators in this module attach to it.
	app = FastAPI(
	    version="1.0.0",
	    title="Visual Evidence Verification API",
	    description=(
	        "Verifies whether an uploaded image supports a multilingual "
	        "citizen complaint using Qwen3-VL multimodal embeddings."
	    ),
	)

	# CORS policy for browser clients.
	#
	# A wildcard origin must not be combined with credentialed requests: with
	# `allow_credentials=True` every origin would be allowed to make calls that
	# carry the user's cookies/authorization. None of the routes visible in this
	# module read cookies or auth headers, so credentials stay disabled while the
	# origin list is a wildcard. Turn `allow_credentials` back on only together
	# with an explicit origin list (e.g. the deployed frontend URL).
	app.add_middleware(
	    CORSMiddleware,
	    allow_origins=["*"],  # later replace with your Vercel frontend URL
	    allow_credentials=False,
	    allow_methods=["*"],
	    allow_headers=["*"],
	)


	# =========================
	# API Schemas
	# =========================

	class VerificationResponse(BaseModel):
	    """Response body produced by `VisualEvidenceVerifier.verify`."""

	    # Complaint text as it was passed to the verifier.
	    complaint_text: str
	    # Cosine similarity between the text and image embeddings, rounded to 4 places.
	    image_match_score: float
	    # One of "strong_match", "partial_match" or "weak_match".
	    verification_status: str
	    # True for strong and partial matches, False for weak matches.
	    image_supports_complaint: bool
	    # Score at or above which the status is "strong_match".
	    strong_threshold: float
	    # Score at or above which the status is "partial_match" (when below strong).
	    partial_threshold: float
	    # Fixed label naming the scoring method.
	    method: str
	    # Name of the embedding model the verifier was configured with.
	    model: str


	class HealthResponse(BaseModel):
	    """Response body for the `/` and `/health` status endpoints."""

	    # "running" from `/`, "ok" from `/health`.
	    status: str
	    # Value of MODEL_NAME.
	    model_name: str
	    # Whether the shared verifier currently holds a loaded model.
	    model_loaded: bool
	    # Device string chosen by the verifier: "cuda" or "cpu".
	    device: str


	# =========================
	# Service
	# =========================

	class VisualEvidenceVerifier:
	    """
	    Multilingual image-text verification using Qwen3-VL embeddings.

	    Logic:
	    - Encode complaint text
	    - Encode uploaded image
	    - Compare embeddings using cosine similarity
	    - Return match/partial/weak verification result
	    """

	    def __init__(
	        self,
	        model_name: str = MODEL_NAME,
	        strong_threshold: float = 0.55,
	        partial_threshold: float = 0.35,
	    ):
	        """
	        Store the configuration; the model itself is not loaded here.

	        Args:
	            model_name: Name passed to `SentenceTransformer` on first load.
	            strong_threshold: Minimum score reported as "strong_match".
	            partial_threshold: Minimum score reported as "partial_match".
	        """
	        self.model_name = model_name
	        self.strong_threshold = strong_threshold
	        self.partial_threshold = partial_threshold

	        self.device = "cuda" if torch.cuda.is_available() else "cpu"

	        # Populated by `load_model`; None until the first load.
	        self.model: Optional[SentenceTransformer] = None
	        # Serializes model construction across concurrent callers.
	        self._lock = threading.Lock()

	    def load_model(self) -> SentenceTransformer:
	        """
	        Lazy model loading.
	        This prevents the Space from failing during startup if loading is slow.
	        First /verify request will load the model.

	        Returns:
	            The loaded `SentenceTransformer` instance (cached on `self.model`).
	        """
	        if self.model is None:
	            with self._lock:
	                # Check again after acquiring the lock so that only one
	                # caller constructs the model.
	                if self.model is None:
	                    self.model = SentenceTransformer(
	                        self.model_name,
	                        device=self.device,
	                    )

	        return self.model

	    def _load_image(self, image_path: Path) -> Image.Image:
	        """
	        Open `image_path` and return it as an RGB image.

	        Raises:
	            ValueError: If the file cannot be opened or decoded as an image.
	        """
	        try:
	            # `Image.open` keeps the underlying file open; the context
	            # manager closes it once `convert` has produced its own
	            # in-memory image, so no file handle outlives this call.
	            with Image.open(image_path) as source:
	                return source.convert("RGB")
	        except Exception as error:
	            # Chain the cause so the original decoder error stays visible.
	            raise ValueError(f"Invalid image file: {error}") from error

	    def _decide_status(self, score: float):
	        """
	        Map a similarity score to `(status, supports)`.

	        Returns:
	            ("strong_match", True) at or above the strong threshold,
	            ("partial_match", True) at or above the partial threshold,
	            ("weak_match", False) otherwise.
	        """
	        if score >= self.strong_threshold:
	            return "strong_match", True

	        if score >= self.partial_threshold:
	            return "partial_match", True

	        return "weak_match", False

	    def verify(
	        self,
	        complaint_text: str,
	        image_path: Path,
	    ) -> VerificationResponse:
	        """
	        Score how well the image at `image_path` matches `complaint_text`.

	        Args:
	            complaint_text: Complaint text; must have at least 3
	                non-whitespace-trimmed characters.
	            image_path: Path to the image file to compare against.

	        Returns:
	            A `VerificationResponse` with the rounded cosine similarity and
	            the status derived from the configured thresholds.

	        Raises:
	            ValueError: If the text is too short or the image is invalid.
	            FileNotFoundError: If `image_path` does not exist.
	        """
	        if not complaint_text or len(complaint_text.strip()) < 3:
	            raise ValueError("Complaint text is too short.")

	        if not image_path.exists():
	            raise FileNotFoundError(f"Image not found: {image_path}")

	        # Decode the image before touching the model: an unreadable upload
	        # is rejected without paying for a (possibly first-time) model load.
	        image = self._load_image(image_path)
	        model = self.load_model()

	        text_embedding = model.encode(
	            [complaint_text],
	            convert_to_tensor=True,
	            normalize_embeddings=True,
	        )

	        image_embedding = model.encode(
	            [image],
	            convert_to_tensor=True,
	            normalize_embeddings=True,
	        )

	        # One text against one image: the similarity matrix has a single cell.
	        score = float(util.cos_sim(text_embedding, image_embedding)[0][0])
	        status, supports = self._decide_status(score)

	        return VerificationResponse(
	            complaint_text=complaint_text,
	            image_match_score=round(score, 4),
	            verification_status=status,
	            image_supports_complaint=supports,
	            strong_threshold=self.strong_threshold,
	            partial_threshold=self.partial_threshold,
	            method="qwen3_vl_embedding_image_text_similarity",
	            model=self.model_name,
	        )


	# Single module-level verifier used by the route handlers; constructing it
	# does not load the model (that happens on the first `load_model()` call).
	verifier = VisualEvidenceVerifier()


	# =========================
	# Routes
	# =========================

	@app.get("/", response_model=HealthResponse)
	def home():
	    """Root endpoint: report that the service is running and the model state."""
	    is_loaded = verifier.model is not None
	    summary = HealthResponse(
	        status="running",
	        model_name=MODEL_NAME,
	        model_loaded=is_loaded,
	        device=verifier.device,
	    )
	    return summary


	@app.get("/health", response_model=HealthResponse)
	def health():
	    """Health check: report status "ok" together with the model state."""
	    fields = {
	        "status": "ok",
	        "model_name": MODEL_NAME,
	        "model_loaded": verifier.model is not None,
	        "device": verifier.device,
	    }
	    return HealthResponse(**fields)


	@app.post("/load-model")
	def load_model():
	    """
	    Warm-up endpoint: load the model ahead of a demo.
	    The first call may take a while.
	    """
	    verifier.load_model()

	    return dict(
	        status="loaded",
	        model=MODEL_NAME,
	        device=verifier.device,
	    )


	@app.post("/verify-image-evidence", response_model=VerificationResponse)
	async def verify_image_evidence(
	    complaint_text: str = Form(...),
	    file: UploadFile = File(...),
	):
	    """
	    Verify whether an uploaded image supports the given complaint text.

	    The upload is written to a temporary file, scored by the shared
	    verifier, and the temporary file is always removed afterwards.

	    Raises:
	        HTTPException 400: Unsupported/missing file extension, or invalid
	            input reported by the verifier (text too short, unreadable image).
	        HTTPException 500: Any other failure while storing or scoring.
	    """
	    # Imported locally so this handler does not depend on a top-of-file change.
	    from fastapi.concurrency import run_in_threadpool

	    allowed_extensions = {".jpg", ".jpeg", ".png", ".webp"}

	    # `filename` can be None for a multipart part without one; treat that as
	    # "no extension" so the client gets the 400 below instead of a TypeError.
	    suffix = Path(file.filename or "").suffix.lower()

	    if suffix not in allowed_extensions:
	        raise HTTPException(
	            status_code=400,
	            detail=f"Unsupported image type '{suffix}'. Use jpg, jpeg, png, or webp.",
	        )

	    temp_path: Optional[Path] = None

	    try:
	        # Inside the try so the temp file is removed even if reading the
	        # upload or writing it to disk fails part-way.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
	            temp_path = Path(temp_file.name)
	            temp_file.write(await file.read())

	        # Model loading and inference are blocking; run them in the thread
	        # pool so the event loop keeps serving other requests meanwhile.
	        return await run_in_threadpool(
	            verifier.verify,
	            complaint_text=complaint_text,
	            image_path=temp_path,
	        )

	    except ValueError as error:
	        # Validation failures from the verifier are client errors.
	        raise HTTPException(status_code=400, detail=str(error)) from error

	    except Exception as error:
	        raise HTTPException(status_code=500, detail=str(error)) from error

	    finally:
	        if temp_path is not None:
	            temp_path.unlink(missing_ok=True)


	@app.post("/debug-compare-texts")
	def debug_compare_texts(
	    text_a: str = Form(...),
	    text_b: str = Form(...),
	):
	    """
	    Debug endpoint: embed two texts and return their cosine similarity.
	    Handy for checking the model before trying an image upload.
	    """
	    encoder = verifier.load_model()

	    # Both texts are encoded in one call; row 0 is text_a, row 1 is text_b.
	    vectors = encoder.encode(
	        [text_a, text_b],
	        convert_to_tensor=True,
	        normalize_embeddings=True,
	    )
	    vector_a = vectors[0]
	    vector_b = vectors[1]

	    similarity = float(util.cos_sim(vector_a, vector_b))
	    rounded = round(similarity, 4)

	    return {
	        "text_a": text_a,
	        "text_b": text_b,
	        "similarity_score": rounded,
	        "model": MODEL_NAME,
	    }