kanhacoderx commited on
Commit
eafadff
·
verified ·
1 Parent(s): e4c9f32

Upload 4 files

Browse files
Files changed (4) hide show
  1. Dockerfile +16 -0
  2. README.md +13 -10
  3. app.py +259 -0
  4. requirements.txt +10 -0
Dockerfile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11-slim

# git is installed for pip/hub tooling; skip recommended packages to keep the image small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends git \
    && rm -rf /var/lib/apt/lists/*

# Hugging Face Spaces runs the container as UID 1000. Create that user up
# front so the process has a real, writable home directory at runtime.
RUN useradd -m -u 1000 user

WORKDIR /app

COPY requirements.txt .

RUN pip install --no-cache-dir --upgrade pip
RUN pip install --no-cache-dir -r requirements.txt

COPY --chown=user app.py .

# Drop root for runtime. The model is downloaded lazily on the first request,
# so the Hugging Face cache must live somewhere the runtime user can write.
USER user
ENV HOME=/home/user \
    HF_HOME=/home/user/.cache/huggingface

EXPOSE 7860

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,10 +1,13 @@
1
- ---
2
- title: Image Text Verify
3
- emoji: 🚀
4
- colorFrom: purple
5
- colorTo: gray
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
1
+ ---
2
+ title: Visual Evidence Verification API
3
+ emoji: 🖼️
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: docker
7
+ app_port: 7860
8
+ pinned: false
9
+ ---
10
+
11
+ # Visual Evidence Verification API
12
+
13
+ FastAPI backend that verifies whether an uploaded image supports a multilingual citizen complaint.
app.py ADDED
@@ -0,0 +1,259 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from typing import Optional, List
3
+ import tempfile
4
+ import threading
5
+
6
+ from fastapi import FastAPI, UploadFile, File, Form, HTTPException
7
+ from fastapi.middleware.cors import CORSMiddleware
8
+ from pydantic import BaseModel, Field
9
+ from PIL import Image
10
+
11
+ import torch
12
+ from sentence_transformers import SentenceTransformer, util
13
+
14
+
15
+ MODEL_NAME = "Qwen/Qwen3-VL-Embedding-2B"  # default model id for VisualEvidenceVerifier; also echoed in API responses
16
+
17
+
18
# Application object; the metadata below is passed straight to FastAPI.
app = FastAPI(
    title="Visual Evidence Verification API",
    description=(
        "Verifies whether an uploaded image supports a multilingual citizen "
        "complaint using Qwen3-VL multimodal embeddings."
    ),
    version="1.0.0",
)

# CORS: wildcard origins must not be combined with credentialed requests.
# None of the routes in this file read cookies or auth headers, so
# credentials are disabled while the origin list is still "*". Re-enable
# allow_credentials only together with an explicit list of origins.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # later replace with your Vercel frontend URL
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
34
+
35
+
36
+ # =========================
37
+ # API Schemas
38
+ # =========================
39
+
40
class VerificationResponse(BaseModel):
    """Result of comparing one complaint text against one uploaded image."""

    # Complaint text as it was handed to the verifier.
    complaint_text: str

    # Cosine similarity between the text and image embeddings,
    # rounded to 4 decimal places by the verifier.
    image_match_score: float

    # One of "strong_match", "partial_match" or "weak_match".
    verification_status: str

    # True for strong and partial matches, False for weak matches.
    image_supports_complaint: bool

    # Score cut-offs that were applied when deciding the status.
    strong_threshold: float
    partial_threshold: float

    # Identifier of the comparison technique and of the embedding model used.
    method: str
    model: str
49
+
50
+
51
class HealthResponse(BaseModel):
    """Status payload returned by the root and health-check routes."""

    # Short liveness label chosen by the route ("running" or "ok").
    status: str

    # Name of the embedding model the service is configured with.
    model_name: str

    # Whether the lazily loaded model has been instantiated yet.
    model_loaded: bool

    # Torch device selected by the verifier ("cuda" or "cpu").
    device: str
56
+
57
+
58
+ # =========================
59
+ # Service
60
+ # =========================
61
+
62
class VisualEvidenceVerifier:
    """
    Multilingual image-text verification using Qwen3-VL embeddings.

    Logic:
    - Encode complaint text
    - Encode uploaded image
    - Compare embeddings using cosine similarity
    - Return match/partial/weak verification result
    """

    def __init__(
        self,
        model_name: str = MODEL_NAME,
        strong_threshold: float = 0.55,
        partial_threshold: float = 0.35,
    ):
        """
        Store configuration; the model itself is NOT loaded here.

        Args:
            model_name: Model identifier passed to SentenceTransformer.
            strong_threshold: Minimum score reported as "strong_match".
            partial_threshold: Minimum score reported as "partial_match".
        """
        self.model_name = model_name
        self.strong_threshold = strong_threshold
        self.partial_threshold = partial_threshold

        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        # Populated on first use by load_model(); the lock guards that
        # one-time initialisation against concurrent first requests.
        self.model: Optional[SentenceTransformer] = None
        self._lock = threading.Lock()

    def load_model(self) -> SentenceTransformer:
        """
        Lazy model loading.
        This prevents the Space from failing during startup if loading is slow.
        The first request that needs the model will load it.

        Returns:
            The shared SentenceTransformer instance.
        """
        if self.model is None:
            with self._lock:
                # Re-check inside the lock: another thread may have finished
                # loading while this one was waiting.
                if self.model is None:
                    self.model = SentenceTransformer(
                        self.model_name,
                        device=self.device,
                    )

        return self.model

    def _load_image(self, image_path: Path) -> Image.Image:
        """
        Open an image file and return it as an in-memory RGB image.

        Raises:
            ValueError: If the file cannot be opened or decoded as an image.
        """
        try:
            # convert() returns a new image, so the file opened here can be
            # closed as soon as the block exits instead of leaking the
            # handle until garbage collection.
            with Image.open(image_path) as opened:
                return opened.convert("RGB")
        except Exception as error:
            raise ValueError(f"Invalid image file: {error}") from error

    def _decide_status(self, score: float) -> tuple[str, bool]:
        """
        Map a similarity score to (status label, supports-complaint flag).

        Scores at or above strong_threshold are "strong_match", scores at or
        above partial_threshold are "partial_match" (both supportive), and
        anything lower is "weak_match" (not supportive).
        """
        if score >= self.strong_threshold:
            return "strong_match", True

        if score >= self.partial_threshold:
            return "partial_match", True

        return "weak_match", False

    def verify(
        self,
        complaint_text: str,
        image_path: Path,
    ) -> VerificationResponse:
        """
        Score how well the image at image_path matches complaint_text.

        Args:
            complaint_text: Complaint text; must contain at least 3
                non-whitespace-trimmed characters.
            image_path: Path to the image file on disk.

        Returns:
            A VerificationResponse with the rounded score, the status label
            and the thresholds that were applied.

        Raises:
            ValueError: If the text is too short or the image is invalid.
            FileNotFoundError: If image_path does not exist.
        """
        if not complaint_text or len(complaint_text.strip()) < 3:
            raise ValueError("Complaint text is too short.")

        if not image_path.exists():
            raise FileNotFoundError(f"Image not found: {image_path}")

        model = self.load_model()
        image = self._load_image(image_path)

        text_embedding = model.encode(
            [complaint_text],
            convert_to_tensor=True,
            normalize_embeddings=True,
        )

        image_embedding = model.encode(
            [image],
            convert_to_tensor=True,
            normalize_embeddings=True,
        )

        # cos_sim returns a matrix; one text against one image gives a single cell.
        score = float(util.cos_sim(text_embedding, image_embedding)[0][0])
        status, supports = self._decide_status(score)

        return VerificationResponse(
            complaint_text=complaint_text,
            image_match_score=round(score, 4),
            verification_status=status,
            image_supports_complaint=supports,
            strong_threshold=self.strong_threshold,
            partial_threshold=self.partial_threshold,
            method="qwen3_vl_embedding_image_text_similarity",
            model=self.model_name,
        )
158
+
159
+
160
+ verifier = VisualEvidenceVerifier()  # module-level instance shared by all routes; model loads lazily on first use
161
+
162
+
163
+ # =========================
164
+ # Routes
165
+ # =========================
166
+
167
@app.get("/", response_model=HealthResponse)
def home():
    """Report at the root path that the service is running, plus model state."""
    is_loaded = verifier.model is not None
    current_device = verifier.device

    return HealthResponse(
        status="running",
        model_name=MODEL_NAME,
        model_loaded=is_loaded,
        device=current_device,
    )
175
+
176
+
177
@app.get("/health", response_model=HealthResponse)
def health():
    """Health check: status "ok" plus the configured model, load state and device."""
    payload = {
        "status": "ok",
        "model_name": MODEL_NAME,
        "model_loaded": verifier.model is not None,
        "device": verifier.device,
    }
    return HealthResponse(**payload)
185
+
186
+
187
@app.post("/load-model")
def load_model():
    """
    Warm-up endpoint: force the model to load ahead of a demo.
    The first call may take a while; later calls return immediately
    because the verifier keeps the loaded model.
    """
    verifier.load_model()

    summary = dict(
        status="loaded",
        model=MODEL_NAME,
        device=verifier.device,
    )
    return summary
200
+
201
+
202
@app.post("/verify-image-evidence", response_model=VerificationResponse)
async def verify_image_evidence(
    complaint_text: str = Form(...),
    file: UploadFile = File(...),
):
    """
    Verify an uploaded image against a complaint text.

    The upload is written to a temporary file, scored by the shared
    verifier, and the temporary file is always removed afterwards.

    Raises:
        HTTPException(400): Unsupported or missing file extension, or the
            verifier rejected the input with a ValueError (for example text
            too short or undecodable image).
        HTTPException(500): Any other failure while storing or scoring the upload.
    """
    # Local import: only this route needs the thread-pool helper.
    from fastapi.concurrency import run_in_threadpool

    allowed_extensions = {".jpg", ".jpeg", ".png", ".webp"}

    # The client may omit the filename; treat that as "no extension"
    # instead of crashing on Path(None).
    suffix = Path(file.filename or "").suffix.lower()

    if suffix not in allowed_extensions:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported image type '{suffix}'. Use jpg, jpeg, png, or webp.",
        )

    temp_path: Optional[Path] = None

    try:
        # Created inside the try so that a failed read/write still reaches
        # the cleanup in `finally` and does not leave the file behind.
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            temp_path = Path(temp_file.name)
            temp_file.write(await file.read())

        # Model inference is blocking; run it off the event loop so other
        # requests (e.g. health checks) stay responsive meanwhile.
        return await run_in_threadpool(
            verifier.verify,
            complaint_text=complaint_text,
            image_path=temp_path,
        )

    except ValueError as error:
        # Input validation problems are the caller's fault, not a server error.
        raise HTTPException(status_code=400, detail=str(error)) from error

    except Exception as error:
        raise HTTPException(status_code=500, detail=str(error)) from error

    finally:
        if temp_path is not None:
            temp_path.unlink(missing_ok=True)
233
+
234
+
235
@app.post("/debug-compare-texts")
def debug_compare_texts(
    text_a: str = Form(...),
    text_b: str = Form(...),
):
    """
    Debug helper: embed two texts with the shared model and return their
    cosine similarity. Handy for checking the model before testing uploads.
    """
    encoder = verifier.load_model()

    # Both texts are encoded in one batch; the rows come back in input order.
    vector_a, vector_b = encoder.encode(
        [text_a, text_b],
        convert_to_tensor=True,
        normalize_embeddings=True,
    )

    similarity = util.cos_sim(vector_a, vector_b)
    score = float(similarity)

    result = {
        "text_a": text_a,
        "text_b": text_b,
        "similarity_score": round(score, 4),
        "model": MODEL_NAME,
    }
    return result
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.121.0
2
+ uvicorn[standard]==0.38.0
3
+ python-multipart==0.0.20
4
+ pillow==11.3.0
5
+ torch
6
+ sentence-transformers>=5.1.0
7
+ transformers>=4.57.0
8
+ accelerate>=1.10.0
9
+ einops
10
+ timm