webolavo committed on
Commit
10e7f25
·
verified ·
1 Parent(s): 98c85f4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +198 -70
app.py CHANGED
@@ -13,115 +13,243 @@ sys.modules["flash_attn.bert_padding"] = types.ModuleType("flash_attn.bert_paddi
13
 
14
  import io
15
  import time
 
16
  import torch
17
  from PIL import Image
18
- from transformers import AutoProcessor, AutoModelForCausalLM
 
 
 
19
  from fastapi import FastAPI, HTTPException, UploadFile, File
 
 
20
  from contextlib import asynccontextmanager
21
 
22
# Hugging Face model id loaded at startup by `lifespan`.
MODEL_ID = "microsoft/Florence-2-large-ft"

# Prompt appended to the "<VQA>" task token; the trailing instruction asks
# the model to reply with a bare yes/no so `decide` can parse the answer.
VQA_QUESTION = (
    "Is there a woman or any part of a woman's body in this image? Answer yes or no only."
)

# Module-level registry; `lifespan` stores the "processor" and "model" here.
MODEL_DATA = {}
30
 
31
  @asynccontextmanager
32
  async def lifespan(app: FastAPI):
33
- print(f"๐Ÿ“ฅ Loading {MODEL_ID}...")
 
34
  start = time.time()
35
- MODEL_DATA["processor"] = AutoProcessor.from_pretrained(
36
- MODEL_ID, trust_remote_code=True
 
 
 
 
 
 
 
 
 
37
  )
38
- MODEL_DATA["model"] = AutoModelForCausalLM.from_pretrained(
39
- MODEL_ID,
40
  torch_dtype=torch.float32,
41
  trust_remote_code=True,
42
  attn_implementation="eager"
43
  ).eval()
44
- print(f"โœ… Model ready in {time.time()-start:.1f}s")
 
45
  yield
46
  MODEL_DATA.clear()
47
 
48
  app = FastAPI(
49
- title="Female Detection API - VQA",
50
- description="Florence-2-large-ft | VQA",
51
- version="4.3.0",
52
  lifespan=lifespan
53
  )
54
 
55
@app.get("/health")
def health() -> dict:
    """Return a liveness payload and whether "model" is present in MODEL_DATA."""
    return {"status": "ok", "model_loaded": "model" in MODEL_DATA}
58
-
59
def decide(answer: str) -> tuple[str, str]:
    """Map the model's free-text VQA answer to a (decision, reason) pair.

    - first word is "no"      -> ("allow", "model_answered_no")
    - answer contains "yes"   -> ("block", "model_answered_yes")
    - anything else           -> ("block", "unexpected_answer_blocked_for_safety")

    Only a leading standalone "no" allows the image. A plain prefix test
    would also accept answers such as "not sure" or "nothing", which must
    fall through to the fail-safe block branch instead.
    """
    normalized = answer.strip().lower()
    words = normalized.split()
    # Drop punctuation glued to the first word so "no." / "no," still count.
    first_word = words[0].strip(".,!?;:\"'") if words else ""

    if first_word == "no":
        return "allow", "model_answered_no"
    if "yes" in normalized:
        return "block", "model_answered_yes"
    return "block", "unexpected_answer_blocked_for_safety"
72
 
73
@app.post("/analyze")
async def analyze_image(file: UploadFile = File(...)):
    """Run the Florence-2 VQA question on an uploaded image and return the decision.

    Raises:
        HTTPException(400): the upload is not declared as an image, or the
            bytes cannot be opened as one.
        HTTPException(500): anything fails during inference or parsing.
    """
    # content_type is optional on an upload; a missing value is rejected
    # as "not an image" instead of crashing on None.startswith.
    if not (file.content_type or "").startswith("image/"):
        raise HTTPException(status_code=400, detail="الملف ليس صورة")

    try:
        image = Image.open(io.BytesIO(await file.read())).convert("RGB")
    except Exception as e:
        raise HTTPException(
            status_code=400, detail=f"خطأ في قراءة الصورة: {str(e)}"
        ) from e

    try:
        processor = MODEL_DATA["processor"]
        model = MODEL_DATA["model"]

        task = "<VQA>"
        prompt = f"{task}{VQA_QUESTION}"

        inputs = processor(text=prompt, images=image, return_tensors="pt")

        start_time = time.time()
        with torch.no_grad():
            generated_ids = model.generate(
                input_ids=inputs["input_ids"],
                pixel_values=inputs["pixel_values"],
                max_new_tokens=10,
                num_beams=3,
                do_sample=False
            )

        # Special tokens are kept because post_process_generation is given
        # the raw decoded text together with the task token.
        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
        parsed = processor.post_process_generation(
            generated_text,
            task=task,
            image_size=(image.width, image.height)
        )
        elapsed = round(time.time() - start_time, 2)

        answer = parsed.get(task, "").strip()
        decision, reason = decide(answer)

        return {
            "decision": decision,
            "reason": reason,
            "vqa_answer": answer,
            "question": VQA_QUESTION,
            "execution_time": elapsed,
            "status": "success"
        }

    except Exception as e:
        # Top-level boundary: surface any inference failure as a 500.
        raise HTTPException(status_code=500, detail=str(e)) from e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
 
126
 
127
  if __name__ == "__main__":
 
13
 
14
  import io
15
  import time
16
+ import httpx
17
  import torch
18
  from PIL import Image
19
+ from transformers import (
20
+ BlipProcessor, BlipForQuestionAnswering,
21
+ AutoProcessor, AutoModelForCausalLM
22
+ )
23
  from fastapi import FastAPI, HTTPException, UploadFile, File
24
+ from fastapi.middleware.cors import CORSMiddleware
25
+ from pydantic import BaseModel
26
  from contextlib import asynccontextmanager
27
 
28
+ # โ”€โ”€โ”€ ุงู„ู†ู…ุงุฐุฌ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
29
# Hugging Face model ids loaded at startup by `lifespan`.
BLIP_MODEL_ID = "Salesforce/blip-vqa-base"
FLORENCE_MODEL_ID = "microsoft/Florence-2-large-ft"

# Questions put to BLIP, one generate() call each (see `run_blip`).
QUESTIONS = [
    "is there a person in this image?",
    "is there a woman in this image?",
    "is there a human body part in this image?",
    "is there a hand or arm visible?",
    "is there a face visible?",
    "is there a leg or foot visible?",
    "is there a belly or stomach visible?",
]

# Question appended to the "<VQA>" task token for Florence-2.
FLORENCE_QUESTION = (
    "Is there a woman or any part of a woman's body in this image? "
    "Answer yes or no only."
)

# Module-level registry; `lifespan` stores both processors and models here.
MODEL_DATA = {}
50
 
51
  @asynccontextmanager
52
  async def lifespan(app: FastAPI):
53
+ # โ”€โ”€ ุชุญู…ูŠู„ BLIP โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
54
+ print(f"๐Ÿ“ฅ Loading {BLIP_MODEL_ID}...")
55
  start = time.time()
56
+ MODEL_DATA["blip_processor"] = BlipProcessor.from_pretrained(BLIP_MODEL_ID)
57
+ MODEL_DATA["blip_model"] = BlipForQuestionAnswering.from_pretrained(
58
+ BLIP_MODEL_ID, torch_dtype=torch.float32
59
+ ).eval()
60
+ print(f"โœ… BLIP ready in {time.time()-start:.1f}s")
61
+
62
+ # โ”€โ”€ ุชุญู…ูŠู„ Florence-2 โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
63
+ print(f"๐Ÿ“ฅ Loading {FLORENCE_MODEL_ID}...")
64
+ start = time.time()
65
+ MODEL_DATA["florence_processor"] = AutoProcessor.from_pretrained(
66
+ FLORENCE_MODEL_ID, trust_remote_code=True
67
  )
68
+ MODEL_DATA["florence_model"] = AutoModelForCausalLM.from_pretrained(
69
+ FLORENCE_MODEL_ID,
70
  torch_dtype=torch.float32,
71
  trust_remote_code=True,
72
  attn_implementation="eager"
73
  ).eval()
74
+ print(f"โœ… Florence-2 ready in {time.time()-start:.1f}s")
75
+
76
  yield
77
  MODEL_DATA.clear()
78
 
79
  app = FastAPI(
80
+ title="AI Shield - Dual Model Detection",
81
+ description="BLIP + Florence-2-large-ft | Compatible with AI Shield Chrome Extension",
82
+ version="6.0.0",
83
  lifespan=lifespan
84
  )
85
 
86
+ app.add_middleware(
87
+ CORSMiddleware,
88
+ allow_origins=["*"],
89
+ allow_credentials=True,
90
+ allow_methods=["*"],
91
+ allow_headers=["*"],
92
+ )
 
 
 
 
 
 
 
 
 
 
93
 
94
+ # โ”€โ”€โ”€ Schema โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
95
class ImageUrlRequest(BaseModel):
    """JSON body of POST /analyze: the URL of the image to download and check."""

    # Passed as-is to the HTTP client in `analyze_from_url`.
    image_url: str
97
 
98
+ # โ”€โ”€โ”€ ุฏุงู„ุฉ BLIP โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€๏ฟฝ๏ฟฝโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
99
def run_blip(image: Image.Image) -> dict:
    """Ask BLIP every entry of QUESTIONS about *image* and bucket the answers.

    Returns:
        {"yes": {question: answer, ...}, "no": {question: answer, ...}}
        An answer lands in "yes" when its lower-cased, stripped text starts
        with "yes"; every other answer lands in "no".
    """
    processor = MODEL_DATA["blip_processor"]
    model = MODEL_DATA["blip_model"]
    yes_answers = {}
    no_answers = {}

    # One no_grad scope for the whole loop instead of re-entering it per question.
    with torch.no_grad():
        for question in QUESTIONS:
            inputs = processor(image, question, return_tensors="pt")
            out = model.generate(**inputs, max_new_tokens=5)
            answer = processor.decode(out[0], skip_special_tokens=True).strip().lower()
            bucket = yes_answers if answer.startswith("yes") else no_answers
            bucket[question] = answer

    return {"yes": yes_answers, "no": no_answers}
 
 
116
 
117
+ # โ”€โ”€โ”€ ุฏุงู„ุฉ Florence-2 โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
118
def run_florence(image: Image.Image) -> dict:
    """Ask Florence-2 the FLORENCE_QUESTION about *image* and derive a decision.

    Returns:
        {"decision": "ALLOW" | "BLOCK", "answer": <lower-cased answer>,
         "elapsed": <seconds, rounded to 2 places>}
        "ALLOW" is returned only when the first word of the answer is "no";
        any other answer, including an empty one, yields "BLOCK".
    """
    processor = MODEL_DATA["florence_processor"]
    model = MODEL_DATA["florence_model"]

    task = "<VQA>"
    prompt = f"{task}{FLORENCE_QUESTION}"
    inputs = processor(text=prompt, images=image, return_tensors="pt")

    start = time.time()
    with torch.no_grad():
        generated_ids = model.generate(
            input_ids=inputs["input_ids"],
            pixel_values=inputs["pixel_values"],
            max_new_tokens=10,
            do_sample=False
        )

    # Special tokens are kept because post_process_generation is given the
    # raw decoded text together with the task token.
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed = processor.post_process_generation(
        generated_text, task=task,
        image_size=(image.width, image.height)
    )
    answer = parsed.get(task, "").strip().lower()
    elapsed = round(time.time() - start, 2)

    # Match the first word, not a prefix: "not sure" or "nothing" must not
    # be read as a "no" and let the image through.
    words = answer.split()
    first_word = words[0].strip(".,!?;:\"'") if words else ""
    decision = "ALLOW" if first_word == "no" else "BLOCK"

    return {"decision": decision, "answer": answer, "elapsed": elapsed}
147
+
148
+ # โ”€โ”€โ”€ ู…ู†ุทู‚ ุงู„ู‚ุฑุงุฑ ุงู„ุฑุฆูŠุณูŠ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
149
def process_image(image: Image.Image) -> dict:
    """Decide ALLOW/BLOCK for *image* using BLIP first and Florence-2 only if needed.

    Decision flow:
      1. BLIP answers "yes" to the woman question  -> BLOCK, Florence skipped.
      2. BLIP answers "yes" to no question at all  -> ALLOW, Florence skipped.
      3. Otherwise                                 -> Florence-2 decides.

    Returns:
        A dict with "decision", "reason", "stage", the BLIP yes/no answers
        and timing, "florence_used", "total_time" and "status"; when
        Florence ran, also "florence_answer" and "florence_time".
    """
    total_start = time.time()

    # Stage 1: BLIP
    blip_start = time.time()
    blip_result = run_blip(image)
    blip_elapsed = round(time.time() - blip_start, 2)

    yes_q = blip_result["yes"]
    no_q = blip_result["no"]

    def build_response(decision, reason, stage, florence=None):
        # Single place that shapes the payload; key order matches the
        # three hand-written dicts this replaces.
        response = {
            "decision": decision,
            "reason": reason,
            "stage": stage,
            "blip_yes": yes_q,
            "blip_no": no_q,
            "blip_time": blip_elapsed,
        }
        if florence is not None:
            response["florence_answer"] = florence["answer"]
            response["florence_time"] = florence["elapsed"]
        response["florence_used"] = florence is not None
        response["total_time"] = round(time.time() - total_start, 2)
        response["status"] = "success"
        return response

    # Case 1: BLIP detected a woman directly -> BLOCK immediately
    woman_questions = (
        "is there a woman in this image?",
    )
    if any(q in yes_q for q in woman_questions):
        return build_response("BLOCK", "blip_detected_woman_directly", "blip_only")

    # Case 2: BLIP detected no human at all -> ALLOW immediately
    if not yes_q:
        return build_response("ALLOW", "blip_no_human_detected", "blip_only")

    # Case 3: BLIP detected a human but not a woman -> ask Florence
    florence_result = run_florence(image)
    final_decision = florence_result["decision"]
    if final_decision == "BLOCK":
        reason = "florence_confirmed_woman"
    else:
        reason = "florence_confirmed_no_woman"

    return build_response(final_decision, reason, "blip_then_florence", florence_result)
213
+
214
+ # โ”€โ”€โ”€ Health โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
215
@app.get("/health")
def health() -> dict:
    """Return a liveness payload and whether each model is present in MODEL_DATA."""
    return {
        "status": "ok",
        "blip_loaded": "blip_model" in MODEL_DATA,
        "florence_loaded": "florence_model" in MODEL_DATA
    }
222
+
223
+ # โ”€โ”€โ”€ Endpoint 1: ู…ู† ุฅุถุงูุฉ Chrome โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
224
@app.post("/analyze")
async def analyze_from_url(request: ImageUrlRequest):
    """Download the image at `request.image_url` and run the detection pipeline.

    Called by the Chrome extension.

    Raises:
        HTTPException(400): the URL is not http(s), the download fails or
            returns a non-success status, or the bytes are not an image.
    """
    # The URL comes straight from the caller, so refuse anything that is
    # not plain http(s) before handing it to the HTTP client.
    # NOTE(review): this still lets callers make the server fetch internal
    # addresses (SSRF) and download bodies of any size - consider a host
    # allow-list and a size cap.
    if not request.image_url.strip().lower().startswith(("http://", "https://")):
        raise HTTPException(
            status_code=400,
            detail="فشل تحميل الصورة: unsupported URL scheme (expected http or https)"
        )

    try:
        async with httpx.AsyncClient(timeout=30) as client:
            response = await client.get(request.image_url)
            response.raise_for_status()
            image_bytes = response.content
    except Exception as e:
        raise HTTPException(
            status_code=400, detail=f"فشل تحميل الصورة: {str(e)}"
        ) from e

    try:
        image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
    except Exception as e:
        raise HTTPException(
            status_code=400, detail=f"خطأ في قراءة الصورة: {str(e)}"
        ) from e

    # NOTE(review): process_image is synchronous model inference and runs
    # on the event loop here, so other requests wait until it finishes.
    return process_image(image)
240
+
241
+ # โ”€โ”€โ”€ Endpoint 2: ุงุฎุชุจุงุฑ ูŠุฏูˆูŠ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
242
@app.post("/analyze-file")
async def analyze_from_file(file: UploadFile = File(...)):
    """Run the detection pipeline on an uploaded image file (manual testing).

    Raises:
        HTTPException(400): the upload is not declared as an image, or the
            bytes cannot be opened as one.
    """
    # content_type is optional on an upload; a missing value is rejected
    # as "not an image" instead of crashing on None.startswith.
    if not (file.content_type or "").startswith("image/"):
        raise HTTPException(status_code=400, detail="الملف ليس صورة")

    try:
        image = Image.open(io.BytesIO(await file.read())).convert("RGB")
    except Exception as e:
        raise HTTPException(
            status_code=400, detail=f"خطأ في قراءة الصورة: {str(e)}"
        ) from e

    return process_image(image)
253
 
254
 
255
  if __name__ == "__main__":