Spaces:

webolavo
/

webai

Sleeping

App Files Files Community

webolavo committed on Mar 22

Commit

10e7f25

verified ·

1 Parent(s): 98c85f4

Update app.py

Browse files

Files changed (1) hide show

app.py +198 -70

app.py CHANGED Viewed

@@ -13,115 +13,243 @@ sys.modules["flash_attn.bert_padding"] = types.ModuleType("flash_attn.bert_paddi
 import io
 import time
 import torch
 from PIL import Image
-from transformers import AutoProcessor, AutoModelForCausalLM
 from fastapi import FastAPI, HTTPException, UploadFile, File
 from contextlib import asynccontextmanager
-MODEL_ID = "microsoft/Florence-2-large-ft"
-# ─── السؤال الأصلي + تأكيد على اليد ─────────────────────────────
-VQA_QUESTION = (
-    "Is there a woman or any part of a woman's body in this image? Answer yes or no only."
 )
 MODEL_DATA = {}
 @asynccontextmanager
 async def lifespan(app: FastAPI):
-    print(f"📥 Loading {MODEL_ID}...")
     start = time.time()
-    MODEL_DATA["processor"] = AutoProcessor.from_pretrained(
-        MODEL_ID, trust_remote_code=True
     )
-    MODEL_DATA["model"] = AutoModelForCausalLM.from_pretrained(
-        MODEL_ID,
         torch_dtype=torch.float32,
         trust_remote_code=True,
         attn_implementation="eager"
     ).eval()
-    print(f"✅ Model ready in {time.time()-start:.1f}s")
     yield
     MODEL_DATA.clear()
 app = FastAPI(
-    title="Female Detection API - VQA",
-    description="Florence-2-large-ft | VQA",
-    version="4.3.0",
     lifespan=lifespan
 )
-@app.get("/health")
-def health():
-    return {"status": "ok", "model_loaded": "model" in MODEL_DATA}
-def decide(answer: str) -> tuple[str, str]:
-    """
-    - "no"        → allow  ✅
-    - "yes"       → block  🔴
-    - أي شيء آخر → block  🔴 للأمان
-    """
-    a = answer.strip().lower()
-    if a == "no" or a.startswith("no"):
-        return "allow", "model_answered_no"
-    elif "yes" in a:
-        return "block", "model_answered_yes"
-    else:
-        return "block", "unexpected_answer_blocked_for_safety"
-@app.post("/analyze")
-async def analyze_image(file: UploadFile = File(...)):
-    if not file.content_type.startswith("image/"):
-        raise HTTPException(status_code=400, detail="الملف ليس صورة")
-    try:
-        image = Image.open(io.BytesIO(await file.read())).convert("RGB")
-    except Exception as e:
-        raise HTTPException(status_code=400, detail=f"خطأ في قراءة الصورة: {str(e)}")
-    try:
-        processor = MODEL_DATA["processor"]
-        model     = MODEL_DATA["model"]
-        task   = "<VQA>"
-        prompt = f"{task}{VQA_QUESTION}"
-        inputs = processor(text=prompt, images=image, return_tensors="pt")
-        start_time = time.time()
-        with torch.no_grad():
-            generated_ids = model.generate(
-                input_ids=inputs["input_ids"],
-                pixel_values=inputs["pixel_values"],
-                max_new_tokens=10,
-                num_beams=3,
-                do_sample=False
-            )
-        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
-        parsed  = processor.post_process_generation(
-            generated_text,
-            task=task,
-            image_size=(image.width, image.height)
         )
-        elapsed = round(time.time() - start_time, 2)
-        answer           = parsed.get(task, "").strip()
-        decision, reason = decide(answer)
         return {
-            "decision":       decision,
-            "reason":         reason,
-            "vqa_answer":     answer,
-            "question":       VQA_QUESTION,
-            "execution_time": elapsed,
-            "status":         "success"
         }
     except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
 if __name__ == "__main__":

 import io
 import time
+import httpx
 import torch
 from PIL import Image
+from transformers import (
+    BlipProcessor, BlipForQuestionAnswering,
+    AutoProcessor, AutoModelForCausalLM
+)
 from fastapi import FastAPI, HTTPException, UploadFile, File
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
 from contextlib import asynccontextmanager
+# ─── Models ──────────────────────────────────────────────────────
+# Hub repository ids passed to from_pretrained() inside lifespan().
+BLIP_MODEL_ID     = "Salesforce/blip-vqa-base"
+FLORENCE_MODEL_ID = "microsoft/Florence-2-large-ft"
+# ─── BLIP questions ───────────────────────────────────────────────
+# run_blip() asks every entry, in this order. process_image() looks up the
+# "is there a woman in this image?" entry by exact string match, so the two
+# places must be edited together.
+QUESTIONS = [
+    "is there a person in this image?",
+    "is there a woman in this image?",
+    "is there a human body part in this image?",
+    "is there a hand or arm visible?",
+    "is there a face visible?",
+    "is there a leg or foot visible?",
+    "is there a belly or stomach visible?",
+]
+# ─── Florence question ────────────────────────────────────────────
+# run_florence() appends this text directly after the "<VQA>" task token.
+FLORENCE_QUESTION = (
+    "Is there a woman or any part of a woman's body in this image? "
+    "Answer yes or no only."
 )
 MODEL_DATA = {}
 @asynccontextmanager
 async def lifespan(app: FastAPI):
+    """Load both models into MODEL_DATA before serving; clear it on shutdown.
+
+    Everything before ``yield`` runs at application startup and stores the
+    BLIP and Florence-2 processors/models under the keys that run_blip(),
+    run_florence() and health() read. After ``yield`` the dict is emptied.
+    """
+    # ── Load BLIP ─────────────────────────────────────────────────
+    print(f"📥 Loading {BLIP_MODEL_ID}...")
     start = time.time()
+    MODEL_DATA["blip_processor"] = BlipProcessor.from_pretrained(BLIP_MODEL_ID)
+    MODEL_DATA["blip_model"]     = BlipForQuestionAnswering.from_pretrained(
+        BLIP_MODEL_ID, torch_dtype=torch.float32
+    ).eval()
+    print(f"✅ BLIP ready in {time.time()-start:.1f}s")
+    # ── Load Florence-2 ───────────────────────────────────────────
+    print(f"📥 Loading {FLORENCE_MODEL_ID}...")
+    # Timer is restarted so the second log line reports Florence-2 alone.
+    start = time.time()
+    # NOTE(review): trust_remote_code=True presumably lets code shipped in the
+    # model repository run at load time — confirm, and consider pinning a
+    # revision.
+    MODEL_DATA["florence_processor"] = AutoProcessor.from_pretrained(
+        FLORENCE_MODEL_ID, trust_remote_code=True
     )
+    MODEL_DATA["florence_model"] = AutoModelForCausalLM.from_pretrained(
+        FLORENCE_MODEL_ID,
         torch_dtype=torch.float32,
         trust_remote_code=True,
         attn_implementation="eager"
     ).eval()
+    print(f"✅ Florence-2 ready in {time.time()-start:.1f}s")
     yield
     MODEL_DATA.clear()
+# Application object; lifespan() above loads and releases the models.
 app = FastAPI(
+    title="AI Shield - Dual Model Detection",
+    description="BLIP + Florence-2-large-ft | Compatible with AI Shield Chrome Extension",
+    version="6.0.0",
     lifespan=lifespan
 )
+# CORS is fully open: any origin, method and header is accepted.
+# NOTE(review): combining wildcard origins with allow_credentials=True is
+# discouraged by the FastAPI CORS documentation — confirm whether the
+# extension really sends credentials; if not, set allow_credentials=False.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# ─── Schema ───────────────────────────────────────────────────────
+# JSON body accepted by POST /analyze. Documented with comments rather than
+# a docstring so the generated schema description stays unchanged.
+class ImageUrlRequest(BaseModel):
+    # Address of the image; analyze_from_url() downloads it server-side.
+    image_url: str
+# ─── دالة BLIP ────────────────────────────────────────────────────
+def run_blip(image: Image.Image) -> dict:
+    processor   = MODEL_DATA["blip_processor"]
+    model       = MODEL_DATA["blip_model"]
+    yes_answers = {}
+    no_answers  = {}
+    for question in QUESTIONS:
+        inputs = processor(image, question, return_tensors="pt")
+        with torch.no_grad():
+            out = model.generate(**inputs, max_new_tokens=5)
+        answer = processor.decode(out[0], skip_special_tokens=True).strip().lower()
+        if answer == "yes" or answer.startswith("yes"):
+            yes_answers[question] = answer
+        else:
+            no_answers[question] = answer
+    return {"yes": yes_answers, "no": no_answers}
+# ─── دالة Florence-2 ──────────────────────────────────────────────
+def run_florence(image: Image.Image) -> dict:
+    processor = MODEL_DATA["florence_processor"]
+    model     = MODEL_DATA["florence_model"]
+    task   = "<VQA>"
+    prompt = f"{task}{FLORENCE_QUESTION}"
+    inputs = processor(text=prompt, images=image, return_tensors="pt")
+    start = time.time()
+    with torch.no_grad():
+        generated_ids = model.generate(
+            input_ids=inputs["input_ids"],
+            pixel_values=inputs["pixel_values"],
+            max_new_tokens=10,
+            do_sample=False
         )
+    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
+    parsed = processor.post_process_generation(
+        generated_text, task=task,
+        image_size=(image.width, image.height)
+    )
+    answer  = parsed.get(task, "").strip().lower()
+    elapsed = round(time.time() - start, 2)
+    if answer == "no" or answer.startswith("no"):
+        return {"decision": "ALLOW", "answer": answer, "elapsed": elapsed}
+    else:
+        return {"decision": "BLOCK", "answer": answer, "elapsed": elapsed}
+# ─── منطق القرار الرئيسي ─────────────────────────────────────────
+def process_image(image: Image.Image) -> dict:
+    """Run the two-stage BLIP → Florence-2 decision for one image.
+
+    Stage 1 asks BLIP all of its questions. Three outcomes follow:
+      1. BLIP answered yes to the woman question → BLOCK, Florence skipped.
+      2. BLIP answered yes to nothing            → ALLOW, Florence skipped.
+      3. BLIP saw something human but no woman    → Florence-2 decides.
+
+    Returns a dict with "decision", "reason", "stage", the BLIP yes/no
+    answer maps, timings in seconds, "florence_used" and "status"; the
+    Florence answer and time are included only for outcome 3.
+    """
+    total_start = time.time()
+    # ══ Stage 1: BLIP ════════════════════════════════════════════
+    blip_start  = time.time()
+    blip_result = run_blip(image)
+    blip_elapsed = round(time.time() - blip_start, 2)
+    yes_q = blip_result["yes"]
+    no_q  = blip_result["no"]
+    # ─── Case 1: BLIP detected a woman directly → BLOCK immediately ──
+    # Entries must match the strings in QUESTIONS exactly; yes_q is keyed
+    # by question text.
+    WOMAN_QUESTIONS = [
+        "is there a woman in this image?",
+    ]
+    woman_detected = any(q in yes_q for q in WOMAN_QUESTIONS)
+    if woman_detected:
+        return {
+            "decision":      "BLOCK",
+            "reason":        "blip_detected_woman_directly",
+            "stage":         "blip_only",
+            "blip_yes":      yes_q,
+            "blip_no":       no_q,
+            "blip_time":     blip_elapsed,
+            "florence_used": False,
+            "total_time":    round(time.time() - total_start, 2),
+            "status":        "success"
+        }
+    # ─── Case 2: BLIP detected no human at all → ALLOW immediately ───
+    # An empty yes-map means every question got a non-"yes" answer.
+    if not yes_q:
         return {
+            "decision":      "ALLOW",
+            "reason":        "blip_no_human_detected",
+            "stage":         "blip_only",
+            "blip_yes":      yes_q,
+            "blip_no":       no_q,
+            "blip_time":     blip_elapsed,
+            "florence_used": False,
+            "total_time":    round(time.time() - total_start, 2),
+            "status":        "success"
         }
+    # ─── Case 3: BLIP detected a human but not a woman → Florence ────
+    florence_result = run_florence(image)
+    final_decision = florence_result["decision"]
+    reason = "florence_confirmed_woman" if final_decision == "BLOCK" \
+             else "florence_confirmed_no_woman"
+    return {
+        "decision":        final_decision,
+        "reason":          reason,
+        "stage":           "blip_then_florence",
+        "blip_yes":        yes_q,
+        "blip_no":         no_q,
+        "blip_time":       blip_elapsed,
+        "florence_answer": florence_result["answer"],
+        "florence_time":   florence_result["elapsed"],
+        "florence_used":   True,
+        "total_time":      round(time.time() - total_start, 2),
+        "status":          "success"
+    }
+# ─── Health ───────────────────────────────────────────────────────
+@app.get("/health")
+def health():
+    return {
+        "status":          "ok",
+        "blip_loaded":     "blip_model" in MODEL_DATA,
+        "florence_loaded": "florence_model" in MODEL_DATA
+    }
+# ─── Endpoint 1: من إضافة Chrome ─────────────────────────────────
+@app.post("/analyze")
+async def analyze_from_url(request: ImageUrlRequest):
+    try:
+        async with httpx.AsyncClient(timeout=30) as client:
+            response = await client.get(request.image_url)
+            response.raise_for_status()
+            image_bytes = response.content
+    except Exception as e:
+        raise HTTPException(status_code=400, detail=f"فشل تحميل الصورة: {str(e)}")
+    try:
+        image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
     except Exception as e:
+        raise HTTPException(status_code=400, detail=f"خطأ في قراءة الصورة: {str(e)}")
+    return process_image(image)
+# ─── Endpoint 2: اختبار يدوي ─────────────────────────────────────
+@app.post("/analyze-file")
+async def analyze_from_file(file: UploadFile = File(...)):
+    if not file.content_type.startswith("image/"):
+        raise HTTPException(status_code=400, detail="الملف ليس صورة")
+    try:
+        image = Image.open(io.BytesIO(await file.read())).convert("RGB")
+    except Exception as e:
+        raise HTTPException(status_code=400, detail=f"خطأ في قراءة الصورة: {str(e)}")
+    return process_image(image)
 if __name__ == "__main__":