Update app.py
app.py CHANGED
@@ -2,16 +2,19 @@ import io
 import time
 import torch
 from PIL import Image
-from transformers import AutoProcessor, AutoModelForCausalLM
+from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection
 from fastapi import FastAPI, HTTPException, UploadFile, File
 from fastapi.middleware.cors import CORSMiddleware
 from contextlib import asynccontextmanager
 
-MODEL_ID = "…"
+MODEL_ID = "IDEA-Research/grounding-dino-base"
 
-VQA_QUESTION = "…"
-…
-…
+# ─── Detection text: must be lowercase and end with a period ────
+# An important rule in Grounding DINO!
+DETECTION_TEXT = "woman . girl . female . person . human . hand . arm . face . leg . finger ."
+
+# ─── Confidence threshold ───────────────────────────────────────
+THRESHOLD = 0.3
 
 MODEL_DATA = {}
 
@@ -20,28 +23,19 @@ async def lifespan(app: FastAPI):
     print(f"📥 Loading {MODEL_ID}...")
     start = time.time()
 
-    MODEL_DATA["processor"] = AutoProcessor.from_pretrained(
-        MODEL_ID,
-        …
-    )
-    MODEL_DATA["model"] = AutoModelForCausalLM.from_pretrained(
-        MODEL_ID,
-        torch_dtype=torch.float32,
-        trust_remote_code=True,
-        attn_implementation="eager",
-        device_map="cpu"
+    MODEL_DATA["processor"] = AutoProcessor.from_pretrained(MODEL_ID)
+    MODEL_DATA["model"] = AutoModelForZeroShotObjectDetection.from_pretrained(
+        MODEL_ID
     ).eval()
 
-    # ─── DEBUG: see what the model accepts ──────────────────────
-    sig = MODEL_DATA["model"].forward.__code__.co_varnames
     print(f"✅ Model ready in {time.time()-start:.1f}s")
-    print(f"🔍 Model forward args: {list(sig)[:20]}")
     yield
     MODEL_DATA.clear()
 
 app = FastAPI(
-    title="Female Detection API - …",
-    …
+    title="Female Detection API - Grounding DINO Base",
+    description="IDEA-Research/grounding-dino-base | Zero-Shot Object Detection",
+    version="1.0.0",
     lifespan=lifespan
 )
 
@@ -57,15 +51,6 @@ app.add_middleware(
 def health():
     return {"status": "ok", "model_loaded": "model" in MODEL_DATA}
 
-def decide(answer: str) -> tuple[str, str]:
-    a = answer.strip().lower()
-    if a == "no" or a.startswith("no"):
-        return "allow", "model_answered_no"
-    elif "yes" in a:
-        return "block", "model_answered_yes"
-    else:
-        return "block", "unexpected_answer_blocked_for_safety"
-
 @app.post("/analyze")
 async def analyze_image(file: UploadFile = File(...)):
 
@@ -73,8 +58,7 @@ async def analyze_image(file: UploadFile = File(...)):
         raise HTTPException(status_code=400, detail="The file is not an image")
 
     try:
-        image_bytes = await file.read()
-        image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
+        image = Image.open(io.BytesIO(await file.read())).convert("RGB")
     except Exception as e:
         raise HTTPException(status_code=400, detail=f"Error reading the image: {str(e)}")
 
@@ -82,70 +66,52 @@ async def analyze_image(file: UploadFile = File(...)):
     processor = MODEL_DATA["processor"]
     model = MODEL_DATA["model"]
 
-    messages = [
-        {
-            "role": "user",
-            "content": [
-                {"type": "image", "image": image},
-                {"type": "text", "text": VQA_QUESTION}
-            ]
-        }
-    ]
-
-    inputs = processor.apply_chat_template(
-        messages,
-        tokenize=True,
-        add_generation_prompt=True,
-        return_dict=True,
+    inputs = processor(
+        images=image,
+        text=DETECTION_TEXT,
         return_tensors="pt"
     )
 
-    # ─── The fix: drop the keys the model does not accept ───────
-    # Qwen3.5 uses Early Fusion: the image is embedded in input_ids
-    KEYS_TO_REMOVE = [
-        "mm_token_type_ids",
-        "pixel_values",
-        "image_grid_thw",
-        "pixel_values_videos",
-        "video_grid_thw",
-        "second_per_grid_ts",
-    ]
-    clean_inputs = {
-        k: v for k, v in inputs.items()
-        if k not in KEYS_TO_REMOVE
-    }
-
-    # ─── DEBUG: see what is left ────────────────────────────────
-    print(f"🔍 Keys sent to generate: {list(clean_inputs.keys())}")
-
     start_time = time.time()
     with torch.no_grad():
-        generated_ids = model.generate(**clean_inputs, …)
-        elapsed = …
-        generated_ids_trimmed = [
-            …
+        outputs = model(**inputs)
+
+        # ─── Post-processing ─────────────────────────────────────
+        results = processor.post_process_grounded_object_detection(
+            outputs,
+            inputs.input_ids,
+            threshold=THRESHOLD,
+            text_threshold=THRESHOLD,
+            target_sizes=[image.size[::-1]]  # (height, width)
+        )[0]
+
+        elapsed = round(time.time() - start_time, 2)
+
+        boxes = results["boxes"].tolist()
+        scores = results["scores"].tolist()
+        labels = results["labels"]
+
+        # ─── Collect the results ─────────────────────────────────
+        detections = [
+            {
+                "label": label,
+                "confidence": round(score, 3),
+                "bbox": [round(x, 1) for x in box]
+            }
+            for label, score, box in zip(labels, scores, boxes)
+            if score >= THRESHOLD
         ]
-        answer = processor.batch_decode(
-            generated_ids_trimmed,
-            skip_special_tokens=True,
-            clean_up_tokenization_spaces=False
-        )[0].strip()
 
-
-        decision, reason = decide(answer)
+        # ─── Decision ────────────────────────────────────────────
+        decision = "block" if len(detections) > 0 else "allow"
+        summary = f"yes detected: {', '.join(set(d['label'] for d in detections))}" \
+            if detections else "no detected human body"
 
     return {
         "decision": decision,
-        "answer": answer,
-        "reason": reason,
-        "…
+        "summary": summary,
+        "detected_count": len(detections),
+        "detections": detections,
         "execution_time": elapsed,
         "status": "success"
     }
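A note on the new DETECTION_TEXT constant: Grounding DINO expects its text query as lowercase phrases separated by " . " and terminated with a period, which is exactly how the constant in the diff is written. Below is a minimal sketch of assembling such a query from a list of labels; the build_prompt helper is illustrative only and is not part of app.py:

def build_prompt(labels: list[str]) -> str:
    # Grounding DINO query convention: lowercase phrases joined by " . ",
    # with a trailing period closing the whole prompt.
    return " . ".join(label.strip().lower() for label in labels) + " ."

print(build_prompt(["Woman", "Girl", "Person"]))  # -> "woman . girl . person ."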
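The new inference path can also be exercised outside FastAPI. Here is a standalone sketch of the same pipeline, assuming a recent transformers release in which post_process_grounded_object_detection accepts threshold= and returns string labels (older releases use box_threshold=); test.jpg is a hypothetical input file:

import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection

MODEL_ID = "IDEA-Research/grounding-dino-base"
processor = AutoProcessor.from_pretrained(MODEL_ID)
model = AutoModelForZeroShotObjectDetection.from_pretrained(MODEL_ID).eval()

image = Image.open("test.jpg").convert("RGB")  # hypothetical test image
text = "woman . girl . person ."               # lowercase, '.'-separated query

inputs = processor(images=image, text=text, return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

results = processor.post_process_grounded_object_detection(
    outputs,
    inputs.input_ids,
    threshold=0.3,
    text_threshold=0.3,
    target_sizes=[image.size[::-1]],  # PIL reports (W, H); the API wants (H, W)
)[0]

for label, score, box in zip(results["labels"], results["scores"], results["boxes"]):
    print(label, round(score.item(), 3), [round(v, 1) for v in box.tolist()])

Note the threshold trade-off: since the endpoint blocks on any detection, lowering THRESHOLD makes the filter stricter (more recall, more blocks), while raising it makes the filter more permissive.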
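For completeness, a hedged client-side example of the updated /analyze endpoint. It assumes the Space serves the API on the usual Spaces port 7860 and that test.jpg exists; the form field name "file" matches the UploadFile parameter in the diff:

import requests

with open("test.jpg", "rb") as f:  # hypothetical test image
    resp = requests.post(
        "http://localhost:7860/analyze",  # assumed base URL for a local run
        files={"file": ("test.jpg", f, "image/jpeg")},
    )
resp.raise_for_status()
body = resp.json()
print(body["decision"], body["detected_count"], body["execution_time"])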