Update app.py
app.py
CHANGED
@@ -1,15 +1,15 @@
+# ─── flash_attn Mock ─────────────────────────────────────────────
 import sys
 import types
 import importlib.util
 
-# ─── Mock flash_attn (needed to run on CPU on Hugging Face) ──────
 flash_mock = types.ModuleType("flash_attn")
 flash_mock.__version__ = "2.0.0"
 flash_mock.__spec__ = importlib.util.spec_from_loader("flash_attn", loader=None)
 sys.modules["flash_attn"] = flash_mock
 sys.modules["flash_attn.flash_attn_interface"] = types.ModuleType("flash_attn.flash_attn_interface")
 sys.modules["flash_attn.bert_padding"] = types.ModuleType("flash_attn.bert_padding")
-# ─────────────────────────────────────────────────────────────────
+# ─────────────────────────────────────────────────────────────────
 
 import io
 import time
@@ -17,48 +17,56 @@ import torch
 from PIL import Image
 from transformers import AutoProcessor, AutoModelForCausalLM
 from fastapi import FastAPI, HTTPException, UploadFile, File
-from fastapi.responses import JSONResponse
 from contextlib import asynccontextmanager
 
-
-
+VQA_QUESTION = (
+    "Is there a woman or any part of a woman's body in this image? "
+    "Answer yes or no only."
+)
 
 MODEL_DATA = {}
 
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     print("📥 Loading Florence-2-large...")
+    start = time.time()
     MODEL_DATA["processor"] = AutoProcessor.from_pretrained(
-        "microsoft/Florence-2-large",
-        trust_remote_code=True
+        "microsoft/Florence-2-large", trust_remote_code=True
     )
     MODEL_DATA["model"] = AutoModelForCausalLM.from_pretrained(
-        "microsoft/Florence-2-large",
-
+        "microsoft/Florence-2-large",
+        torch_dtype=torch.float32,
+        trust_remote_code=True,
         attn_implementation="eager"
     ).eval()
-    print("✅ Model
+    print(f"✅ Model ready in {time.time()-start:.1f}s")
     yield
     MODEL_DATA.clear()
 
-app = FastAPI(
+app = FastAPI(lifespan=lifespan)
+
+@app.get("/health")
+def health():
+    return {"status": "ok", "model_loaded": "model" in MODEL_DATA}
 
 @app.post("/analyze")
 async def analyze_image(file: UploadFile = File(...)):
+
     if not file.content_type.startswith("image/"):
-        raise HTTPException(status_code=400, detail="
+        raise HTTPException(status_code=400, detail="Not an image")
+
+    try:
+        image = Image.open(io.BytesIO(await file.read())).convert("RGB")
+    except Exception as e:
+        raise HTTPException(status_code=400, detail=str(e))
 
     try:
-        # Read and process the image
-        image_bytes = await file.read()
-        image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
-
         processor = MODEL_DATA["processor"]
-        model
+        model = MODEL_DATA["model"]
 
-
-        task = "<VQA>"
+        task = "<VQA>"
         prompt = f"{task}{VQA_QUESTION}"
+
         inputs = processor(text=prompt, images=image, return_tensors="pt")
 
         start_time = time.time()
@@ -66,41 +74,50 @@ async def analyze_image(file: UploadFile = File(...)):
         generated_ids = model.generate(
             input_ids=inputs["input_ids"],
             pixel_values=inputs["pixel_values"],
-            max_new_tokens=
-            num_beams=
+            max_new_tokens=10,
+            num_beams=1,
+            do_sample=False
         )
+        elapsed = round(time.time() - start_time, 2)
 
-        #
-
-
-
-
-
-            task=task,
+        # ─── DEBUG: inspect all the possible output forms ─────────
+        raw_with_special = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
+        raw_without_special = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+
+        parsed = processor.post_process_generation(
+            raw_with_special,
+            task=task,
             image_size=(image.width, image.height)
         )
-
-        # Extract the answer
-
-
-
-        #
-
-        decision
+
+        # Extract the answer from every path
+        answer_from_parsed = parsed.get(task, "").strip().lower()
+        answer_from_raw = raw_without_special.strip().lower()
+
+        # Decide based on the best result
+        best_answer = answer_from_parsed if answer_from_parsed else answer_from_raw
+        decision = "block" if "yes" in best_answer else "allow"
 
         return {
-            "decision":
-            "
-
+            "decision": decision,
+            "best_answer": best_answer,
+
+            # DEBUG - to see what the model actually returns
+            "debug": {
+                "raw_with_special": raw_with_special,
+                "raw_without_special": raw_without_special,
+                "parsed": str(parsed),
+                "answer_from_parsed": answer_from_parsed,
+                "answer_from_raw": answer_from_raw,
+            },
+
+            "execution_time": elapsed,
             "status": "success"
         }
 
     except Exception as e:
-
+        raise HTTPException(status_code=500, detail=str(e))
 
-@app.get("/")
-def home():
-    return {"status": "running", "question_active": VQA_QUESTION}
 
 if __name__ == "__main__":
     import uvicorn
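A side note on the flash_attn stub that both versions keep at the top of app.py: the original comment says it is needed to run on CPU on Hugging Face, the idea being that registering a fake module lets "import flash_attn" succeed even though the real wheel is not installed, while attn_implementation="eager" keeps the fake from ever being called. Below is a minimal standalone sketch of that mechanism using only standard-library behaviour; exactly which presence checks transformers or the Florence-2 remote code performs is an assumption and is not shown.

import sys
import types
import importlib.util

# Register a fake flash_attn so the import succeeds without the real wheel.
stub = types.ModuleType("flash_attn")
stub.__version__ = "2.0.0"
stub.__spec__ = importlib.util.spec_from_loader("flash_attn", loader=None)
sys.modules["flash_attn"] = stub

# Probes that go through sys.modules / __spec__ now pass:
import flash_attn                               # resolved from sys.modules, no ImportError
print(importlib.util.find_spec("flash_attn"))   # returns the spec assigned above
print(flash_attn.__version__)                   # "2.0.0"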
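To exercise the endpoints this commit adds, a small client sketch follows. The base URL, port, and image path are placeholders (the uvicorn.run line is cut off in the diff), and the printed keys mirror the JSON that /analyze returns above.

import requests

BASE_URL = "http://localhost:7860"  # placeholder; use the Space's actual URL/port

# Liveness check added in this commit.
print(requests.get(f"{BASE_URL}/health").json())
# e.g. {"status": "ok", "model_loaded": true}

# Send an image to /analyze; the multipart field must be named "file"
# and carry an image/* content type, or the endpoint returns 400.
with open("test.jpg", "rb") as f:
    resp = requests.post(
        f"{BASE_URL}/analyze",
        files={"file": ("test.jpg", f, "image/jpeg")},
    )

resp.raise_for_status()
result = resp.json()
print(result["decision"], result["best_answer"], result["execution_time"])
print(result["debug"]["raw_without_special"])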