# ─── flash_attn Mock ─────────────────────────────────────────────
# Stub out flash_attn so Florence-2's remote code can import it on
# machines where the real package is not installed (e.g. CPU-only).
import sys
import types
import importlib.util

flash_mock = types.ModuleType("flash_attn")
flash_mock.__version__ = "2.0.0"
flash_mock.__spec__ = importlib.util.spec_from_loader("flash_attn", loader=None)
sys.modules["flash_attn"] = flash_mock
sys.modules["flash_attn.flash_attn_interface"] = types.ModuleType("flash_attn.flash_attn_interface")
sys.modules["flash_attn.bert_padding"] = types.ModuleType("flash_attn.bert_padding")
# ─────────────────────────────────────────────────────────────────

import io
import time
from contextlib import asynccontextmanager

import httpx
import torch
from PIL import Image
from transformers import (
    BlipProcessor,
    BlipForQuestionAnswering,
    AutoProcessor,
    AutoModelForCausalLM,
)
from fastapi import FastAPI, HTTPException, UploadFile, File
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

# ─── Models ──────────────────────────────────────────────────────
BLIP_MODEL_ID = "Salesforce/blip-vqa-base"
FLORENCE_MODEL_ID = "microsoft/Florence-2-large-ft"

# ─── BLIP questions ──────────────────────────────────────────────
QUESTIONS = [
    "is there a person in this image?",
    "is there a woman in this image?",
    "is there a human body part in this image?",
    "is there a hand or arm visible?",
    "is there a face visible?",
    "is there a leg or foot visible?",
    "is there a belly or stomach visible?",
]

# ─── Florence question ───────────────────────────────────────────
FLORENCE_QUESTION = (
    "Is there a woman or any part of a woman's body in this image? "
    "Answer yes or no only."
)

MODEL_DATA = {}


@asynccontextmanager
async def lifespan(app: FastAPI):
    # ── Load BLIP ────────────────────────────────────────────────
    print(f"📥 Loading {BLIP_MODEL_ID}...")
    start = time.time()
    MODEL_DATA["blip_processor"] = BlipProcessor.from_pretrained(BLIP_MODEL_ID)
    MODEL_DATA["blip_model"] = BlipForQuestionAnswering.from_pretrained(
        BLIP_MODEL_ID,
        torch_dtype=torch.float32
    ).eval()
    print(f"✅ BLIP ready in {time.time()-start:.1f}s")

    # ── Load Florence-2 ──────────────────────────────────────────
    print(f"📥 Loading {FLORENCE_MODEL_ID}...")
    start = time.time()
    MODEL_DATA["florence_processor"] = AutoProcessor.from_pretrained(
        FLORENCE_MODEL_ID,
        trust_remote_code=True
    )
    MODEL_DATA["florence_model"] = AutoModelForCausalLM.from_pretrained(
        FLORENCE_MODEL_ID,
        torch_dtype=torch.float32,
        trust_remote_code=True,
        attn_implementation="eager"
    ).eval()
    print(f"✅ Florence-2 ready in {time.time()-start:.1f}s")

    yield
    MODEL_DATA.clear()


app = FastAPI(
    title="AI Shield - Dual Model Detection",
    description="BLIP + Florence-2-large-ft | Compatible with AI Shield Chrome Extension",
    version="6.0.0",
    lifespan=lifespan
)

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


# ─── Schema ──────────────────────────────────────────────────────
class ImageUrlRequest(BaseModel):
    image_url: str


# ─── BLIP function ───────────────────────────────────────────────
def run_blip(image: Image.Image) -> dict:
    """Ask BLIP each screening question; bucket answers into yes/no."""
    processor = MODEL_DATA["blip_processor"]
    model = MODEL_DATA["blip_model"]

    yes_answers = {}
    no_answers = {}

    for question in QUESTIONS:
        inputs = processor(image, question, return_tensors="pt")
        with torch.no_grad():
            out = model.generate(**inputs, max_new_tokens=5)
        answer = processor.decode(out[0], skip_special_tokens=True).strip().lower()
        if answer.startswith("yes"):
            yes_answers[question] = answer
        else:
            no_answers[question] = answer

    return {"yes": yes_answers, "no": no_answers}
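
# A hypothetical run_blip result, to show the shape the decision logic
# below consumes (illustrative only; real BLIP answers vary by image):
#   {"yes": {"is there a person in this image?": "yes",
#            "is there a face visible?": "yes"},
#    "no":  {"is there a woman in this image?": "no",
#            "is there a leg or foot visible?": "no", ...}}
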
# ─── Florence-2 function ─────────────────────────────────────────
def run_florence(image: Image.Image) -> dict:
    processor = MODEL_DATA["florence_processor"]
    model = MODEL_DATA["florence_model"]

    # Florence-2 prompts are a task token followed by the query text;
    # "<VQA>" (visual question answering) is assumed here.
    task = "<VQA>"
    prompt = f"{task}{FLORENCE_QUESTION}"

    inputs = processor(text=prompt, images=image, return_tensors="pt")

    start = time.time()
    with torch.no_grad():
        generated_ids = model.generate(
            input_ids=inputs["input_ids"],
            pixel_values=inputs["pixel_values"],
            max_new_tokens=10,
            do_sample=False
        )
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed = processor.post_process_generation(
        generated_text,
        task=task,
        image_size=(image.width, image.height)
    )
    answer = parsed.get(task, "").strip().lower()
    elapsed = round(time.time() - start, 2)

    if answer.startswith("no"):
        return {"decision": "ALLOW", "answer": answer, "elapsed": elapsed}
    else:
        return {"decision": "BLOCK", "answer": answer, "elapsed": elapsed}


# ─── Main decision logic ─────────────────────────────────────────
def process_image(image: Image.Image) -> dict:
    total_start = time.time()

    # ══ Stage 1: BLIP ════════════════════════════════════════════
    blip_start = time.time()
    blip_result = run_blip(image)
    blip_elapsed = round(time.time() - blip_start, 2)

    yes_q = blip_result["yes"]
    no_q = blip_result["no"]

    # ─── Case 1: BLIP detected a woman directly → BLOCK immediately
    WOMAN_QUESTIONS = [
        "is there a woman in this image?",
    ]
    woman_detected = any(q in yes_q for q in WOMAN_QUESTIONS)

    if woman_detected:
        return {
            "decision": "BLOCK",
            "reason": "blip_detected_woman_directly",
            "stage": "blip_only",
            "blip_yes": yes_q,
            "blip_no": no_q,
            "blip_time": blip_elapsed,
            "florence_used": False,
            "total_time": round(time.time() - total_start, 2),
            "status": "success"
        }

    # ─── Case 2: BLIP detected no human at all → ALLOW immediately
    if not yes_q:
        return {
            "decision": "ALLOW",
            "reason": "blip_no_human_detected",
            "stage": "blip_only",
            "blip_yes": yes_q,
            "blip_no": no_q,
            "blip_time": blip_elapsed,
            "florence_used": False,
            "total_time": round(time.time() - total_start, 2),
            "status": "success"
        }

    # ─── Case 3: BLIP saw a human but not a woman → ask Florence ──
    florence_result = run_florence(image)

    final_decision = florence_result["decision"]
    reason = ("florence_confirmed_woman" if final_decision == "BLOCK"
              else "florence_confirmed_no_woman")

    return {
        "decision": final_decision,
        "reason": reason,
        "stage": "blip_then_florence",
        "blip_yes": yes_q,
        "blip_no": no_q,
        "blip_time": blip_elapsed,
        "florence_answer": florence_result["answer"],
        "florence_time": florence_result["elapsed"],
        "florence_used": True,
        "total_time": round(time.time() - total_start, 2),
        "status": "success"
    }


# ─── Health ──────────────────────────────────────────────────────
@app.get("/health")
def health():
    return {
        "status": "ok",
        "blip_loaded": "blip_model" in MODEL_DATA,
        "florence_loaded": "florence_model" in MODEL_DATA
    }


# ─── Endpoint 1: called by the Chrome extension ──────────────────
@app.post("/analyze")
async def analyze_from_url(request: ImageUrlRequest):
    try:
        async with httpx.AsyncClient(timeout=30) as client:
            response = await client.get(request.image_url)
            response.raise_for_status()
            image_bytes = response.content
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Failed to download the image: {str(e)}")

    try:
        image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Error reading the image: {str(e)}")

    return process_image(image)
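
# Sketch of a request against /analyze, assuming the server runs locally
# on the port configured at the bottom of this file (the image URL is a
# placeholder):
#
#   curl -X POST http://localhost:7860/analyze \
#        -H "Content-Type: application/json" \
#        -d '{"image_url": "https://example.com/photo.jpg"}'
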
# ─── Endpoint 2: manual testing ──────────────────────────────────
@app.post("/analyze-file")
async def analyze_from_file(file: UploadFile = File(...)):
    if not file.content_type.startswith("image/"):
        raise HTTPException(status_code=400, detail="The file is not an image")

    try:
        image = Image.open(io.BytesIO(await file.read())).convert("RGB")
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Error reading the image: {str(e)}")

    return process_image(image)


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
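
# Sketch of a manual test against /analyze-file ("test.jpg" is a
# placeholder path to any local image):
#
#   curl -X POST http://localhost:7860/analyze-file -F "file=@test.jpg"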