# Spaces: webolavo/webai — Sleeping — File size: 10,728 Bytes

# ─── flash_attn Mock ─────────────────────────────────────────────
import sys
import types
import importlib.util

# Register empty stand-in modules under the `flash_attn` names before the
# model libraries are imported, so that `import flash_attn` (and the two
# submodules below) resolves without the real package being installed.
# NOTE(review): presumably needed by Florence-2's remote code — confirm.
flash_mock = types.ModuleType("flash_attn")
flash_mock.__version__ = "2.0.0"
flash_mock.__spec__ = importlib.util.spec_from_loader("flash_attn", loader=None)
sys.modules.update(
    {
        "flash_attn": flash_mock,
        # Submodules are plain empty modules named after their dotted path.
        **{
            dotted_name: types.ModuleType(dotted_name)
            for dotted_name in (
                "flash_attn.flash_attn_interface",
                "flash_attn.bert_padding",
            )
        },
    }
)
# ─────────────────────────────────────────────────────────────────

import io
import time
import httpx
import torch
from PIL import Image
from transformers import (
    BlipProcessor, BlipForQuestionAnswering,
    AutoProcessor, AutoModelForCausalLM
)
from fastapi import FastAPI, HTTPException, UploadFile, File
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from contextlib import asynccontextmanager

# ─── Models ──────────────────────────────────────────────────────
# Hub identifiers passed to `from_pretrained` when the app starts.
BLIP_MODEL_ID     = "Salesforce/blip-vqa-base"
FLORENCE_MODEL_ID = "microsoft/Florence-2-large-ft"

# ─── BLIP questions ──────────────────────────────────────────────
# Each question is asked of every image by `run_blip`. The exact text of the
# "woman" question is matched verbatim in `process_image`, so keep the two
# in sync when editing.
QUESTIONS = [
    "is there a person in this image?",
    "is there a woman in this image?",
    "is there a human body part in this image?",
    "is there a hand or arm visible?",
    "is there a face visible?",
    "is there a leg or foot visible?",
    "is there a belly or stomach visible?",
]

# ─── Florence question ───────────────────────────────────────────
# Single prompt sent to Florence-2 (prefixed with the task token in
# `run_florence`).
FLORENCE_QUESTION = (
    "Is there a woman or any part of a woman's body in this image? "
    "Answer yes or no only."
)

# Process-wide registry of loaded processors/models; filled at startup and
# cleared at shutdown by `lifespan`.
MODEL_DATA = {}

@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load both models into MODEL_DATA at startup and empty it on shutdown.

    The code before ``yield`` runs once before the application begins
    serving; the ``finally`` clause runs when the application stops.

    Args:
        app: The FastAPI application this lifespan is attached to (unused).

    Yields:
        None. While the app is running, MODEL_DATA holds the keys
        ``blip_processor``, ``blip_model``, ``florence_processor`` and
        ``florence_model``.
    """
    try:
        # ── Load BLIP ─────────────────────────────────────────────
        print(f"📥 Loading {BLIP_MODEL_ID}...")
        start = time.time()
        MODEL_DATA["blip_processor"] = BlipProcessor.from_pretrained(BLIP_MODEL_ID)
        MODEL_DATA["blip_model"]     = BlipForQuestionAnswering.from_pretrained(
            BLIP_MODEL_ID, torch_dtype=torch.float32
        ).eval()
        print(f"✅ BLIP ready in {time.time()-start:.1f}s")

        # ── Load Florence-2 ───────────────────────────────────────
        print(f"📥 Loading {FLORENCE_MODEL_ID}...")
        start = time.time()
        MODEL_DATA["florence_processor"] = AutoProcessor.from_pretrained(
            FLORENCE_MODEL_ID, trust_remote_code=True
        )
        # NOTE(review): "eager" attention presumably keeps the model away
        # from the stubbed flash_attn module — confirm.
        MODEL_DATA["florence_model"] = AutoModelForCausalLM.from_pretrained(
            FLORENCE_MODEL_ID,
            torch_dtype=torch.float32,
            trust_remote_code=True,
            attn_implementation="eager"
        ).eval()
        print(f"✅ Florence-2 ready in {time.time()-start:.1f}s")

        yield
    finally:
        # Always drop the model references: on normal shutdown, when an
        # exception is raised into the generator while the app is running,
        # and when a load above fails part-way (so no half-filled registry
        # is left behind).
        MODEL_DATA.clear()

# Application object. `lifespan` loads the models before serving starts and
# clears them on shutdown.
app = FastAPI(
    title="AI Shield - Dual Model Detection",
    description="BLIP + Florence-2-large-ft | Compatible with AI Shield Chrome Extension",
    version="6.0.0",
    lifespan=lifespan
)

# CORS is fully open: every origin, method and header is accepted.
# NOTE(review): wildcard origins together with allow_credentials=True is a
# combination the FastAPI CORS documentation advises against; if the client
# never sends cookies/credentials, consider allow_credentials=False — confirm
# against the extension before changing.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# ─── Schema ───────────────────────────────────────────────────────
# JSON request body accepted by POST /analyze.
# Documented with comments rather than a docstring so the generated schema
# description stays unchanged.
class ImageUrlRequest(BaseModel):
    # Address of the image to download. Declared as a plain `str`, so no
    # URL-format validation happens at the schema level.
    image_url: str

# ─── BLIP function ───────────────────────────────────────────────
def run_blip(image: Image.Image) -> dict:
    """Ask BLIP every entry of QUESTIONS about ``image`` and sort the replies.

    Args:
        image: The picture to interrogate.

    Returns:
        ``{"yes": {...}, "no": {...}}`` where each inner dict maps a question
        to the model's decoded, stripped, lower-cased answer. A reply lands
        under ``"yes"`` when it starts with ``"yes"``; every other reply
        (including an empty one) lands under ``"no"``.
    """
    blip_processor = MODEL_DATA["blip_processor"]
    blip_model = MODEL_DATA["blip_model"]
    buckets = {"yes": {}, "no": {}}

    for prompt in QUESTIONS:
        encoded = blip_processor(image, prompt, return_tensors="pt")
        with torch.no_grad():
            token_ids = blip_model.generate(**encoded, max_new_tokens=5)
        reply = blip_processor.decode(token_ids[0], skip_special_tokens=True)
        reply = reply.strip().lower()
        # A prefix test also covers the exact string "yes".
        bucket = "yes" if reply.startswith("yes") else "no"
        buckets[bucket][prompt] = reply

    return buckets

# ─── Florence-2 function ─────────────────────────────────────────
def run_florence(image: Image.Image) -> dict:
    """Put FLORENCE_QUESTION to Florence-2 and turn the reply into a decision.

    Args:
        image: The picture to interrogate.

    Returns:
        A dict with ``decision`` ("ALLOW" or "BLOCK"), ``answer`` (the parsed
        reply, stripped and lower-cased) and ``elapsed`` (seconds spent on
        generation and post-processing, rounded to two decimals; input
        preparation is not included).
    """
    florence_processor = MODEL_DATA["florence_processor"]
    florence_model = MODEL_DATA["florence_model"]

    task = "<VQA>"
    encoded = florence_processor(
        text=task + FLORENCE_QUESTION, images=image, return_tensors="pt"
    )

    began = time.time()
    with torch.no_grad():
        output_ids = florence_model.generate(
            input_ids=encoded["input_ids"],
            pixel_values=encoded["pixel_values"],
            max_new_tokens=10,
            do_sample=False,
        )

    raw_text = florence_processor.batch_decode(
        output_ids, skip_special_tokens=False
    )[0]
    parsed = florence_processor.post_process_generation(
        raw_text,
        task=task,
        image_size=(image.width, image.height),
    )
    answer = parsed.get(task, "").strip().lower()
    elapsed = round(time.time() - began, 2)

    # Only a reply beginning with "no" allows the image; anything else —
    # including an empty or unexpected reply — blocks it.
    decision = "ALLOW" if answer.startswith("no") else "BLOCK"
    return {"decision": decision, "answer": answer, "elapsed": elapsed}

# ─── Main decision logic ─────────────────────────────────────────
def process_image(image: Image.Image) -> dict:
    """Run the two-stage BLIP → Florence-2 pipeline and report the verdict.

    Stage 1 always runs BLIP. Florence-2 is consulted only when BLIP answered
    "yes" to at least one question but not to the "woman" question.

    Args:
        image: The picture to classify.

    Returns:
        A dict containing ``decision`` ("ALLOW"/"BLOCK"), ``reason``,
        ``stage``, the BLIP answers (``blip_yes``/``blip_no``), ``blip_time``,
        ``florence_used``, ``total_time`` and ``status``; when Florence-2 ran
        it also contains ``florence_answer`` and ``florence_time``.
    """
    pipeline_start = time.time()

    # ══ Stage 1: BLIP ════════════════════════════════════════════
    blip_start = time.time()
    blip_result = run_blip(image)
    blip_elapsed = round(time.time() - blip_start, 2)

    affirmed = blip_result["yes"]
    denied = blip_result["no"]
    blip_fields = {
        "blip_yes": affirmed,
        "blip_no": denied,
        "blip_time": blip_elapsed,
    }

    def blip_only_verdict(decision, reason):
        # Response shape shared by both outcomes that skip Florence-2.
        return {
            "decision": decision,
            "reason": reason,
            "stage": "blip_only",
            **blip_fields,
            "florence_used": False,
            "total_time": round(time.time() - pipeline_start, 2),
            "status": "success",
        }

    # ─── Case 1: BLIP detected a woman directly → BLOCK immediately ───
    woman_questions = ("is there a woman in this image?",)
    if any(question in affirmed for question in woman_questions):
        return blip_only_verdict("BLOCK", "blip_detected_woman_directly")

    # ─── Case 2: BLIP detected no human at all → ALLOW immediately ────
    if not affirmed:
        return blip_only_verdict("ALLOW", "blip_no_human_detected")

    # ─── Case 3: BLIP detected a human but not a woman → ask Florence ─
    florence = run_florence(image)
    decision = florence["decision"]
    if decision == "BLOCK":
        reason = "florence_confirmed_woman"
    else:
        reason = "florence_confirmed_no_woman"

    return {
        "decision": decision,
        "reason": reason,
        "stage": "blip_then_florence",
        **blip_fields,
        "florence_answer": florence["answer"],
        "florence_time": florence["elapsed"],
        "florence_used": True,
        "total_time": round(time.time() - pipeline_start, 2),
        "status": "success",
    }

# ─── Health ───────────────────────────────────────────────────────
@app.get("/health")
def health():
    # Liveness probe: always reports "ok" and says whether each model is
    # currently present in MODEL_DATA.
    blip_ready = "blip_model" in MODEL_DATA
    florence_ready = "florence_model" in MODEL_DATA
    return dict(
        status="ok",
        blip_loaded=blip_ready,
        florence_loaded=florence_ready,
    )

# ─── Endpoint 1: from the Chrome extension ───────────────────────
@app.post("/analyze")
async def analyze_from_url(request: ImageUrlRequest):
    # Download the image at `request.image_url`, decode it as RGB and return
    # the verdict dict produced by `process_image`. Any download failure
    # (including a non-success HTTP status) and any decode failure is
    # reported as HTTP 400.
    # NOTE(review): the URL is fetched server-side exactly as supplied by the
    # caller, with no host restriction or size cap — consider whether that
    # needs hardening.
    try:
        async with httpx.AsyncClient(timeout=30) as http:
            fetched = await http.get(request.image_url)
            fetched.raise_for_status()
            payload = fetched.content
    except Exception as download_error:
        raise HTTPException(
            status_code=400,
            detail=f"فشل تحميل الصورة: {download_error!s}",
        )

    try:
        buffer = io.BytesIO(payload)
        picture = Image.open(buffer).convert("RGB")
    except Exception as decode_error:
        raise HTTPException(
            status_code=400,
            detail=f"خطأ في قراءة الصورة: {decode_error!s}",
        )

    # NOTE(review): this call is synchronous, so the coroutine does not yield
    # to the event loop while the models run.
    return process_image(picture)

# ─── Endpoint 2: manual testing ──────────────────────────────────
@app.post("/analyze-file")
async def analyze_from_file(file: UploadFile = File(...)):
    # Classify an uploaded image and return the verdict dict produced by
    # `process_image`.
    # Responds with HTTP 400 when the upload is not declared as an image
    # (content type missing or not starting with "image/") or when the bytes
    # cannot be decoded as an image.

    # `content_type` is None when the multipart part carries no Content-Type
    # header; treat that as "not an image" instead of failing with an
    # AttributeError (which would surface as a 500).
    declared_type = file.content_type or ""
    if not declared_type.startswith("image/"):
        raise HTTPException(status_code=400, detail="الملف ليس صورة")

    try:
        image = Image.open(io.BytesIO(await file.read())).convert("RGB")
    except Exception as e:
        # Chain the original error so it stays visible in server tracebacks.
        raise HTTPException(
            status_code=400, detail=f"خطأ في قراءة الصورة: {str(e)}"
        ) from e

    return process_image(image)


if __name__ == "__main__":
    # Direct execution: serve `app` with uvicorn on all interfaces, port 7860.
    # uvicorn is imported here so it is only required when run as a script.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)