Spaces:

Sairesh
/

game-brain-ai

Sleeping

App Files Community

Sairesh committed on Dec 27, 2025

Commit

a5b70d6

verified ·

1 Parent(s): 7dc7fec

Update main.py

Browse files

Files changed (1) hide show

main.py +99 -62

main.py CHANGED Viewed

@@ -1,53 +1,86 @@
 import io
 import torch
 from fastapi import FastAPI, UploadFile, File
 from PIL import Image
 from transformers import AutoProcessor, AutoModelForCausalLM
-from smolagents import CodeAgent, InferenceClientModel
-# =========================
-# FORCE SAFE ATTENTION
-# =========================
-torch.backends.cuda.enable_flash_sdp(False)
-torch.backends.cuda.enable_mem_efficient_sdp(False)
-torch.backends.cuda.enable_math_sdp(False)
-# =========================
-# APP
-# =========================
 app = FastAPI()
 device = "cpu"
-MODEL_ID = "microsoft/Florence-2-large"
 print("⏳ Loading Florence-2 (SAFE MODE)...")
-vision_model = AutoModelForCausalLM.from_pretrained(
-    MODEL_ID,
-    trust_remote_code=True,
-    attn_implementation="eager"   # 🔥 THIS FIXES YOUR ERROR
-).to(device)
-processor = AutoProcessor.from_pretrained(
-    MODEL_ID,
-    trust_remote_code=True
-)
-print("✅ Florence-2 loaded")
-# =========================
-# STRATEGIST (CLOUD)
-# =========================
 strategist = CodeAgent(
     tools=[],
-    model=InferenceClientModel(
-        model_id="meta-llama/Llama-3.2-3B-Instruct"
-    )
 )
-# =========================
-# ROUTES
-# =========================
 @app.get("/")
 def home():
     return {
@@ -58,42 +91,46 @@ def home():
 @app.post("/analyze/")
 async def analyze(file: UploadFile = File(...)):
-    img = Image.open(io.BytesIO(await file.read())).convert("RGB")
-    w, h = img.size
-    inputs = processor(
-        text="button, icon, start, attack, confirm, close, x, claim, menu",
-        images=img,
-        return_tensors="pt"
-    ).to(device)
-    with torch.no_grad():
-        out = vision_model.generate(
-            input_ids=inputs["input_ids"],
-            pixel_values=inputs["pixel_values"],
-            max_new_tokens=512
-        )
-    decoded = processor.batch_decode(out, skip_special_tokens=False)[0]
-    vision = processor.post_process_generation(
-        decoded,
-        task="<CAPTION_TO_PHRASE_GROUNDING>",
-        image_size=(w, h)
-    )
-    prompt = f"""
-You are a mobile bot.
-Resolution: 720x1600
-Detected UI: {vision}
-ONLY output:
-tap X Y
 """
-    decision = strategist.run(prompt)
-    return {
-        "decision": str(decision).strip(),
-        "vision": vision
-    }

+# main.py - FastAPI service: Florence-2 phrase grounding on an uploaded screenshot, then an LLM strategist picks a tap
+import os
 import io
+import traceback
+# --- Basic imports
 import torch
 from fastapi import FastAPI, UploadFile, File
 from PIL import Image
+import transformers
+# --- PATCH: drop the optional 'flash_attn' and 'xformers' entries from the imports reported for remote modeling code.
+# This keeps the Transformers dynamic import checker from aborting when those optional packages are not installed.
+try:
+    from transformers import dynamic_module_utils
+    _orig_get_imports = dynamic_module_utils.get_imports
+    def _patched_get_imports(filename):
+        imports = _orig_get_imports(filename)
+        # remove problematic optional libs that cause the HF dynamic checker to abort
+        filtered = [imp for imp in imports if "flash_attn" not in imp and "xformers" not in imp]
+        return filtered
+    dynamic_module_utils.get_imports = _patched_get_imports
+except Exception:
+    # If patching fails, continue; downstream code will try to load models and may raise clearer errors.
+    pass
+# Now import model helpers from transformers (after patch above)
 from transformers import AutoProcessor, AutoModelForCausalLM
+# --- Safety: try to disable specialized SDPA/flash settings if present
+try:
+    # these calls exist only when built with CUDA-enabled torch backends; wrap in try/except
+    if hasattr(torch.backends, "cuda"):
+        if hasattr(torch.backends.cuda, "enable_flash_sdp"):
+            torch.backends.cuda.enable_flash_sdp(False)
+        if hasattr(torch.backends.cuda, "enable_mem_efficient_sdp"):
+            torch.backends.cuda.enable_mem_efficient_sdp(False)
+        if hasattr(torch.backends.cuda, "enable_math_sdp"):
+            torch.backends.cuda.enable_math_sdp(False)
+except Exception:
+    # ignore backend toggling errors on CPU-only environments
+    pass
+# --- App setup
 app = FastAPI()
 device = "cpu"
+VISION_MODEL_ID = "microsoft/Florence-2-large"
 print("⏳ Loading Florence-2 (SAFE MODE)...")
+try:
+    # Force legacy attention mode to avoid SDPA issues in some Florence versions.
+    vision_model = AutoModelForCausalLM.from_pretrained(
+        VISION_MODEL_ID,
+        trust_remote_code=True,
+        # some modelling code accepts this kwarg; it's safe if ignored by the model class
+        attn_implementation="eager"
+    ).to(device)
+    processor = AutoProcessor.from_pretrained(
+        VISION_MODEL_ID,
+        trust_remote_code=True
+    )
+    print("✅ Florence-2 loaded")
+except Exception as e:
+    # Provide clearer startup error in logs (Spaces will show this)
+    print("❌ Failed loading Florence-2: ")
+    traceback.print_exc()
+    # Re-raise so the Space fails loudly (you can check logs)
+    raise
+# Cloud strategist (unchanged)
+from smolagents import CodeAgent, InferenceClientModel
 strategist = CodeAgent(
     tools=[],
+    model=InferenceClientModel(model_id="meta-llama/Llama-3.2-3B-Instruct")
 )
 @app.get("/")
 def home():
     return {
 @app.post("/analyze/")
 async def analyze(file: UploadFile = File(...)):
+    try:
+        img_bytes = await file.read()
+        image = Image.open(io.BytesIO(img_bytes)).convert("RGB")
+        width, height = image.size
+        text_input = "button, icon, start, attack, confirm, close, x, claim, menu"
+        inputs = processor(text=text_input, images=image, return_tensors="pt").to(device)
+        with torch.no_grad():
+            generated_ids = vision_model.generate(
+                input_ids=inputs.get("input_ids"),
+                pixel_values=inputs.get("pixel_values"),
+                max_new_tokens=512
+            )
+        prediction = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
+        vision_data = processor.post_process_generation(
+            prediction,
+            task="<CAPTION_TO_PHRASE_GROUNDING>",
+            image_size=(width, height)
+        )
+        prompt = f"""
+You are a mobile game bot for a Redmi 9i (720x1600).
+Visual Data: {vision_data}
+Task: Pick the best element to click to progress.
+Rule: You must ONLY output: tap X Y
+No other text.
 """
+        decision = strategist.run(prompt)
+        return {
+            "status": "success",
+            "decision": str(decision).strip(),
+            "debug": vision_data
+        }
+    except Exception as exc:
+        traceback.print_exc()
+        return {"status": "error", "detail": str(exc)}