Spaces:

sharathmajjigi
/

UITARS_Grounding_Model

Runtime error

App Files Files Community

sharathmajjigi committed on Aug 13, 2025

Commit

61ba6a6

1 Parent(s): c94a322

Add custom /v1/ground endpoint specifically for Agent-S

Browse files

Files changed (2) hide show

app.py +76 -12
requirements.txt +3 -1

app.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import gradio as gr
 from transformers import AutoProcessor, AutoModel
 import torch
@@ -6,6 +7,10 @@ import io
 import base64
 import json
 import numpy as np
 # UI-TARS model name
 model_name = "ByteDance-Seed/UI-TARS-1.5-7b"
@@ -37,7 +42,7 @@ def load_model():
     except Exception as e:
         print(f"❌ Error loading UI-TARS: {str(e)}")
-        print("�� Attempting to load with fallback configuration...")
         try:
             # Fallback: Load without device_map
@@ -106,23 +111,82 @@ def process_grounding(image, prompt):
             "status": "failed"
         }
-# Create Gradio interface with API enabled
 iface = gr.Interface(
     fn=process_grounding,
     inputs=[
         gr.Image(type="pil", label="Upload Screenshot"),
         gr.Textbox(label="Prompt/Goal", placeholder="What do you want to do?")
     ],
-    outputs=gr.JSON(label="Grounding Results"),  # Changed to JSON output
     title="UI-TARS Grounding Model",
-    description="Upload a screenshot and describe your goal to get grounding results from UI-TARS",
-    api_name="ground"  # This creates /api/ground endpoint
 )
-# Launch with API enabled
-iface.launch(
-    server_name="0.0.0.0",
-    server_port=7860,
-    share=False,
-    show_api=True  # This enables the API endpoints
-)

+# app.py - Add Custom Endpoint for Agent-S
 import gradio as gr
 from transformers import AutoProcessor, AutoModel
 import torch
 import base64
 import json
 import numpy as np
+from fastapi import FastAPI, Request
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import JSONResponse
+import uvicorn
 # UI-TARS model name
 model_name = "ByteDance-Seed/UI-TARS-1.5-7b"
     except Exception as e:
         print(f"❌ Error loading UI-TARS: {str(e)}")
+        print(" Attempting to load with fallback configuration...")
         try:
             # Fallback: Load without device_map
             "status": "failed"
         }
+# Create FastAPI app
+app = FastAPI(title="UI-TARS Grounding API")
+# Add CORS middleware so browser clients on any origin can call the API.
+# Wildcard origins must not be combined with credentialed requests (the
+# FastAPI CORS docs disallow "*" when allow_credentials is True), and the
+# endpoints registered on this app only read the JSON request body, so
+# credentials are disabled instead of narrowing the origin list.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=False,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Custom endpoint specifically for Agent-S
+@app.post("/v1/ground")
+async def agent_s_grounding(request: Request):
+    """
+    Handle a grounding request sent by Agent-S.
+
+    The JSON body must be an object in one of two shapes:
+      * {"data": [image, prompt, ...]} - positional; the first two items are used
+      * {"image": ..., "prompt": ...}  - named fields
+
+    Returns the result of process_grounding(image, prompt) as a JSON response.
+    Responds with 400 when the body is not valid JSON, is not a JSON object,
+    or matches neither shape, and with 500 when processing raises.
+    """
+    invalid_format = {"error": "Invalid request format", "status": "failed"}
+    try:
+        # Malformed JSON is a client error: answer 400 instead of letting the
+        # decode error fall through to the generic 500 handler below.
+        try:
+            body = await request.json()
+        except ValueError:
+            return JSONResponse(status_code=400, content=invalid_format)
+        # A top-level array, string or number has no keys to look up.
+        if not isinstance(body, dict):
+            return JSONResponse(status_code=400, content=invalid_format)
+        # Only index "data" positionally when it really is a sequence; a
+        # string or dict value would otherwise be sliced or raise.
+        data = body.get("data")
+        if isinstance(data, (list, tuple)) and len(data) >= 2:
+            image = data[0]  # First element is image
+            prompt = data[1]  # Second element is prompt
+        elif "image" in body and "prompt" in body:
+            image = body["image"]
+            prompt = body["prompt"]
+        else:
+            return JSONResponse(status_code=400, content=invalid_format)
+        # NOTE(review): the image is forwarded exactly as it arrived in the
+        # JSON body, while the Gradio UI passes a PIL image (type="pil") to
+        # the same function - confirm process_grounding accepts both forms.
+        result = process_grounding(image, prompt)
+        return JSONResponse(content=result)
+    except Exception as e:
+        # Top-level boundary: report any processing failure as a JSON 500.
+        return JSONResponse(
+            status_code=500,
+            content={"error": f"Internal server error: {str(e)}", "status": "failed"}
+        )
+# Compatibility aliases: other paths that clients may POST to
+@app.post("/api/ground")
+async def api_ground(request: Request):
+    """Compatibility alias: delegate POST /api/ground to agent_s_grounding."""
+    response = await agent_s_grounding(request)
+    return response
+@app.post("/predict")
+async def predict(request: Request):
+    """Compatibility alias: delegate POST /predict to agent_s_grounding."""
+    response = await agent_s_grounding(request)
+    return response
+@app.post("/")
+async def root_endpoint(request: Request):
+    """Compatibility alias: delegate POST / to agent_s_grounding."""
+    response = await agent_s_grounding(request)
+    return response
+# Create the Gradio interface: a PIL screenshot plus a text prompt go to
+# process_grounding, and its result is rendered as JSON.
 iface = gr.Interface(
     fn=process_grounding,
     inputs=[
         gr.Image(type="pil", label="Upload Screenshot"),
         gr.Textbox(label="Prompt/Goal", placeholder="What do you want to do?")
     ],
+    outputs=gr.JSON(label="Grounding Results"),
     title="UI-TARS Grounding Model",
+    description="Upload a screenshot and describe your goal to get grounding results from UI-TARS"
 )
+# Mount the Gradio UI on the FastAPI app under /gradio; the name `app` is
+# rebound to the value returned by mount_gradio_app.
+app = gr.mount_gradio_app(app, iface, path="/gradio")
+# When executed as a script, serve the combined app with uvicorn on all
+# interfaces at port 7860.
+if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=7860)

requirements.txt CHANGED Viewed

@@ -4,4 +4,6 @@ torchvision>=0.15.0
 accelerate>=0.20.0
 numpy>=1.21.0
 Pillow>=9.0.0
-gradio>=4.0.0

 accelerate>=0.20.0
 numpy>=1.21.0
 Pillow>=9.0.0
+gradio>=4.0.0
+fastapi>=0.100.0
+uvicorn>=0.20.0