Spaces:

OppaAI
/

Robot_MCP_Server

Sleeping

App Files Community

OppaAI committed on Nov 23, 2025

Commit

8c3dcd1

verified ·

1 Parent(s): 00f5c0d

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -61

app.py CHANGED Viewed

@@ -1,25 +1,24 @@
 import os
 import base64
 import json
-import gradio as gr
-from huggingface_hub import HfApi, InferenceClient
 from datetime import datetime
 import traceback
 from typing import Optional, Dict, Any
 from fastmcp import FastMCP
-# --- Configuration ---
 HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "OppaAI/Robot_MCP")
 HF_VLM_MODEL = os.environ.get("HF_VLM_MODEL", "Qwen/Qwen2.5-VL-7B-Instruct")
-# Create MCP server
 mcp = FastMCP("Robot_MCP_Server")
-# -----------------------------------------------------
-# Save and upload image to HF
-# -----------------------------------------------------
 def upload_image(image_b64: str, hf_token: str):
     try:
         image_bytes = base64.b64decode(image_b64)
@@ -43,21 +42,18 @@ def upload_image(image_b64: str, hf_token: str):
             token=hf_token
         )
-        # FIXED URL
         url = f"https://huggingface.co/datasets/{HF_DATASET_REPO}/resolve/main/tmp/{filename}"
         return local_path, url, filename, size_bytes
-    except Exception as e:
-        print(f"[Error] during image upload: {e}")
         traceback.print_exc()
         return None, None, None, 0
-# -----------------------------------------------------
-# JSON parsing helper
-# -----------------------------------------------------
-def safe_parse_json_from_text(text: str) -> Optional[Dict[str, Any]]:
     if not text:
         return None
     try:
@@ -72,19 +68,16 @@ def safe_parse_json_from_text(text: str) -> Optional[Dict[str, Any]]:
     try:
         start = cleaned.find("{")
         end = cleaned.rfind("}")
-        if start >= 0 and end > start:
-            return json.loads(cleaned[start:end + 1])
     except:
         return None
-    return None
-# -----------------------------------------------------
-# MCP Tool: image → VLM → structured JSON
-# -----------------------------------------------------
 @mcp.tool()
-def robot_watch(payload: Dict[str, Any]) -> Dict[str, Any]:
     if isinstance(payload, str):
         try:
             payload = json.loads(payload)
@@ -100,15 +93,14 @@ def robot_watch(payload: Dict[str, Any]) -> Dict[str, Any]:
     if not image_b64:
         return {"error": "image_b64 missing"}
-    # 1. Save + Upload
     _, hf_url, _, size_bytes = upload_image(image_b64, hf_token)
     if not hf_url:
         return {"error": "Image upload failed"}
-    # 2. VLM prompt
     system_prompt = """
 Respond in STRICT JSON ONLY.
-Output format:
 {
  "description": "...",
  "human": "...",
@@ -119,7 +111,7 @@ Output format:
     messages = [
         {"role": "system", "content": system_prompt},
         {"role": "user", "content": [
-            {"type": "text", "text": "Analyze the image and provide the description."},
             {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"}}
         ]}
     ]
@@ -127,24 +119,23 @@ Output format:
     client = InferenceClient(token=hf_token)
     try:
-        response = client.chat.completions.create(
             model=HF_VLM_MODEL,
             messages=messages,
             max_tokens=300,
-            temperature=0.1,
         )
     except Exception as e:
-        return {"status": "error", "message": f"Inference API call failed: {e}"}
-    vlm_output = response.choices[0].message.content.strip()
     parsed = safe_parse_json_from_text(vlm_output)
     if parsed is None:
         return {
             "status": "model_no_json",
-            "robot_id": robot_id,
             "vlm_raw": vlm_output,
-            "message": "VLM returned invalid JSON"
         }
     return {
@@ -152,31 +143,4 @@ Output format:
         "robot_id": robot_id,
         "file_size_bytes": size_bytes,
         "image_url": hf_url,
-        "description": parsed.get("description"),
-        "human": parsed.get("human"),
-        "environment": parsed.get("environment"),
-        "vlm_raw": vlm_output
-    }
-# -----------------------------------------------------
-# Gradio Interface wrapper
-# -----------------------------------------------------
-def process_and_describe(payload):
-    return robot_watch(payload)
-app = gr.Interface(
-    fn=process_and_describe,
-    inputs=gr.JSON(label="Input JSON Payload (must include hf_token & image_b64)"),
-    outputs=gr.JSON(label="Output JSON Result"),
-    api_name="predict",
-    flagging_mode="never"
-)
-# -----------------------------------------------------
-# Entry
-# -----------------------------------------------------
-if __name__ == "__main__":
-    app.launch(mcp_server=True)

 import os
 import base64
 import json
 from datetime import datetime
 import traceback
 from typing import Optional, Dict, Any
+import gradio as gr
+from huggingface_hub import HfApi, InferenceClient
 from fastmcp import FastMCP
 HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "OppaAI/Robot_MCP")
 HF_VLM_MODEL = os.environ.get("HF_VLM_MODEL", "Qwen/Qwen2.5-VL-7B-Instruct")
 mcp = FastMCP("Robot_MCP_Server")
+# -------------------------------
+# Upload helper
+# -------------------------------
 def upload_image(image_b64: str, hf_token: str):
     try:
         image_bytes = base64.b64decode(image_b64)
             token=hf_token
         )
         url = f"https://huggingface.co/datasets/{HF_DATASET_REPO}/resolve/main/tmp/{filename}"
         return local_path, url, filename, size_bytes
+    except Exception:
         traceback.print_exc()
         return None, None, None, 0
+# -------------------------------
+# Safe JSON parse
+# -------------------------------
+def safe_parse_json_from_text(text: str):
     if not text:
         return None
     try:
     try:
         start = cleaned.find("{")
         end = cleaned.rfind("}")
+        return json.loads(cleaned[start:end + 1])
     except:
         return None
+# -------------------------------
+# MCP TOOL
+# -------------------------------
 @mcp.tool()
+def robot_watch(payload: Dict[str, Any]):
     if isinstance(payload, str):
         try:
             payload = json.loads(payload)
     if not image_b64:
         return {"error": "image_b64 missing"}
+    # Upload image
     _, hf_url, _, size_bytes = upload_image(image_b64, hf_token)
     if not hf_url:
         return {"error": "Image upload failed"}
+    # VLM call
     system_prompt = """
 Respond in STRICT JSON ONLY.
 {
  "description": "...",
  "human": "...",
     messages = [
         {"role": "system", "content": system_prompt},
         {"role": "user", "content": [
+            {"type": "text", "text": "Analyze the image."},
             {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"}}
         ]}
     ]
     client = InferenceClient(token=hf_token)
     try:
+        resp = client.chat.completions.create(
             model=HF_VLM_MODEL,
             messages=messages,
             max_tokens=300,
+            temperature=0.1
         )
     except Exception as e:
+        return {"status": "error", "message": str(e)}
+    vlm_output = resp.choices[0].message.content.strip()
     parsed = safe_parse_json_from_text(vlm_output)
     if parsed is None:
         return {
             "status": "model_no_json",
             "vlm_raw": vlm_output,
+            "message": "Invalid JSON returned"
         }
     return {
         "robot_id": robot_id,
         "file_size_bytes": size_bytes,
         "image_url": hf_url,
+        "de