Spaces:

OppaAI
/

Robot_MCP_Server

Sleeping

App Files Files Community

OppaAI committed on Nov 23, 2025

Commit

87deda2

verified ·

1 Parent(s): ff60aab

Update app.py

Browse files

Files changed (1) hide show

app.py +49 -27

app.py CHANGED Viewed

@@ -6,16 +6,17 @@ from huggingface_hub import HfApi, InferenceClient
 from datetime import datetime
 import traceback
 from typing import Optional, Dict, Any
-import asyncio
-from fastmcp import Client, FastMCP
 # --- Configuration ---
 HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "OppaAI/Robot_MCP")
 HF_VLM_MODEL = os.environ.get("HF_VLM_MODEL", "Qwen/Qwen2.5-VL-7B-Instruct")
-REMOTE_MCP_URL = os.environ.get("REMOTE_MCP_URL", "https://oppaai-robot-mcp-server.hf.space/gradio_api/mcp/sse")
 mcp = FastMCP("Robot_MCP_Server")
 # -----------------------------------------------------
 # Save and upload image to HF
 # -----------------------------------------------------
@@ -42,7 +43,9 @@ def upload_image(image_b64: str, hf_token: str):
             token=hf_token
         )
-        url = f"https://huggingface.co/datasets/{HF_DATASET_REPO}/resolve/main/{filename}"
         return local_path, url, filename, size_bytes
     except Exception as e:
@@ -50,6 +53,7 @@ def upload_image(image_b64: str, hf_token: str):
         traceback.print_exc()
         return None, None, None, 0
 # -----------------------------------------------------
 # JSON parsing helper
 # -----------------------------------------------------
@@ -60,20 +64,24 @@ def safe_parse_json_from_text(text: str) -> Optional[Dict[str, Any]]:
         return json.loads(text)
     except:
         pass
     cleaned = text.strip().strip("`").strip()
     if cleaned.lower().startswith("json"):
         cleaned = cleaned[4:].strip()
     try:
         start = cleaned.find("{")
         end = cleaned.rfind("}")
         if start >= 0 and end > start:
-            return json.loads(cleaned[start:end+1])
     except:
         return None
     return None
 # -----------------------------------------------------
-# Main pipeline: image → VLM → remote tool
 # -----------------------------------------------------
 @mcp.tool()
 def robot_watch(payload: Dict[str, Any]) -> Dict[str, Any]:
@@ -92,22 +100,20 @@ def robot_watch(payload: Dict[str, Any]) -> Dict[str, Any]:
     if not image_b64:
         return {"error": "image_b64 missing"}
-    # Save + Upload
     _, hf_url, _, size_bytes = upload_image(image_b64, hf_token)
     if not hf_url:
         return {"error": "Image upload failed"}
-    # VLM system prompt: decide MCP tool automatically
-    system_prompt = f"""
 Respond in STRICT JSON ONLY.
-Rules:
-Provide a long detail description of what you see
 Output format:
-{{
  "description": "...",
- "human": brief description of humans if any (eg. a man with glasses)
- "environment": category of the environment (eg. room)
-}}
 """
     messages = [
@@ -119,6 +125,7 @@ Output format:
     ]
     client = InferenceClient(token=hf_token)
     try:
         response = client.chat.completions.create(
             model=HF_VLM_MODEL,
@@ -131,20 +138,34 @@ Output format:
     vlm_output = response.choices[0].message.content.strip()
     parsed = safe_parse_json_from_text(vlm_output)
     if parsed is None:
-        return {"status": "model_no_json", "robot_id": robot_id, "vlm_raw": vlm_output, "message": "VLM returned invalid JSON"}
     return {
         "status": "success",
         "robot_id": robot_id,
         "file_size_bytes": size_bytes,
-        "vlm_description": parsed.get("description"),
         "vlm_raw": vlm_output
     }
-# ------------------------------
-# Gradio Interface
-# ------------------------------
 app = gr.Interface(
     fn=process_and_describe,
     inputs=gr.JSON(label="Input JSON Payload (must include hf_token & image_b64)"),
@@ -153,12 +174,13 @@ app = gr.Interface(
     flagging_mode="never"
 )
-# ------------------------------
-# Main Entry
-# ------------------------------
 if __name__ == "__main__":
-    print(f"[Config] HF_DATASET_REPO: {HF_DATASET_REPO}")
-    print(f"[Config] HF_VLM_MODEL: {HF_VLM_MODEL}")
-    print(f"[Config] REMOTE_MCP_URL: {REMOTE_MCP_URL}")
     print("[Gradio] Launching interface...")
-    app.launch(mcp_server=True)

 from datetime import datetime
 import traceback
 from typing import Optional, Dict, Any
+from fastmcp import FastMCP
 # --- Configuration ---
 HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "OppaAI/Robot_MCP")
 HF_VLM_MODEL = os.environ.get("HF_VLM_MODEL", "Qwen/Qwen2.5-VL-7B-Instruct")
+# Create MCP server
 mcp = FastMCP("Robot_MCP_Server")
 # -----------------------------------------------------
 # Save and upload image to HF
 # -----------------------------------------------------
             token=hf_token
         )
+        # FIXED URL
+        url = f"https://huggingface.co/datasets/{HF_DATASET_REPO}/resolve/main/tmp/{filename}"
         return local_path, url, filename, size_bytes
     except Exception as e:
         traceback.print_exc()
         return None, None, None, 0
 # -----------------------------------------------------
 # JSON parsing helper
 # -----------------------------------------------------
         return json.loads(text)
     except:
         pass
     cleaned = text.strip().strip("`").strip()
     if cleaned.lower().startswith("json"):
         cleaned = cleaned[4:].strip()
     try:
         start = cleaned.find("{")
         end = cleaned.rfind("}")
         if start >= 0 and end > start:
+            return json.loads(cleaned[start:end + 1])
     except:
         return None
     return None
 # -----------------------------------------------------
+# MCP Tool: image → VLM → structured JSON
 # -----------------------------------------------------
 @mcp.tool()
 def robot_watch(payload: Dict[str, Any]) -> Dict[str, Any]:
     if not image_b64:
         return {"error": "image_b64 missing"}
+    # 1. Save + Upload
     _, hf_url, _, size_bytes = upload_image(image_b64, hf_token)
     if not hf_url:
         return {"error": "Image upload failed"}
+    # 2. VLM prompt
+    system_prompt = """
 Respond in STRICT JSON ONLY.
 Output format:
+{
  "description": "...",
+ "human": "...",
+ "environment": "..."
+}
 """
     messages = [
     ]
     client = InferenceClient(token=hf_token)
     try:
         response = client.chat.completions.create(
             model=HF_VLM_MODEL,
     vlm_output = response.choices[0].message.content.strip()
     parsed = safe_parse_json_from_text(vlm_output)
     if parsed is None:
+        return {
+            "status": "model_no_json",
+            "robot_id": robot_id,
+            "vlm_raw": vlm_output,
+            "message": "VLM returned invalid JSON"
+        }
     return {
         "status": "success",
         "robot_id": robot_id,
         "file_size_bytes": size_bytes,
+        "image_url": hf_url,
+        "description": parsed.get("description"),
+        "human": parsed.get("human"),
+        "environment": parsed.get("environment"),
         "vlm_raw": vlm_output
     }
+# -----------------------------------------------------
+# Gradio Interface wrapper
+# -----------------------------------------------------
+def process_and_describe(payload):
+    """Gradio entry point: pass the JSON payload to ``robot_watch`` and return its result unchanged."""
+    return robot_watch(payload)
 app = gr.Interface(
     fn=process_and_describe,
     inputs=gr.JSON(label="Input JSON Payload (must include hf_token & image_b64)"),
     flagging_mode="never"
 )
+# -----------------------------------------------------
+# Entry
+# -----------------------------------------------------
 if __name__ == "__main__":
+    # FastMCP.run() is a blocking call and takes no `background` keyword,
+    # so mcp.run(background=True) would fail (or never return) before the
+    # Gradio UI is launched. Run the MCP server on a daemon thread instead
+    # so it lives alongside the UI and exits with the process.
+    import threading
+    print("[MCP] Robot MCP Server starting...")
+    threading.Thread(target=mcp.run, name="robot-mcp-server", daemon=True).start()
     print("[Gradio] Launching interface...")
+    app.launch()