Spaces:

OppaAI
/

Robot_MCP_Client

Sleeping

App Files Community

OppaAI committed on Nov 24, 2025

Commit

9ac3c29

verified ·

1 Parent(s): ef5183f

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -57

app.py CHANGED Viewed

@@ -6,18 +6,16 @@ from fastmcp.client import StreamableHttpTransport
 import asyncio
 import ast
 import json
-import warnings
-# -------------------------------
-# 0. CLEANUP: Ignore the spammy DeprecationWarnings
-# -------------------------------
-warnings.filterwarnings("ignore", category=DeprecationWarning)
-warnings.filterwarnings("ignore", category=UserWarning)
 # -------------------------------
 # MCP server info
 # -------------------------------
 ROBOT_ID = "Robot_MCP_Client"
 MCP_SERVER_URL = "https://oppaai-robot-mcp-server.hf.space/gradio_api/mcp/"
 SERVER_NAME = "Robot_MCP_Server"
 TOOL_NAME = "Robot_MCP_Server_robot_watch"
@@ -29,61 +27,65 @@ HTTP_TRANSPORT = StreamableHttpTransport(url=MCP_SERVER_URL)
 MCP_CLIENT = Client(transport=HTTP_TRANSPORT, name=SERVER_NAME)
 # -------------------------------
-# Async function
 # -------------------------------
-async def process_webcam_stream_async(image, oauth_token: gr.OAuthToken | None = None):
-    # 1. Login Check
-    if oauth_token is None:
-        return "⚠️ Please log in via the button above to start.", "", "", "", "", "", "", ""
-    # 2. Image Check
     if image is None:
         return "", "", "", "", "", "", "", ""
     try:
-        # 3. Process Image
-        buffered = io.BytesIO()
-        image.save(buffered, format="JPEG")
-        b64_img = base64.b64encode(buffered.getvalue()).decode("utf-8")
-        payload = {
-            "hf_token_input": oauth_token.token,
-            "robot_id_input": ROBOT_ID,
-            "image_b64_input": b64_img
-        }
-        # 4. Call MCP Server
         async with MCP_CLIENT:
             response = await MCP_CLIENT.call_tool(TOOL_NAME, payload)
-            # Handle MCP Errors
             if response.is_error:
                 error_msg = "Unknown Error"
                 if hasattr(response, 'content') and isinstance(response.content, list):
                     error_msg = " ".join([getattr(item, 'text', '') for item in response.content])
                 raise Exception(f"MCP Tool Error: {error_msg}")
-            # 5. Extract Text from Response List
             raw_text = ""
             if hasattr(response, 'content') and isinstance(response.content, list):
                 for item in response.content:
                     if hasattr(item, 'text'):
                         raw_text += item.text
             else:
                 raw_text = str(response)
-            # 6. Parse JSON/Dict
             try:
                 response_dict = json.loads(raw_text)
             except json.JSONDecodeError:
                 try:
                     response_dict = ast.literal_eval(raw_text)
                 except Exception:
                     return f"Parsing Error. Raw output: {raw_text}", "", "", "", "", "", "", ""
             vlm_result = response_dict.get("result", {})
-            # 7. Map to Outputs
             description_out = vlm_result.get("description", "")
             environment_out = vlm_result.get("environment", "")
             indoor_outdoor_out = vlm_result.get("indoor_or_outdoor", "")
@@ -107,44 +109,33 @@ async def process_webcam_stream_async(image, oauth_token: gr.OAuthToken | None =
             )
     except Exception as e:
-        print(f"Error: {e}")
         return f"Error: {e}", "", "", "", "", "", "", ""
 # -------------------------------
 # Gradio UI
 # -------------------------------
-with gr.Blocks(title="Robot Vision MCP") as demo:
     gr.Markdown("## 🎥 Robot Vision Webcam Stream (MCP Client)")
-    # Login Button
     gr.LoginButton()
     with gr.Row():
         webcam_input = gr.Image(
-            label="Webcam Input",
             sources=["webcam"],
             type="pil"
         )
         with gr.Column():
-            description_out = gr.Textbox(label="Description", lines=4)
-            with gr.Row():
-                environment_out = gr.Textbox(label="Environment")
-                indoor_outdoor_out = gr.Textbox(label="In/Out")
-            with gr.Row():
-                human_out = gr.Textbox(label="Humans")
-                hazards_out = gr.Textbox(label="Hazards")
-            # Hidden / Extra fields (optional, add back if needed)
-            lighting_condition_out = gr.Textbox(visible=False)
-            animals_out = gr.Textbox(visible=False)
-            objects_out = gr.Textbox(visible=False)
-    # -------------------------------
-    # STREAM CONFIGURATION (The Important Fix)
-    # -------------------------------
     webcam_input.stream(
         process_webcam_stream_async,
         inputs=[webcam_input],
@@ -158,10 +149,7 @@ with gr.Blocks(title="Robot Vision MCP") as demo:
             objects_out,
             hazards_out
         ],
-        # Update every 3 seconds to give the AI time to think
-        stream_every=3.0,
-        # Wait for the previous request to finish before sending a new one
-        concurrency_limit=1
     )
 if __name__ == "__main__":

 import asyncio
 import ast
 import json
 # -------------------------------
 # MCP server info
 # -------------------------------
 ROBOT_ID = "Robot_MCP_Client"
+HF_TOKEN = os.environ.get("HF_TOKEN")
+if not HF_TOKEN:
+    print("Warning: HF_TOKEN not found. API calls may fail.")
+    HF_TOKEN = "missing_token_placeholder"
 MCP_SERVER_URL = "https://oppaai-robot-mcp-server.hf.space/gradio_api/mcp/"
 SERVER_NAME = "Robot_MCP_Server"
 TOOL_NAME = "Robot_MCP_Server_robot_watch"
 MCP_CLIENT = Client(transport=HTTP_TRANSPORT, name=SERVER_NAME)
 # -------------------------------
+# Async function using the HF token read from the HF_TOKEN environment variable
 # -------------------------------
+async def process_webcam_stream_async(image):
     if image is None:
         return "", "", "", "", "", "", "", ""
+    if HF_TOKEN == "missing_token_placeholder":
+        return "Error: HF_TOKEN not set locally.", "", "", "", "", "", "", ""
+    # Convert image to Base64
+    buffered = io.BytesIO()
+    image.save(buffered, format="JPEG")
+    b64_img = base64.b64encode(buffered.getvalue()).decode("utf-8")
+    payload = {
+        "hf_token_input": HF_TOKEN,
+        "robot_id_input": ROBOT_ID,
+        "image_b64_input": b64_img
+    }
     try:
         async with MCP_CLIENT:
             response = await MCP_CLIENT.call_tool(TOOL_NAME, payload)
             if response.is_error:
+                # Handle error content safely
                 error_msg = "Unknown Error"
                 if hasattr(response, 'content') and isinstance(response.content, list):
                     error_msg = " ".join([getattr(item, 'text', '') for item in response.content])
                 raise Exception(f"MCP Tool Error: {error_msg}")
+            # ---------------------------------------------------------
+            # FIX: Handle List Content
+            # The 'content' is a list of objects (e.g., TextContent).
+            # We iterate through the list and join the text parts.
+            # ---------------------------------------------------------
             raw_text = ""
             if hasattr(response, 'content') and isinstance(response.content, list):
                 for item in response.content:
+                    # Check if the item has a 'text' attribute
                     if hasattr(item, 'text'):
                         raw_text += item.text
             else:
+                # Fallback for unexpected structure
                 raw_text = str(response)
+            # 6. PARSE RESPONSE
             try:
                 response_dict = json.loads(raw_text)
             except json.JSONDecodeError:
                 try:
                     response_dict = ast.literal_eval(raw_text)
                 except Exception:
+                    # If parsing fails completely, return the raw text in description
                     return f"Parsing Error. Raw output: {raw_text}", "", "", "", "", "", "", ""
             vlm_result = response_dict.get("result", {})
+            # 7. EXTRACT DATA
             description_out = vlm_result.get("description", "")
             environment_out = vlm_result.get("environment", "")
             indoor_outdoor_out = vlm_result.get("indoor_or_outdoor", "")
             )
     except Exception as e:
+        print(f"Error calling MCP API: {e}")
         return f"Error: {e}", "", "", "", "", "", "", ""
 # -------------------------------
 # Gradio UI
 # -------------------------------
+with gr.Blocks() as demo:
     gr.Markdown("## 🎥 Robot Vision Webcam Stream (MCP Client)")
     gr.LoginButton()
     with gr.Row():
         webcam_input = gr.Image(
+            label="Captured from Web-Cam",
             sources=["webcam"],
             type="pil"
         )
         with gr.Column():
+            description_out = gr.Textbox(label="Description", lines=5)
+            environment_out = gr.Textbox(label="Environment", lines=3)
+            indoor_outdoor_out = gr.Textbox(label="Indoor/Outdoor", lines=1)
+            lighting_condition_out = gr.Textbox(label="Lighting Condition", lines=1)
+            human_out = gr.Textbox(label="Human Detected", lines=3)
+            animals_out = gr.Textbox(label="Animals Detected", lines=2)
+            objects_out = gr.Textbox(label="Objects Detected", lines=2)
+            hazards_out = gr.Textbox(label="Hazards Identified", lines=2)
     webcam_input.stream(
         process_webcam_stream_async,
         inputs=[webcam_input],
             objects_out,
             hazards_out
         ],
+        stream_every=1.0
     )
 if __name__ == "__main__":