Spaces:

OppaAI
/

Robot_MCP_Client

Sleeping

App Files Community

OppaAI committed on Nov 23, 2025

Commit

b18ef1e

verified ·

1 Parent(s): 6d55b52

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -17

app.py CHANGED Viewed

@@ -4,11 +4,18 @@ import time
 import io
 import gradio as gr
 from gradio_client import Client
-# Load environment variables
-ROBOT_ID = os.environ.get("ROBOT_ID")
 HF_TOKEN = os.environ.get("HF_CV_ROBOT_TOKEN")
-HF_SPACE = "OppaAI/Robot_MCP_Server"
 API_NAME = "/predict"
@@ -22,28 +29,46 @@ def process_webcam_stream(image):
     image.save(buffered, format="JPEG")
     b64_img = base64.b64encode(buffered.getvalue()).decode("utf-8")
-    # Prepare payload
     payload = {
-        "image_b64": b64_img,
-        "robot_id": ROBOT_ID,
-        "timestamp": time.time(),
-        "hf_token": HF_TOKEN
     }
     # Send to HF Space using streaming-friendly predict
     client = Client(HF_SPACE)
     try:
-        resp = client.predict(payload, api_name=API_NAME)
-        objects_list = resp.get("objects", [])
         objects_str = ", ".join(objects_list) if isinstance(objects_list, list) else str(objects_list)
         return (
-            resp.get("description", ""),
-            resp.get("human", ""),
             objects_str,
-            resp.get("environment", "")
         )
     except Exception as e:
         return f"Error: {e}", "", "", ""
@@ -56,11 +81,13 @@ with gr.Blocks() as demo:
             sources=["upload", "webcam"],
             type="pil"
         )
-        description_out = gr.Textbox(label="Description")
-        human_out = gr.Textbox(label="Human")
-        objects_out = gr.Textbox(label="Objects")
-        environment_out = gr.Textbox(label="Environment")
     webcam_input.stream(
         process_webcam_stream,
         inputs=[webcam_input],

 import io
 import gradio as gr
 from gradio_client import Client
+from dotenv import load_dotenv
+# Load environment variables (ensure .env is set up locally)
+load_dotenv()
+ROBOT_ID = os.environ.get("ROBOT_ID", "unknown") # Default to 'unknown' if missing
 HF_TOKEN = os.environ.get("HF_CV_ROBOT_TOKEN")
+if not HF_TOKEN:
+    # If token is missing, the API call will likely fail, but we can proceed
+    print("Warning: HF_TOKEN not found. API calls may fail.")
+HF_SPACE = "OppaAI/Robot_MCP_Server"   # HF Space name
 API_NAME = "/predict"
     image.save(buffered, format="JPEG")
     b64_img = base64.b64encode(buffered.getvalue()).decode("utf-8")
+    # Prepare payload with the CORRECT keys matching the server function arguments
+    # Server expects: hf_token_input, robot_id_input, image_b64_input
     payload = {
+        "hf_token_input": HF_TOKEN,
+        "robot_id_input": ROBOT_ID,
+        # "timestamp": time.time(), # Server function doesn't use this, so we remove it
+        "image_b64_input": b64_img
     }
     # Send to HF Space using streaming-friendly predict
     client = Client(HF_SPACE)
     try:
+        # client.predict takes the inputs as separate positional arguments (not wrapped in a list/tuple or dict).
+        # The order must match the server function signature:
+        resp = client.predict(
+            payload["hf_token_input"],
+            payload["robot_id_input"],
+            payload["image_b64_input"],
+            api_name=API_NAME
+        )
+        # The server response structure uses a nested 'result' key in the dict
+        vlm_result = resp.get("result", {})
+        description_out = vlm_result.get("description", "")
+        human_out = vlm_result.get("human", "")
+        objects_list = vlm_result.get("objects", [])
+        environment_out = vlm_result.get("environment", "")
         objects_str = ", ".join(objects_list) if isinstance(objects_list, list) else str(objects_list)
         return (
+            description_out,
+            human_out,
             objects_str,
+            environment_out
         )
     except Exception as e:
+        # Print the error to the local console for debugging
+        print(f"Error calling remote Gradio API: {e}")
         return f"Error: {e}", "", "", ""
             sources=["upload", "webcam"],
             type="pil"
         )
+        with gr.Column():
+            description_out = gr.Textbox(label="Description")
+            human_out = gr.Textbox(label="Human")
+            objects_out = gr.Textbox(label="Objects")
+            environment_out = gr.Textbox(label="Environment")
+    # Gradio handles the local streaming loop
     webcam_input.stream(
         process_webcam_stream,
         inputs=[webcam_input],