Spaces:

OppaAI
/

Robot_MCP_Client

Sleeping

App Files Community

OppaAI committed on Nov 23, 2025

Commit

27c0f8e

verified ·

1 Parent(s): 9b0c24e

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -71

app.py CHANGED Viewed

@@ -1,35 +1,27 @@
 import os
-import cv2
 import base64
 import time
-import requests
-from io import BytesIO
-from typing import Dict, Any
 import gradio as gr
 from dotenv import load_dotenv
 from rich.console import Console
 from rich.table import Table
 from rich import box
-# ------------------------------
-# Environment
-# ------------------------------
 load_dotenv()
-ROBOT_ID = os.environ.get("ROBOT_ID", "robot_001")
 HF_TOKEN = os.environ.get("HF_CV_ROBOT_TOKEN")
-MCP_URL = os.environ.get("MCP_SERVER_URL", "http://localhost:7860/run_tool/robot_watch")  # Replace with actual URL
 console = Console()
-# ------------------------------
-# Rich table helper
-# ------------------------------
-def format_response(resp: Dict[str, Any]):
-    """Return a string for Gradio display with similar formatting to terminal rich table."""
-    objects_list = resp.get("objects", [])
-    objects_str = ", ".join(objects_list) if isinstance(objects_list, list) else str(objects_list)
     table = Table(
         title="😎 Robot Vision Result",
         title_style="bold cyan",
@@ -39,6 +31,10 @@ def format_response(resp: Dict[str, Any]):
         show_header=False,
         style="bold cyan"
     )
     table.add_column("Field", style="bold magenta")
     table.add_column("Value", style="white")
@@ -49,75 +45,62 @@ def format_response(resp: Dict[str, Any]):
     table.add_row("📦 Objects", objects_str)
     table.add_row("🏛️  Environment", str(resp.get("environment", "N/A")))
-    # Render as string for Gradio display
-    from rich.console import Console
-    from io import StringIO
-    s = StringIO()
-    temp_console = Console(file=s, force_terminal=True, color_system="truecolor", width=120)
-    temp_console.print(table)
-    return s.getvalue()
-# ------------------------------
-# Capture & call MCP tool
-# ------------------------------
-def process_frame_stream() -> Dict[str, Any]:
-    """Capture frame, send to MCP server, and return dict for Gradio."""
-    cap = cv2.VideoCapture(0)
-    if not cap.isOpened():
-        return {"result": "Camera not opened", "image": None}
-    ret, frame = cap.read()
-    cap.release()
-    if not ret:
-        return {"result": "Failed to read frame", "image": None}
-    # Encode image as JPEG + base64
-    ok, jpeg = cv2.imencode(".jpg", frame)
-    if not ok:
-        return {"result": "Failed to encode frame", "image": None}
-    b64_img = base64.b64encode(jpeg.tobytes()).decode("utf-8")
-    # Payload for MCP server
     payload = {
         "image_b64": b64_img,
         "robot_id": ROBOT_ID,
         "hf_token": HF_TOKEN
     }
     try:
-        # Streamable POST request to MCP
-        response = requests.post(MCP_URL, json=payload, stream=True)
-        response.raise_for_status()
-        # MCP returns JSON
-        resp_json = response.json()
-        # Convert response into rich table string
-        table_str = format_response(resp_json)
-        # Decode frame for display in Gradio
-        img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-        return {"result": table_str, "image": img_rgb}
     except Exception as e:
-        return {"result": f"Error calling MCP: {e}", "image": None}
-# ------------------------------
-# Gradio Interface
-# ------------------------------
-with gr.Blocks(title="Robot Vision Stream") as app:
-    with gr.Row():
-        output_text = gr.Textbox(label="Result", lines=20, interactive=False, placeholder="MCP results will appear here")
-        output_image = gr.Image(label="Camera Frame", type="numpy")
-    # Stream button triggers frame capture every 1 second
-    gr.Button("Capture & Analyze").click(fn=process_frame_stream, outputs=[output_text, output_image])
 if __name__ == "__main__":
-    app.launch()

 import os
 import base64
 import time
+import json
 import gradio as gr
+from gradio_client import Client
 from dotenv import load_dotenv
 from rich.console import Console
 from rich.table import Table
 from rich import box
+# Load environment variables
 load_dotenv()
+ROBOT_ID = os.environ.get("ROBOT_ID")
 HF_TOKEN = os.environ.get("HF_CV_ROBOT_TOKEN")
+HF_SPACE = "OppaAI/Robot_MCP_Server"
+API_NAME = "/predict"
 console = Console()
+def pretty_print_response(resp: dict):
+    """Rich table output with row lines, no URL."""
     table = Table(
         title="😎 Robot Vision Result",
         title_style="bold cyan",
         show_header=False,
         style="bold cyan"
     )
+    objects_list = resp.get("objects", [])
+    objects_str = ", ".join(objects_list) if isinstance(objects_list, list) else str(objects_list)
     table.add_column("Field", style="bold magenta")
     table.add_column("Value", style="white")
     table.add_row("📦 Objects", objects_str)
     table.add_row("🏛️  Environment", str(resp.get("environment", "N/A")))
+    console.print(table)
+    return resp.get("description", ""), resp.get("human", ""), objects_str, resp.get("environment", "")
_mcp_client = None  # shared gradio_client.Client, created on first use


def _get_mcp_client():
    """Return the shared client for HF_SPACE, connecting on the first call."""
    global _mcp_client
    if _mcp_client is None:
        # Building a Client performs a network handshake with the Space, so
        # do it once instead of once per streamed frame.
        _mcp_client = Client(HF_SPACE)
    return _mcp_client


def process_webcam_stream(image):
    """Send one webcam frame to the HF MCP Server and return its analysis.

    Args:
        image: Frame handed over by the Gradio webcam component, or None
            when no frame is available. It is passed to
            ``PIL.Image.fromarray`` (presumably an H x W x C uint8 array;
            confirm against the component's ``type`` setting).

    Returns:
        A 4-tuple ``(description, human, objects, environment)`` for the four
        output textboxes. Every element is None when there is no frame or
        when the request to the Space fails.
    """
    if image is None:
        return None, None, None, None

    # Imported locally, as in the original, so the module's top-level import
    # list is left untouched.
    import io
    from PIL import Image

    # Encode the frame as base64 JPEG.
    frame = Image.fromarray(image)
    if frame.mode != "RGB":
        # JPEG cannot store an alpha channel or palette data.
        frame = frame.convert("RGB")
    buffered = io.BytesIO()
    frame.save(buffered, format="JPEG")
    b64_img = base64.b64encode(buffered.getvalue()).decode("utf-8")

    payload = {
        "image_b64": b64_img,
        "robot_id": ROBOT_ID,
        "timestamp": time.time(),
        "hf_token": HF_TOKEN,
    }

    try:
        # Client creation is inside the try so a connection failure is
        # reported like any other request error instead of escaping.
        resp = _get_mcp_client().predict(payload, api_name=API_NAME)
        if isinstance(resp, str):
            # NOTE(review): assumes a string result is JSON text - confirm
            # against the server's output component.
            resp = json.loads(resp)
        # pretty_print_response prints the table and returns the same four
        # fields, with "objects" already guarded against non-list values.
        return pretty_print_response(resp)
    except Exception as e:
        console.print(f"[bold red]Error sending to HF:[/bold red] {e}")
        return None, None, None, None
# Gradio UI: a streaming webcam input whose frames are sent to
# process_webcam_stream, with one textbox per field of the returned tuple.
with gr.Blocks() as demo:
    gr.Markdown("## 🎥 Robot Vision Webcam Stream")
    with gr.Row():
        # `sources=[...]` replaces the old `source=` keyword; the Gradio
        # releases that accept `stream_every` below reject `source=`.
        webcam_input = gr.Image(sources=["webcam"], streaming=True, label="Webcam Input")
        description_out = gr.Textbox(label="Description")
        human_out = gr.Textbox(label="Human")
        objects_out = gr.Textbox(label="Objects")
        environment_out = gr.Textbox(label="Environment")

    # Connect streaming: a frame is submitted every 0.5 seconds.
    webcam_input.stream(
        process_webcam_stream,
        inputs=[webcam_input],
        outputs=[description_out, human_out, objects_out, environment_out],
        stream_every=0.5,
    )

if __name__ == "__main__":
    demo.launch()