OppaAI committed on
Commit
1fb1e3b
·
verified ·
1 Parent(s): b18ef1e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -42
app.py CHANGED
@@ -3,24 +3,36 @@ import base64
3
  import time
4
  import io
5
  import gradio as gr
6
- from gradio_client import Client
 
 
 
 
7
  from dotenv import load_dotenv
8
 
9
  # Load environment variables (ensure .env is set up locally)
10
  load_dotenv()
11
 
12
- ROBOT_ID = os.environ.get("ROBOT_ID", "unknown") # Default to 'unknown' if missing
13
  HF_TOKEN = os.environ.get("HF_CV_ROBOT_TOKEN")
14
  if not HF_TOKEN:
15
- # If token is missing, the API call will likely fail, but we can proceed
16
  print("Warning: HF_TOKEN not found. API calls may fail.")
17
 
18
- HF_SPACE = "OppaAI/Robot_MCP_Server" # HF Space name
19
- API_NAME = "/predict"
 
 
 
20
 
21
 
22
- def process_webcam_stream(image):
23
- """Send webcam image to HF MCP Server and get result"""
 
 
 
 
 
 
24
  if image is None:
25
  return "", "", "", ""
26
 
@@ -29,51 +41,49 @@ def process_webcam_stream(image):
29
  image.save(buffered, format="JPEG")
30
  b64_img = base64.b64encode(buffered.getvalue()).decode("utf-8")
31
 
32
- # Prepare payload with the CORRECT keys matching the server function arguments
33
- # Server expects: hf_token_input, robot_id_input, image_b64_input
34
  payload = {
35
  "hf_token_input": HF_TOKEN,
36
  "robot_id_input": ROBOT_ID,
37
- # "timestamp": time.time(), # Server function doesn't use this, so we remove it
38
  "image_b64_input": b64_img
39
  }
40
 
41
- # Send to HF Space using streaming-friendly predict
42
- client = Client(HF_SPACE)
43
  try:
44
- # client.predict takes the inputs as individual arguments in a list/tuple
45
- # The order must match the server function signature:
46
- resp = client.predict(
47
- payload["hf_token_input"],
48
- payload["robot_id_input"],
49
- payload["image_b64_input"],
50
- api_name=API_NAME
51
- )
52
-
53
- # The server response structure uses a nested 'result' key in the dict
54
- vlm_result = resp.get("result", {})
55
-
56
- description_out = vlm_result.get("description", "")
57
- human_out = vlm_result.get("human", "")
58
- objects_list = vlm_result.get("objects", [])
59
- environment_out = vlm_result.get("environment", "")
60
-
61
- objects_str = ", ".join(objects_list) if isinstance(objects_list, list) else str(objects_list)
62
-
63
- return (
64
- description_out,
65
- human_out,
66
- objects_str,
67
- environment_out
68
- )
 
 
 
69
  except Exception as e:
70
- # Print the error to the local console for debugging
71
- print(f"Error calling remote Gradio API: {e}")
72
  return f"Error: {e}", "", "", ""
73
 
74
 
75
  with gr.Blocks() as demo:
76
- gr.Markdown("## 🎥 Robot Vision Webcam Stream")
77
 
78
  with gr.Row():
79
  webcam_input = gr.Image(
@@ -87,9 +97,9 @@ with gr.Blocks() as demo:
87
  objects_out = gr.Textbox(label="Objects")
88
  environment_out = gr.Textbox(label="Environment")
89
 
90
- # Gradio handles the local streaming loop
91
  webcam_input.stream(
92
- process_webcam_stream,
93
  inputs=[webcam_input],
94
  outputs=[description_out, human_out, objects_out, environment_out],
95
  stream_every=0.5
 
3
  import time
4
  import io
5
  import gradio as gr
6
+ # Replace gradio_client with fastmcp Client and transport
7
+ from fastmcp import Client
8
+ from fastmcp.client import StreamableHttpTransport
9
+ # Import asyncio to manage async calls within the stream function
10
+ import asyncio
11
  from dotenv import load_dotenv
12
 
13
  # Load environment variables (ensure .env is set up locally)
14
  load_dotenv()
15
 
16
+ ROBOT_ID = os.environ.get("ROBOT_ID", "unknown")
17
  HF_TOKEN = os.environ.get("HF_CV_ROBOT_TOKEN")
18
  if not HF_TOKEN:
 
19
  print("Warning: HF_TOKEN not found. API calls may fail.")
20
 
21
+ # The MCP URL of your remote server
22
+ MCP_SERVER_URL = "https://oppaai-robot-mcp-server.hf.space/gradio_api/mcp/"
23
+ SERVER_NAME = "Robot_MCP_Server"
24
+ # The exact tool name that matches the server function:
25
+ TOOL_NAME = "Robot_MCP_Server_gradio_ui_with_base64_fields"
26
 
27
 
28
+ # Initialize the MCP client globally
29
+ HTTP_TRANSPORT = StreamableHttpTransport(url=MCP_SERVER_URL)
30
+ MCP_CLIENT = Client(transport=HTTP_TRANSPORT, name=SERVER_NAME)
31
+
32
+
33
+ # This function needs to be an async function because client.call_tool is async
34
+ async def process_webcam_stream_async(image):
35
+ """Send webcam image to HF MCP Server using MCP protocol and get result"""
36
  if image is None:
37
  return "", "", "", ""
38
 
 
41
  image.save(buffered, format="JPEG")
42
  b64_img = base64.b64encode(buffered.getvalue()).decode("utf-8")
43
 
44
+ # Prepare payload using the keys the server expects (from the working client)
 
45
  payload = {
46
  "hf_token_input": HF_TOKEN,
47
  "robot_id_input": ROBOT_ID,
 
48
  "image_b64_input": b64_img
49
  }
50
 
 
 
51
  try:
52
+ # Use the global client instance to call the tool asynchronously
53
+ async with MCP_CLIENT:
54
+ response = await MCP_CLIENT.call_tool(TOOL_NAME, payload)
55
+
56
+ if response.is_error:
57
+ error_text = response.content.text if response.content else "Unknown error"
58
+ raise Exception(f"MCP Tool Error: {error_text}")
59
+
60
+ # Parse the JSON string response from the server's output
61
+ import json
62
+ response_dict = json.loads(response.content.text)
63
+
64
+ vlm_result = response_dict.get("result", {})
65
+
66
+ description_out = vlm_result.get("description", "")
67
+ human_out = vlm_result.get("human", "")
68
+ objects_list = vlm_result.get("objects", [])
69
+ environment_out = vlm_result.get("environment", "")
70
+
71
+ objects_str = ", ".join(objects_list) if isinstance(objects_list, list) else str(objects_list)
72
+
73
+ return (
74
+ description_out,
75
+ human_out,
76
+ objects_str,
77
+ environment_out
78
+ )
79
+
80
  except Exception as e:
81
+ print(f"Error calling remote MCP API: {e}")
 
82
  return f"Error: {e}", "", "", ""
83
 
84
 
85
  with gr.Blocks() as demo:
86
+ gr.Markdown("## 🎥 Robot Vision Webcam Stream (using MCP Client)")
87
 
88
  with gr.Row():
89
  webcam_input = gr.Image(
 
97
  objects_out = gr.Textbox(label="Objects")
98
  environment_out = gr.Textbox(label="Environment")
99
 
100
+ # Gradio handles the local streaming loop and automatically wraps async functions
101
  webcam_input.stream(
102
+ process_webcam_stream_async, # Use the async function here
103
  inputs=[webcam_input],
104
  outputs=[description_out, human_out, objects_out, environment_out],
105
  stream_every=0.5