OppaAI committed on
Commit
9ac3c29
·
verified ·
1 Parent(s): ef5183f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -57
app.py CHANGED
@@ -6,18 +6,16 @@ from fastmcp.client import StreamableHttpTransport
6
  import asyncio
7
  import ast
8
  import json
9
- import warnings
10
-
11
- # -------------------------------
12
- # 0. CLEANUP: Ignore the spammy DeprecationWarnings
13
- # -------------------------------
14
- warnings.filterwarnings("ignore", category=DeprecationWarning)
15
- warnings.filterwarnings("ignore", category=UserWarning)
16
 
17
  # -------------------------------
18
  # MCP server info
19
  # -------------------------------
20
  ROBOT_ID = "Robot_MCP_Client"
 
 
 
 
 
21
  MCP_SERVER_URL = "https://oppaai-robot-mcp-server.hf.space/gradio_api/mcp/"
22
  SERVER_NAME = "Robot_MCP_Server"
23
  TOOL_NAME = "Robot_MCP_Server_robot_watch"
@@ -29,61 +27,65 @@ HTTP_TRANSPORT = StreamableHttpTransport(url=MCP_SERVER_URL)
29
  MCP_CLIENT = Client(transport=HTTP_TRANSPORT, name=SERVER_NAME)
30
 
31
  # -------------------------------
32
- # Async function
33
  # -------------------------------
34
- async def process_webcam_stream_async(image, oauth_token: gr.OAuthToken | None = None):
35
- # 1. Login Check
36
- if oauth_token is None:
37
- return "⚠️ Please log in via the button above to start.", "", "", "", "", "", "", ""
38
-
39
- # 2. Image Check
40
  if image is None:
41
  return "", "", "", "", "", "", "", ""
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  try:
44
- # 3. Process Image
45
- buffered = io.BytesIO()
46
- image.save(buffered, format="JPEG")
47
- b64_img = base64.b64encode(buffered.getvalue()).decode("utf-8")
48
-
49
- payload = {
50
- "hf_token_input": oauth_token.token,
51
- "robot_id_input": ROBOT_ID,
52
- "image_b64_input": b64_img
53
- }
54
-
55
- # 4. Call MCP Server
56
  async with MCP_CLIENT:
57
  response = await MCP_CLIENT.call_tool(TOOL_NAME, payload)
58
 
59
- # Handle MCP Errors
60
  if response.is_error:
 
61
  error_msg = "Unknown Error"
62
  if hasattr(response, 'content') and isinstance(response.content, list):
63
  error_msg = " ".join([getattr(item, 'text', '') for item in response.content])
64
  raise Exception(f"MCP Tool Error: {error_msg}")
65
 
66
- # 5. Extract Text from Response List
 
 
 
 
67
  raw_text = ""
68
  if hasattr(response, 'content') and isinstance(response.content, list):
69
  for item in response.content:
 
70
  if hasattr(item, 'text'):
71
  raw_text += item.text
72
  else:
 
73
  raw_text = str(response)
74
 
75
- # 6. Parse JSON/Dict
76
  try:
77
  response_dict = json.loads(raw_text)
78
  except json.JSONDecodeError:
79
  try:
80
  response_dict = ast.literal_eval(raw_text)
81
  except Exception:
 
82
  return f"Parsing Error. Raw output: {raw_text}", "", "", "", "", "", "", ""
83
 
84
  vlm_result = response_dict.get("result", {})
85
 
86
- # 7. Map to Outputs
87
  description_out = vlm_result.get("description", "")
88
  environment_out = vlm_result.get("environment", "")
89
  indoor_outdoor_out = vlm_result.get("indoor_or_outdoor", "")
@@ -107,44 +109,33 @@ async def process_webcam_stream_async(image, oauth_token: gr.OAuthToken | None =
107
  )
108
 
109
  except Exception as e:
110
- print(f"Error: {e}")
111
  return f"Error: {e}", "", "", "", "", "", "", ""
112
 
113
 
114
  # -------------------------------
115
  # Gradio UI
116
  # -------------------------------
117
- with gr.Blocks(title="Robot Vision MCP") as demo:
118
  gr.Markdown("## 🎥 Robot Vision Webcam Stream (MCP Client)")
119
-
120
- # Login Button
121
  gr.LoginButton()
122
 
123
  with gr.Row():
124
  webcam_input = gr.Image(
125
- label="Webcam Input",
126
  sources=["webcam"],
127
  type="pil"
128
  )
129
-
130
  with gr.Column():
131
- description_out = gr.Textbox(label="Description", lines=4)
132
- with gr.Row():
133
- environment_out = gr.Textbox(label="Environment")
134
- indoor_outdoor_out = gr.Textbox(label="In/Out")
135
-
136
- with gr.Row():
137
- human_out = gr.Textbox(label="Humans")
138
- hazards_out = gr.Textbox(label="Hazards")
139
-
140
- # Hidden / Extra fields (optional, add back if needed)
141
- lighting_condition_out = gr.Textbox(visible=False)
142
- animals_out = gr.Textbox(visible=False)
143
- objects_out = gr.Textbox(visible=False)
144
-
145
- # -------------------------------
146
- # STREAM CONFIGURATION (The Important Fix)
147
- # -------------------------------
148
  webcam_input.stream(
149
  process_webcam_stream_async,
150
  inputs=[webcam_input],
@@ -158,10 +149,7 @@ with gr.Blocks(title="Robot Vision MCP") as demo:
158
  objects_out,
159
  hazards_out
160
  ],
161
- # Update every 3 seconds to give the AI time to think
162
- stream_every=3.0,
163
- # Wait for the previous request to finish before sending a new one
164
- concurrency_limit=1
165
  )
166
 
167
  if __name__ == "__main__":
 
6
  import asyncio
7
  import ast
8
  import json
 
 
 
 
 
 
 
9
 
10
  # -------------------------------
11
  # MCP server info
12
  # -------------------------------
13
  ROBOT_ID = "Robot_MCP_Client"
14
+ HF_TOKEN = os.environ.get("HF_TOKEN")
15
+ if not HF_TOKEN:
16
+ print("Warning: HF_TOKEN not found. API calls may fail.")
17
+ HF_TOKEN = "missing_token_placeholder"
18
+
19
  MCP_SERVER_URL = "https://oppaai-robot-mcp-server.hf.space/gradio_api/mcp/"
20
  SERVER_NAME = "Robot_MCP_Server"
21
  TOOL_NAME = "Robot_MCP_Server_robot_watch"
 
27
  MCP_CLIENT = Client(transport=HTTP_TRANSPORT, name=SERVER_NAME)
28
 
29
  # -------------------------------
30
+ # Async function using the HF_TOKEN environment variable
31
  # -------------------------------
32
+ async def process_webcam_stream_async(image):
 
 
 
 
 
33
  if image is None:
34
  return "", "", "", "", "", "", "", ""
35
 
36
+ if HF_TOKEN == "missing_token_placeholder":
37
+ return "Error: HF_TOKEN not set locally.", "", "", "", "", "", "", ""
38
+
39
+ # Convert image to Base64
40
+ buffered = io.BytesIO()
41
+ image.save(buffered, format="JPEG")
42
+ b64_img = base64.b64encode(buffered.getvalue()).decode("utf-8")
43
+
44
+ payload = {
45
+ "hf_token_input": HF_TOKEN,
46
+ "robot_id_input": ROBOT_ID,
47
+ "image_b64_input": b64_img
48
+ }
49
+
50
  try:
 
 
 
 
 
 
 
 
 
 
 
 
51
  async with MCP_CLIENT:
52
  response = await MCP_CLIENT.call_tool(TOOL_NAME, payload)
53
 
 
54
  if response.is_error:
55
+ # Handle error content safely
56
  error_msg = "Unknown Error"
57
  if hasattr(response, 'content') and isinstance(response.content, list):
58
  error_msg = " ".join([getattr(item, 'text', '') for item in response.content])
59
  raise Exception(f"MCP Tool Error: {error_msg}")
60
 
61
+ # ---------------------------------------------------------
62
+ # FIX: Handle List Content
63
+ # The 'content' is a list of objects (e.g., TextContent).
64
+ # We iterate through the list and join the text parts.
65
+ # ---------------------------------------------------------
66
  raw_text = ""
67
  if hasattr(response, 'content') and isinstance(response.content, list):
68
  for item in response.content:
69
+ # Check if the item has a 'text' attribute
70
  if hasattr(item, 'text'):
71
  raw_text += item.text
72
  else:
73
+ # Fallback for unexpected structure
74
  raw_text = str(response)
75
 
76
+ # 6. PARSE RESPONSE
77
  try:
78
  response_dict = json.loads(raw_text)
79
  except json.JSONDecodeError:
80
  try:
81
  response_dict = ast.literal_eval(raw_text)
82
  except Exception:
83
+ # If parsing fails completely, return the raw text in description
84
  return f"Parsing Error. Raw output: {raw_text}", "", "", "", "", "", "", ""
85
 
86
  vlm_result = response_dict.get("result", {})
87
 
88
+ # 7. EXTRACT DATA
89
  description_out = vlm_result.get("description", "")
90
  environment_out = vlm_result.get("environment", "")
91
  indoor_outdoor_out = vlm_result.get("indoor_or_outdoor", "")
 
109
  )
110
 
111
  except Exception as e:
112
+ print(f"Error calling MCP API: {e}")
113
  return f"Error: {e}", "", "", "", "", "", "", ""
114
 
115
 
116
  # -------------------------------
117
  # Gradio UI
118
  # -------------------------------
119
+ with gr.Blocks() as demo:
120
  gr.Markdown("## 🎥 Robot Vision Webcam Stream (MCP Client)")
 
 
121
  gr.LoginButton()
122
 
123
  with gr.Row():
124
  webcam_input = gr.Image(
125
+ label="Captured from Web-Cam",
126
  sources=["webcam"],
127
  type="pil"
128
  )
 
129
  with gr.Column():
130
+ description_out = gr.Textbox(label="Description", lines=5)
131
+ environment_out = gr.Textbox(label="Environment", lines=3)
132
+ indoor_outdoor_out = gr.Textbox(label="Indoor/Outdoor", lines=1)
133
+ lighting_condition_out = gr.Textbox(label="Lighting Condition", lines=1)
134
+ human_out = gr.Textbox(label="Human Detected", lines=3)
135
+ animals_out = gr.Textbox(label="Animals Detected", lines=2)
136
+ objects_out = gr.Textbox(label="Objects Detected", lines=2)
137
+ hazards_out = gr.Textbox(label="Hazards Identified", lines=2)
138
+
 
 
 
 
 
 
 
 
139
  webcam_input.stream(
140
  process_webcam_stream_async,
141
  inputs=[webcam_input],
 
149
  objects_out,
150
  hazards_out
151
  ],
152
+ stream_every=1.0
 
 
 
153
  )
154
 
155
  if __name__ == "__main__":