OppaAI committed on
Commit
ef5183f
·
verified ·
1 Parent(s): 79f6e03

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -34
app.py CHANGED
@@ -6,6 +6,13 @@ from fastmcp.client import StreamableHttpTransport
6
  import asyncio
7
  import ast
8
  import json
 
 
 
 
 
 
 
9
 
10
  # -------------------------------
11
  # MCP server info
@@ -22,72 +29,61 @@ HTTP_TRANSPORT = StreamableHttpTransport(url=MCP_SERVER_URL)
22
  MCP_CLIENT = Client(transport=HTTP_TRANSPORT, name=SERVER_NAME)
23
 
24
  # -------------------------------
25
- # Async function using user's HF token
26
  # -------------------------------
27
  async def process_webcam_stream_async(image, oauth_token: gr.OAuthToken | None = None):
28
- """
29
- Send webcam image to MCP server using user's HF token and process the response.
30
- """
31
- # 1. CHECK LOGIN
32
  if oauth_token is None:
33
- return "Please log in using the button above.", "", "", "", "", "", "", ""
34
 
35
- # 2. CHECK IMAGE
36
  if image is None:
37
  return "", "", "", "", "", "", "", ""
38
 
39
  try:
40
- # 3. PREPARE IMAGE
41
  buffered = io.BytesIO()
42
  image.save(buffered, format="JPEG")
43
  b64_img = base64.b64encode(buffered.getvalue()).decode("utf-8")
44
 
45
- # 4. PREPARE PAYLOAD
46
  payload = {
47
  "hf_token_input": oauth_token.token,
48
  "robot_id_input": ROBOT_ID,
49
  "image_b64_input": b64_img
50
  }
51
 
52
- # 5. CALL MCP SERVER
53
  async with MCP_CLIENT:
54
  response = await MCP_CLIENT.call_tool(TOOL_NAME, payload)
55
 
 
56
  if response.is_error:
57
- # Handle error content safely
58
  error_msg = "Unknown Error"
59
  if hasattr(response, 'content') and isinstance(response.content, list):
60
  error_msg = " ".join([getattr(item, 'text', '') for item in response.content])
61
  raise Exception(f"MCP Tool Error: {error_msg}")
62
 
63
- # ---------------------------------------------------------
64
- # FIX: Handle List Content
65
- # The 'content' is a list of objects (e.g., TextContent).
66
- # We iterate through the list and join the text parts.
67
- # ---------------------------------------------------------
68
  raw_text = ""
69
  if hasattr(response, 'content') and isinstance(response.content, list):
70
  for item in response.content:
71
- # Check if the item has a 'text' attribute
72
  if hasattr(item, 'text'):
73
  raw_text += item.text
74
  else:
75
- # Fallback for unexpected structure
76
  raw_text = str(response)
77
 
78
- # 6. PARSE RESPONSE
79
  try:
80
  response_dict = json.loads(raw_text)
81
  except json.JSONDecodeError:
82
  try:
83
  response_dict = ast.literal_eval(raw_text)
84
  except Exception:
85
- # If parsing fails completely, return the raw text in description
86
  return f"Parsing Error. Raw output: {raw_text}", "", "", "", "", "", "", ""
87
 
88
  vlm_result = response_dict.get("result", {})
89
 
90
- # 7. EXTRACT DATA
91
  description_out = vlm_result.get("description", "")
92
  environment_out = vlm_result.get("environment", "")
93
  indoor_outdoor_out = vlm_result.get("indoor_or_outdoor", "")
@@ -111,33 +107,44 @@ async def process_webcam_stream_async(image, oauth_token: gr.OAuthToken | None =
111
  )
112
 
113
  except Exception as e:
114
- print(f"Error calling MCP API: {e}")
115
  return f"Error: {e}", "", "", "", "", "", "", ""
116
 
117
 
118
  # -------------------------------
119
  # Gradio UI
120
  # -------------------------------
121
- with gr.Blocks() as demo:
122
  gr.Markdown("## 🎥 Robot Vision Webcam Stream (MCP Client)")
 
 
123
  gr.LoginButton()
124
 
125
  with gr.Row():
126
  webcam_input = gr.Image(
127
- label="Captured from Web-Cam",
128
  sources=["webcam"],
129
  type="pil"
130
  )
 
131
  with gr.Column():
132
- description_out = gr.Textbox(label="Description", lines=5)
133
- environment_out = gr.Textbox(label="Environment", lines=3)
134
- indoor_outdoor_out = gr.Textbox(label="Indoor/Outdoor", lines=1)
135
- lighting_condition_out = gr.Textbox(label="Lighting Condition", lines=1)
136
- human_out = gr.Textbox(label="Human Detected", lines=3)
137
- animals_out = gr.Textbox(label="Animals Detected", lines=2)
138
- objects_out = gr.Textbox(label="Objects Detected", lines=2)
139
- hazards_out = gr.Textbox(label="Hazards Identified", lines=2)
140
-
 
 
 
 
 
 
 
 
141
  webcam_input.stream(
142
  process_webcam_stream_async,
143
  inputs=[webcam_input],
@@ -151,7 +158,10 @@ with gr.Blocks() as demo:
151
  objects_out,
152
  hazards_out
153
  ],
154
- stream_every=1.0
 
 
 
155
  )
156
 
157
  if __name__ == "__main__":
 
6
  import asyncio
7
  import ast
8
  import json
9
+ import warnings
10
+
11
+ # -------------------------------
12
+ # 0. CLEANUP: Ignore the spammy DeprecationWarnings
13
+ # -------------------------------
14
+ warnings.filterwarnings("ignore", category=DeprecationWarning)
15
+ warnings.filterwarnings("ignore", category=UserWarning)
16
 
17
  # -------------------------------
18
  # MCP server info
 
29
  MCP_CLIENT = Client(transport=HTTP_TRANSPORT, name=SERVER_NAME)
30
 
31
  # -------------------------------
32
+ # Async function
33
  # -------------------------------
34
  async def process_webcam_stream_async(image, oauth_token: gr.OAuthToken | None = None):
35
+ # 1. Login Check
 
 
 
36
  if oauth_token is None:
37
+ return "⚠️ Please log in via the button above to start.", "", "", "", "", "", "", ""
38
 
39
+ # 2. Image Check
40
  if image is None:
41
  return "", "", "", "", "", "", "", ""
42
 
43
  try:
44
+ # 3. Process Image
45
  buffered = io.BytesIO()
46
  image.save(buffered, format="JPEG")
47
  b64_img = base64.b64encode(buffered.getvalue()).decode("utf-8")
48
 
 
49
  payload = {
50
  "hf_token_input": oauth_token.token,
51
  "robot_id_input": ROBOT_ID,
52
  "image_b64_input": b64_img
53
  }
54
 
55
+ # 4. Call MCP Server
56
  async with MCP_CLIENT:
57
  response = await MCP_CLIENT.call_tool(TOOL_NAME, payload)
58
 
59
+ # Handle MCP Errors
60
  if response.is_error:
 
61
  error_msg = "Unknown Error"
62
  if hasattr(response, 'content') and isinstance(response.content, list):
63
  error_msg = " ".join([getattr(item, 'text', '') for item in response.content])
64
  raise Exception(f"MCP Tool Error: {error_msg}")
65
 
66
+ # 5. Extract Text from Response List
 
 
 
 
67
  raw_text = ""
68
  if hasattr(response, 'content') and isinstance(response.content, list):
69
  for item in response.content:
 
70
  if hasattr(item, 'text'):
71
  raw_text += item.text
72
  else:
 
73
  raw_text = str(response)
74
 
75
+ # 6. Parse JSON/Dict
76
  try:
77
  response_dict = json.loads(raw_text)
78
  except json.JSONDecodeError:
79
  try:
80
  response_dict = ast.literal_eval(raw_text)
81
  except Exception:
 
82
  return f"Parsing Error. Raw output: {raw_text}", "", "", "", "", "", "", ""
83
 
84
  vlm_result = response_dict.get("result", {})
85
 
86
+ # 7. Map to Outputs
87
  description_out = vlm_result.get("description", "")
88
  environment_out = vlm_result.get("environment", "")
89
  indoor_outdoor_out = vlm_result.get("indoor_or_outdoor", "")
 
107
  )
108
 
109
  except Exception as e:
110
+ print(f"Error: {e}")
111
  return f"Error: {e}", "", "", "", "", "", "", ""
112
 
113
 
114
  # -------------------------------
115
  # Gradio UI
116
  # -------------------------------
117
+ with gr.Blocks(title="Robot Vision MCP") as demo:
118
  gr.Markdown("## 🎥 Robot Vision Webcam Stream (MCP Client)")
119
+
120
+ # Login Button
121
  gr.LoginButton()
122
 
123
  with gr.Row():
124
  webcam_input = gr.Image(
125
+ label="Webcam Input",
126
  sources=["webcam"],
127
  type="pil"
128
  )
129
+
130
  with gr.Column():
131
+ description_out = gr.Textbox(label="Description", lines=4)
132
+ with gr.Row():
133
+ environment_out = gr.Textbox(label="Environment")
134
+ indoor_outdoor_out = gr.Textbox(label="In/Out")
135
+
136
+ with gr.Row():
137
+ human_out = gr.Textbox(label="Humans")
138
+ hazards_out = gr.Textbox(label="Hazards")
139
+
140
+ # Hidden / Extra fields (optional, add back if needed)
141
+ lighting_condition_out = gr.Textbox(visible=False)
142
+ animals_out = gr.Textbox(visible=False)
143
+ objects_out = gr.Textbox(visible=False)
144
+
145
+ # -------------------------------
146
+ # STREAM CONFIGURATION (The Important Fix)
147
+ # -------------------------------
148
  webcam_input.stream(
149
  process_webcam_stream_async,
150
  inputs=[webcam_input],
 
158
  objects_out,
159
  hazards_out
160
  ],
161
+ # Update every 3 seconds to give the AI time to think
162
+ stream_every=3.0,
163
+ # Wait for the previous request to finish before sending a new one
164
+ concurrency_limit=1
165
  )
166
 
167
  if __name__ == "__main__":