OppaAI committed on
Commit
79f6e03
·
verified ·
1 Parent(s): 306ab5e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -25
app.py CHANGED
@@ -28,23 +28,23 @@ async def process_webcam_stream_async(image, oauth_token: gr.OAuthToken | None =
28
  """
29
  Send webcam image to MCP server using user's HF token and process the response.
30
  """
31
- # 1. CHECK LOGIN: If no token, ask user to log in
32
  if oauth_token is None:
33
  return "Please log in using the button above.", "", "", "", "", "", "", ""
34
 
35
- # 2. CHECK IMAGE: If camera hasn't loaded yet
36
  if image is None:
37
  return "", "", "", "", "", "", "", ""
38
 
39
  try:
40
- # 3. PREPARE IMAGE: Convert to Base64
41
  buffered = io.BytesIO()
42
  image.save(buffered, format="JPEG")
43
  b64_img = base64.b64encode(buffered.getvalue()).decode("utf-8")
44
 
45
- # 4. PREPARE PAYLOAD: Inject the user's token
46
  payload = {
47
- "hf_token_input": oauth_token.token, # <--- Token used here
48
  "robot_id_input": ROBOT_ID,
49
  "image_b64_input": b64_img
50
  }
@@ -54,17 +54,36 @@ async def process_webcam_stream_async(image, oauth_token: gr.OAuthToken | None =
54
  response = await MCP_CLIENT.call_tool(TOOL_NAME, payload)
55
 
56
  if response.is_error:
57
- error_text = response.content.text if response.content else "Unknown error"
58
- raise Exception(f"MCP Tool Error: {error_text}")
59
-
60
- raw_text = response.content.text
61
-
62
- # 6. PARSE RESPONSE (Handle both JSON and Python Dict strings)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  try:
64
  response_dict = json.loads(raw_text)
65
  except json.JSONDecodeError:
66
- # Fallback if server returns single quotes
67
- response_dict = ast.literal_eval(raw_text)
 
 
 
68
 
69
  vlm_result = response_dict.get("result", {})
70
 
@@ -78,7 +97,6 @@ async def process_webcam_stream_async(image, oauth_token: gr.OAuthToken | None =
78
  objects_list = vlm_result.get("objects", [])
79
  hazards_out = vlm_result.get("hazards", "")
80
 
81
- # Convert list to string
82
  objects_str = ", ".join(objects_list) if isinstance(objects_list, list) else str(objects_list)
83
 
84
  return (
@@ -102,19 +120,14 @@ async def process_webcam_stream_async(image, oauth_token: gr.OAuthToken | None =
102
  # -------------------------------
103
  with gr.Blocks() as demo:
104
  gr.Markdown("## 🎥 Robot Vision Webcam Stream (MCP Client)")
105
-
106
- # The Login Button (Required for oauth_token)
107
  gr.LoginButton()
108
 
109
  with gr.Row():
110
- # Input: Webcam
111
  webcam_input = gr.Image(
112
  label="Captured from Web-Cam",
113
  sources=["webcam"],
114
  type="pil"
115
  )
116
-
117
- # Outputs
118
  with gr.Column():
119
  description_out = gr.Textbox(label="Description", lines=5)
120
  environment_out = gr.Textbox(label="Environment", lines=3)
@@ -125,11 +138,6 @@ with gr.Blocks() as demo:
125
  objects_out = gr.Textbox(label="Objects Detected", lines=2)
126
  hazards_out = gr.Textbox(label="Hazards Identified", lines=2)
127
 
128
- # -------------------------------
129
- # Event Trigger
130
- # -------------------------------
131
- # CRITICAL FIX: Do NOT include gr.OAuthToken() in inputs.
132
- # Gradio automatically injects it because it's in the function signature.
133
  webcam_input.stream(
134
  process_webcam_stream_async,
135
  inputs=[webcam_input],
@@ -147,4 +155,4 @@ with gr.Blocks() as demo:
147
  )
148
 
149
  if __name__ == "__main__":
150
- demo.launch()
 
28
  """
29
  Send webcam image to MCP server using user's HF token and process the response.
30
  """
31
+ # 1. CHECK LOGIN
32
  if oauth_token is None:
33
  return "Please log in using the button above.", "", "", "", "", "", "", ""
34
 
35
+ # 2. CHECK IMAGE
36
  if image is None:
37
  return "", "", "", "", "", "", "", ""
38
 
39
  try:
40
+ # 3. PREPARE IMAGE
41
  buffered = io.BytesIO()
42
  image.save(buffered, format="JPEG")
43
  b64_img = base64.b64encode(buffered.getvalue()).decode("utf-8")
44
 
45
+ # 4. PREPARE PAYLOAD
46
  payload = {
47
+ "hf_token_input": oauth_token.token,
48
  "robot_id_input": ROBOT_ID,
49
  "image_b64_input": b64_img
50
  }
 
54
  response = await MCP_CLIENT.call_tool(TOOL_NAME, payload)
55
 
56
  if response.is_error:
57
+ # Handle error content safely
58
+ error_msg = "Unknown Error"
59
+ if hasattr(response, 'content') and isinstance(response.content, list):
60
+ error_msg = " ".join([getattr(item, 'text', '') for item in response.content])
61
+ raise Exception(f"MCP Tool Error: {error_msg}")
62
+
63
+ # ---------------------------------------------------------
64
+ # FIX: Handle List Content
65
+ # The 'content' is a list of objects (e.g., TextContent).
66
+ # We iterate through the list and join the text parts.
67
+ # ---------------------------------------------------------
68
+ raw_text = ""
69
+ if hasattr(response, 'content') and isinstance(response.content, list):
70
+ for item in response.content:
71
+ # Check if the item has a 'text' attribute
72
+ if hasattr(item, 'text'):
73
+ raw_text += item.text
74
+ else:
75
+ # Fallback for unexpected structure
76
+ raw_text = str(response)
77
+
78
+ # 6. PARSE RESPONSE
79
  try:
80
  response_dict = json.loads(raw_text)
81
  except json.JSONDecodeError:
82
+ try:
83
+ response_dict = ast.literal_eval(raw_text)
84
+ except Exception:
85
+ # If parsing fails completely, return the raw text in description
86
+ return f"Parsing Error. Raw output: {raw_text}", "", "", "", "", "", "", ""
87
 
88
  vlm_result = response_dict.get("result", {})
89
 
 
97
  objects_list = vlm_result.get("objects", [])
98
  hazards_out = vlm_result.get("hazards", "")
99
 
 
100
  objects_str = ", ".join(objects_list) if isinstance(objects_list, list) else str(objects_list)
101
 
102
  return (
 
120
  # -------------------------------
121
  with gr.Blocks() as demo:
122
  gr.Markdown("## 🎥 Robot Vision Webcam Stream (MCP Client)")
 
 
123
  gr.LoginButton()
124
 
125
  with gr.Row():
 
126
  webcam_input = gr.Image(
127
  label="Captured from Web-Cam",
128
  sources=["webcam"],
129
  type="pil"
130
  )
 
 
131
  with gr.Column():
132
  description_out = gr.Textbox(label="Description", lines=5)
133
  environment_out = gr.Textbox(label="Environment", lines=3)
 
138
  objects_out = gr.Textbox(label="Objects Detected", lines=2)
139
  hazards_out = gr.Textbox(label="Hazards Identified", lines=2)
140
 
 
 
 
 
 
141
  webcam_input.stream(
142
  process_webcam_stream_async,
143
  inputs=[webcam_input],
 
155
  )
156
 
157
  if __name__ == "__main__":
158
+ demo.launch(ssr_mode=False)