Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -20,8 +20,9 @@ TOOL_NAME = "Robot_MCP_Server_robot_watch"
|
|
| 20 |
HTTP_TRANSPORT = StreamableHttpTransport(url=MCP_SERVER_URL)
|
| 21 |
MCP_CLIENT = Client(transport=HTTP_TRANSPORT, name=SERVER_NAME)
|
| 22 |
|
|
|
|
| 23 |
# -------------------------------
|
| 24 |
-
# Async function using user's token
|
| 25 |
# -------------------------------
|
| 26 |
async def process_webcam_stream_async(image, oauth_token: gr.OAuthToken | None):
|
| 27 |
"""
|
|
@@ -33,7 +34,7 @@ async def process_webcam_stream_async(image, oauth_token: gr.OAuthToken | None):
|
|
| 33 |
if image is None:
|
| 34 |
return "", "", "", "", "", "", "", ""
|
| 35 |
|
| 36 |
-
# Convert image to Base64
|
| 37 |
buffered = io.BytesIO()
|
| 38 |
image.save(buffered, format="JPEG")
|
| 39 |
b64_img = base64.b64encode(buffered.getvalue()).decode("utf-8")
|
|
@@ -56,6 +57,7 @@ async def process_webcam_stream_async(image, oauth_token: gr.OAuthToken | None):
|
|
| 56 |
response_dict = ast.literal_eval(raw_text)
|
| 57 |
vlm_result = response_dict.get("result", {})
|
| 58 |
|
|
|
|
| 59 |
description_out = vlm_result.get("description", "")
|
| 60 |
human_out = vlm_result.get("human", "")
|
| 61 |
environment_out = vlm_result.get("environment", "")
|
|
@@ -65,6 +67,7 @@ async def process_webcam_stream_async(image, oauth_token: gr.OAuthToken | None):
|
|
| 65 |
hazards_list = vlm_result.get("hazards", [])
|
| 66 |
objects_list = vlm_result.get("objects", [])
|
| 67 |
|
|
|
|
| 68 |
objects_str = ", ".join(objects_list) if isinstance(objects_list, list) else str(objects_list)
|
| 69 |
animals_str = ", ".join(animals_list) if isinstance(animals_list, list) else str(animals_list)
|
| 70 |
hazards_str = ", ".join(hazards_list) if isinstance(hazards_list, list) else str(hazards_list)
|
|
@@ -81,15 +84,17 @@ async def process_webcam_stream_async(image, oauth_token: gr.OAuthToken | None):
|
|
| 81 |
)
|
| 82 |
|
| 83 |
except Exception as e:
|
| 84 |
-
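print(f"Error calling … [removed line truncated in extraction; its replacement on the added side reads: print(f"Error calling MCP API: {e}")]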
|
| 85 |
import traceback
|
| 86 |
traceback.print_exc()
|
| 87 |
return f"Error: {e}", "", "", "", "", "", "", ""
|
| 88 |
|
|
|
|
| 89 |
# -------------------------------
|
| 90 |
# Gradio UI
|
| 91 |
# -------------------------------
|
| 92 |
with gr.Blocks() as demo:
|
|
|
|
| 93 |
gr.LoginButton()
|
| 94 |
gr.Markdown("## 🎥 Robot Vision Webcam Stream (MCP Client)")
|
| 95 |
with gr.Row():
|
|
@@ -104,9 +109,10 @@ with gr.Blocks() as demo:
|
|
| 104 |
objects_out = gr.Textbox(label="Objects Detected", lines=2)
|
| 105 |
hazards_out = gr.Textbox(label="Hazards Identified", lines=2)
|
| 106 |
|
|
|
|
| 107 |
webcam_input.stream(
|
| 108 |
process_webcam_stream_async,
|
| 109 |
-
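inputs=[webcam_input … [removed line truncated in extraction; its replacement on the added side reads: inputs=[webcam_input],]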
|
| 110 |
outputs=[
|
| 111 |
description_out,
|
| 112 |
environment_out,
|
|
|
|
| 20 |
HTTP_TRANSPORT = StreamableHttpTransport(url=MCP_SERVER_URL)
|
| 21 |
MCP_CLIENT = Client(transport=HTTP_TRANSPORT, name=SERVER_NAME)
|
| 22 |
|
| 23 |
+
|
| 24 |
# -------------------------------
|
| 25 |
+
# Async function using user's HF token
|
| 26 |
# -------------------------------
|
| 27 |
async def process_webcam_stream_async(image, oauth_token: gr.OAuthToken | None):
|
| 28 |
"""
|
|
|
|
| 34 |
if image is None:
|
| 35 |
return "", "", "", "", "", "", "", ""
|
| 36 |
|
| 37 |
+
# Convert image to Base64
|
| 38 |
buffered = io.BytesIO()
|
| 39 |
image.save(buffered, format="JPEG")
|
| 40 |
b64_img = base64.b64encode(buffered.getvalue()).decode("utf-8")
|
|
|
|
| 57 |
response_dict = ast.literal_eval(raw_text)
|
| 58 |
vlm_result = response_dict.get("result", {})
|
| 59 |
|
| 60 |
+
# Extract all fields
|
| 61 |
description_out = vlm_result.get("description", "")
|
| 62 |
human_out = vlm_result.get("human", "")
|
| 63 |
environment_out = vlm_result.get("environment", "")
|
|
|
|
| 67 |
hazards_list = vlm_result.get("hazards", [])
|
| 68 |
objects_list = vlm_result.get("objects", [])
|
| 69 |
|
| 70 |
+
# Convert lists to strings
|
| 71 |
objects_str = ", ".join(objects_list) if isinstance(objects_list, list) else str(objects_list)
|
| 72 |
animals_str = ", ".join(animals_list) if isinstance(animals_list, list) else str(animals_list)
|
| 73 |
hazards_str = ", ".join(hazards_list) if isinstance(hazards_list, list) else str(hazards_list)
|
|
|
|
| 84 |
)
|
| 85 |
|
| 86 |
except Exception as e:
|
| 87 |
+
print(f"Error calling MCP API: {e}")
|
| 88 |
import traceback
|
| 89 |
traceback.print_exc()
|
| 90 |
return f"Error: {e}", "", "", "", "", "", "", ""
|
| 91 |
|
| 92 |
+
|
| 93 |
# -------------------------------
|
| 94 |
# Gradio UI
|
| 95 |
# -------------------------------
|
| 96 |
with gr.Blocks() as demo:
|
| 97 |
+
# Hugging Face OAuth login button
|
| 98 |
gr.LoginButton()
|
| 99 |
gr.Markdown("## 🎥 Robot Vision Webcam Stream (MCP Client)")
|
| 100 |
with gr.Row():
|
|
|
|
| 109 |
objects_out = gr.Textbox(label="Objects Detected", lines=2)
|
| 110 |
hazards_out = gr.Textbox(label="Hazards Identified", lines=2)
|
| 111 |
|
| 112 |
+
# Only webcam input in inputs; Gradio automatically injects oauth_token
|
| 113 |
webcam_input.stream(
|
| 114 |
process_webcam_stream_async,
|
| 115 |
+
inputs=[webcam_input],
|
| 116 |
outputs=[
|
| 117 |
description_out,
|
| 118 |
environment_out,
|