OppaAI committed on
Commit
7c6b50b
·
verified ·
1 Parent(s): e6f5e33

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +156 -0
app.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import io
3
+ import gradio as gr
4
+ from fastmcp import Client
5
+ from fastmcp.client import StreamableHttpTransport
6
+ import asyncio
7
+ import ast
8
+ import json
9
+ import os
10
+
11
+ # -------------------------------
12
+ # MCP server info
13
+ # -------------------------------
14
# Identifier this client reports to the MCP server with every frame.
ROBOT_ID = "Robot_MCP_Client"

# Read the Hugging Face token from the environment. Surrounding whitespace is
# stripped so that a value stored with a trailing newline or spaces is not sent
# verbatim, and a blank value is treated the same as an unset one.
HF_TOKEN = (os.environ.get("HF_TOKEN") or "").strip()
if not HF_TOKEN:
    print("Warning: HF_TOKEN not found. API calls may fail.")
    # Sentinel value; process_webcam_stream_async compares against it and
    # refuses to call the server when it is present.
    HF_TOKEN = "missing_token_placeholder"

# Endpoint, display name, and tool name of the remote MCP server.
MCP_SERVER_URL = "https://oppaai-robot-mcp-server.hf.space/gradio_api/mcp/"
SERVER_NAME = "Robot_MCP_Server"
TOOL_NAME = "Robot_MCP_Server_robot_watch"
23
+
24
+ # -------------------------------
25
+ # Initialize MCP client globally
26
+ # -------------------------------
27
# Both objects are built once at import time; the frame handler re-enters
# MCP_CLIENT with `async with` for every call instead of constructing a new one.
HTTP_TRANSPORT = StreamableHttpTransport(
    url=MCP_SERVER_URL,
)
MCP_CLIENT = Client(
    transport=HTTP_TRANSPORT,
    name=SERVER_NAME,
)
29
+
30
+ # -------------------------------
31
+ # Async frame handler: sends each webcam image to the MCP tool using the HF_TOKEN read from the environment
32
+ # -------------------------------
33
# One empty string per output Textbox wired to the stream event.
_EMPTY_ROW = ("", "", "", "", "", "", "", "")


def _status_row(message):
    """Return an output row with *message* in the description slot only."""
    return (message,) + _EMPTY_ROW[1:]


def _encode_jpeg_b64(image):
    """Return *image* (a PIL image) encoded as a Base64 JPEG string."""
    # Pillow refuses to write modes such as RGBA or P as JPEG, so anything
    # other than plain RGB / greyscale is converted before saving.
    if image.mode not in ("RGB", "L"):
        image = image.convert("RGB")
    buffer = io.BytesIO()
    image.save(buffer, format="JPEG")
    return base64.b64encode(buffer.getvalue()).decode("utf-8")


def _response_text(response):
    """Concatenate the string ``text`` parts of a tool response's content list."""
    content = getattr(response, "content", None)
    if not isinstance(content, list):
        # Fallback for an unexpected response structure.
        return str(response)
    return "".join(
        part.text
        for part in content
        if isinstance(getattr(part, "text", None), str)
    )


def _parse_tool_output(raw_text):
    """Parse *raw_text* as JSON, then as a Python literal; None if both fail."""
    try:
        return json.loads(raw_text)
    except json.JSONDecodeError:
        pass
    try:
        # literal_eval only evaluates literals, so it is safe on server output.
        return ast.literal_eval(raw_text)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        return None


async def process_webcam_stream_async(image):
    """Send one webcam frame to the MCP vision tool and unpack its answer.

    Args:
        image: PIL image delivered by the Gradio webcam component, or None
            when no frame is available.

    Returns:
        An 8-tuple for the output textboxes, in order: description,
        environment, indoor/outdoor, lighting condition, human, animals,
        objects (comma-separated), hazards. On any failure the first element
        carries an error message and the remaining seven are empty strings.
        This function never raises; all errors are reported through the tuple.
    """
    if image is None:
        return _EMPTY_ROW

    if HF_TOKEN == "missing_token_placeholder":
        return _status_row("Error: HF_TOKEN not set locally.")

    try:
        # Encoding happens inside the try so an unsupported image surfaces as
        # an error row instead of an unhandled exception in the stream handler.
        payload = {
            "hf_token_input": HF_TOKEN,
            "robot_id_input": ROBOT_ID,
            "image_b64_input": _encode_jpeg_b64(image),
        }

        async with MCP_CLIENT:
            response = await MCP_CLIENT.call_tool(TOOL_NAME, payload)

        if response.is_error:
            error_msg = "Unknown Error"
            content = getattr(response, "content", None)
            if isinstance(content, list):
                error_msg = " ".join(
                    str(getattr(item, "text", "") or "") for item in content
                )
            raise RuntimeError(f"MCP Tool Error: {error_msg}")

        raw_text = _response_text(response)
        parsed = _parse_tool_output(raw_text)
        if not isinstance(parsed, dict):
            # Covers both unparseable text and valid-but-non-dict payloads.
            return _status_row(f"Parsing Error. Raw output: {raw_text}")

        vlm_result = parsed.get("result", {})
        if not isinstance(vlm_result, dict):
            return _status_row(f"Parsing Error. Raw output: {raw_text}")

        objects = vlm_result.get("objects", [])
        if isinstance(objects, (list, tuple)):
            # Items are stringified so non-string entries cannot break join().
            objects_str = ", ".join(map(str, objects))
        else:
            objects_str = str(objects)

        return (
            vlm_result.get("description", ""),
            vlm_result.get("environment", ""),
            vlm_result.get("indoor_or_outdoor", ""),
            vlm_result.get("lighting_condition", ""),
            vlm_result.get("human", ""),
            vlm_result.get("animals", ""),
            objects_str,
            vlm_result.get("hazards", ""),
        )

    except Exception as e:
        # Top-level boundary for the UI callback: report, never propagate.
        print(f"Error calling MCP API: {e}")
        return _status_row(f"Error: {e}")
115
+
116
+
117
+ # -------------------------------
118
+ # Gradio UI
119
+ # -------------------------------
120
# Page layout: webcam capture on the left, one textbox per analysis field on
# the right. Each streamed frame is passed to process_webcam_stream_async.
with gr.Blocks() as demo:
    gr.Markdown("## 🎥 Robot Vision Webcam Stream (MCP Client)")

    with gr.Row():
        webcam_input = gr.Image(
            type="pil",
            sources=["webcam"],
            label="Captured from Web-Cam",
        )
        with gr.Column():
            description_out = gr.Textbox(lines=5, label="Description")
            environment_out = gr.Textbox(lines=3, label="Environment")
            indoor_outdoor_out = gr.Textbox(lines=1, label="Indoor/Outdoor")
            lighting_condition_out = gr.Textbox(lines=1, label="Lighting Condition")
            human_out = gr.Textbox(lines=3, label="Human Detected")
            animals_out = gr.Textbox(lines=2, label="Animals Detected")
            objects_out = gr.Textbox(lines=2, label="Objects Detected")
            hazards_out = gr.Textbox(lines=2, label="Hazards Identified")

    # Order must match the tuple returned by process_webcam_stream_async.
    result_boxes = [
        description_out,
        environment_out,
        indoor_outdoor_out,
        lighting_condition_out,
        human_out,
        animals_out,
        objects_out,
        hazards_out,
    ]

    webcam_input.stream(
        process_webcam_stream_async,
        inputs=[webcam_input],
        outputs=result_boxes,
        stream_every=1.0,
    )

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch(ssr_mode=False)