OppaAI committed on
Commit
f1c8dde
·
verified ·
1 Parent(s): 0db099e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -77
app.py CHANGED
@@ -9,80 +9,9 @@ import asyncio
9
  from dotenv import load_dotenv
10
  import ast # Import the Abstract Syntax Tree module
11
 
12
- # Load environment variables (ensure .env is set up locally)
13
- load_dotenv()
14
-
15
- ROBOT_ID = "Robot_MCP_Client"
16
- HF_TOKEN = os.environ.get("HF_TOKEN")
17
- if not HF_TOKEN:
18
- print("Warning: HF_TOKEN not found. API calls may fail.")
19
- HF_TOKEN = "missing_token_placeholder"
20
-
21
- # The MCP URL of your remote server
22
- MCP_SERVER_URL = "https://oppaai-robot-mcp-server.hf.space/gradio_api/mcp/"
23
- SERVER_NAME = "Robot_MCP_Server"
24
- TOOL_NAME = "Robot_MCP_Server_robot_watch"
25
-
26
-
27
- # Initialize the MCP client globally
28
- HTTP_TRANSPORT = StreamableHttpTransport(url=MCP_SERVER_URL)
29
- MCP_CLIENT = Client(transport=HTTP_TRANSPORT, name=SERVER_NAME)
30
-
31
-
32
- async def process_webcam_stream_async(image):
33
- """Send webcam image to HF MCP Server using MCP protocol and get result"""
34
- if image is None:
35
- return "", "", "", ""
36
-
37
- if HF_TOKEN == "missing_token_placeholder":
38
- return "Error: HF_TOKEN not set locally.", "", "", ""
39
-
40
- buffered = io.BytesIO()
41
- image.save(buffered, format="JPEG")
42
- b64_img = base64.b64encode(buffered.getvalue()).decode("utf-8")
43
-
44
- payload = {
45
- "hf_token_input": HF_TOKEN,
46
- "robot_id_input": ROBOT_ID,
47
- "image_b64_input": b64_img
48
- }
49
-
50
- try:
51
- async with MCP_CLIENT:
52
- response = await MCP_CLIENT.call_tool(TOOL_NAME, payload)
53
-
54
- if response.is_error:
55
- error_text = response.content[0].text if response.content and isinstance(response.content, list) else "Unknown error"
56
- raise Exception(f"MCP Tool Error: {error_text}")
57
-
58
- # Use ast.literal_eval because your server returns Python-formatted strings (single quotes)
59
- raw_text = response.content[0].text
60
- response_dict = ast.literal_eval(raw_text)
61
-
62
- # --- 👇 EXTRACTING EACH FIELD CORRECTLY 👇 ---
63
- vlm_result = response_dict.get("result", {})
64
-
65
- description_out = vlm_result.get("description", "")
66
- human_out = vlm_result.get("human", "")
67
- objects_list = vlm_result.get("objects", [])
68
- environment_out = vlm_result.get("environment", "")
69
-
70
- # Format the objects list into a single string for display
71
- objects_str = ", ".join(objects_list) if isinstance(objects_list, list) else str(objects_list)
72
-
73
- return (
74
- description_out,
75
- human_out,
76
- objects_str,
77
- environment_out
78
- )
79
-
80
- except Exception as e:
81
- print(f"Error calling remote MCP API: {e}")
82
- import traceback
83
- traceback.print_exc() # Helps debug other errors
84
- return f"Error: {e}", "", "", ""
85
 
 
86
 
87
  with gr.Blocks() as demo:
88
  gr.Markdown("## 🎥 Robot Vision Webcam Stream (using MCP Client)")
@@ -98,10 +27,12 @@ with gr.Blocks() as demo:
98
  type="pil"
99
  )
100
  with gr.Column():
101
- description_out = gr.Textbox(label="Description")
102
- human_out = gr.Textbox(label="Human")
103
- objects_out = gr.Textbox(label="Objects")
104
- environment_out = gr.Textbox(label="Environment")
 
 
105
 
106
  webcam_input.stream(
107
  process_webcam_stream_async,
 
9
  from dotenv import load_dotenv
10
  import ast # Import the Abstract Syntax Tree module
11
 
12
+ # ... (imports and global variables remain the same) ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
+ # ... (process_webcam_stream_async function remains the same) ...
15
 
16
  with gr.Blocks() as demo:
17
  gr.Markdown("## 🎥 Robot Vision Webcam Stream (using MCP Client)")
 
27
  type="pil"
28
  )
29
  with gr.Column():
30
+ # --- 👇 INCREASED 'lines' PARAMETER HERE 👇 ---
31
+ description_out = gr.Textbox(label="Description", lines=5) # Made larger
32
+ human_out = gr.Textbox(label="Human", lines=3) # Made larger
33
+ objects_out = gr.Textbox(label="Objects", lines=2) # Made larger
34
+ environment_out = gr.Textbox(label="Environment", lines=3) # Made larger
35
+ # --------------------------------------------------
36
 
37
  webcam_input.stream(
38
  process_webcam_stream_async,