OppaAI committed on
Commit
54151d7
·
verified ·
1 Parent(s): 867053d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -38
app.py CHANGED
@@ -7,17 +7,19 @@ from datetime import datetime
7
  import traceback
8
  from typing import Optional, Dict, Any
9
  import asyncio
10
- from fastmcp import Client
11
 
12
  # --- Configuration ---
13
  HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "OppaAI/Robot_MCP")
14
  HF_VLM_MODEL = os.environ.get("HF_VLM_MODEL", "Qwen/Qwen2.5-VL-7B-Instruct")
15
  REMOTE_MCP_URL = os.environ.get("REMOTE_MCP_URL", "https://oppaai-robot-mcp-server.hf.space/gradio_api/mcp/sse")
16
 
 
 
17
  # -----------------------------------------------------
18
  # Save and upload image to HF
19
  # -----------------------------------------------------
20
- def save_and_upload_image(image_b64: str, hf_token: str):
21
  try:
22
  image_bytes = base64.b64decode(image_b64)
23
  size_bytes = len(image_bytes)
@@ -70,25 +72,11 @@ def safe_parse_json_from_text(text: str) -> Optional[Dict[str, Any]]:
70
  return None
71
  return None
72
 
73
- # -----------------------------------------------------
74
- # Call remote MCP tool asynchronously
75
- # -----------------------------------------------------
76
- async def call_remote_tool(tool_name: str, **kwargs):
77
- async with Client(REMOTE_MCP_URL) as client:
78
- result = await client.call_tool(tool_name, **kwargs)
79
- return result
80
-
81
- def validate_and_call_tool(tool_name: str, tool_args: dict) -> Dict[str, Any]:
82
- try:
83
- return asyncio.run(call_remote_tool(tool_name, **tool_args))
84
- except Exception as e:
85
- traceback.print_exc()
86
- return {"error": f"Remote tool execution error: {str(e)}"}
87
-
88
  # -----------------------------------------------------
89
  # Main pipeline: image → VLM → remote tool
90
  # -----------------------------------------------------
91
- def process_and_describe(payload: Dict[str, Any]) -> Dict[str, Any]:
 
92
  if isinstance(payload, str):
93
  try:
94
  payload = json.loads(payload)
@@ -105,7 +93,7 @@ def process_and_describe(payload: Dict[str, Any]) -> Dict[str, Any]:
105
  return {"error": "image_b64 missing"}
106
 
107
  # Save + Upload
108
- _, hf_url, _, size_bytes = save_and_upload_image(image_b64, hf_token)
109
  if not hf_url:
110
  return {"error": "Image upload failed"}
111
 
@@ -113,24 +101,19 @@ def process_and_describe(payload: Dict[str, Any]) -> Dict[str, Any]:
113
  system_prompt = f"""
114
  Respond in STRICT JSON ONLY.
115
  Rules:
116
- 1. Provide a long detail description of what you see.
117
- 2. Decide ONE MCP tool to call from:
118
- - chat_with_human
119
- 3. If a human is gesturing with open hand, then set "tool_name": "chat_with_human".
120
- 4. Otherwise, set "tool_name": "" and "arguments": {{}}
121
-
122
  Output format:
123
  {{
124
  "description": "...",
125
- "tool_name": "chat_with_human",
126
- "arguments": {{}}
127
  }}
128
  """
129
 
130
  messages = [
131
  {"role": "system", "content": system_prompt},
132
  {"role": "user", "content": [
133
- {"type": "text", "text": "Analyze the image and call the appropriate MCP tool."},
134
  {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"}}
135
  ]}
136
  ]
@@ -151,21 +134,11 @@ Output format:
151
  if parsed is None:
152
  return {"status": "model_no_json", "robot_id": robot_id, "vlm_raw": vlm_output, "message": "VLM returned invalid JSON"}
153
 
154
- # Call the MCP tool directly if VLM chooses one
155
- tool_name = parsed.get("tool_name")
156
- tool_args = parsed.get("arguments") or {}
157
- tool_result = None
158
- if tool_name:
159
- tool_result = validate_and_call_tool(tool_name, tool_args)
160
-
161
  return {
162
  "status": "success",
163
  "robot_id": robot_id,
164
  "file_size_bytes": size_bytes,
165
  "vlm_description": parsed.get("description"),
166
- "chosen_tool": tool_name,
167
- "tool_arguments": tool_args,
168
- "tool_execution_result": tool_result,
169
  "vlm_raw": vlm_output
170
  }
171
 
 
7
  import traceback
8
  from typing import Optional, Dict, Any
9
  import asyncio
10
+ from fastmcp import Client, FastMCP
11
 
12
  # --- Configuration ---
13
  HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "OppaAI/Robot_MCP")
14
  HF_VLM_MODEL = os.environ.get("HF_VLM_MODEL", "Qwen/Qwen2.5-VL-7B-Instruct")
15
  REMOTE_MCP_URL = os.environ.get("REMOTE_MCP_URL", "https://oppaai-robot-mcp-server.hf.space/gradio_api/mcp/sse")
16
 
17
+ mcp = FastMCP("Robot_MCP_Server")
18
+
19
  # -----------------------------------------------------
20
  # Save and upload image to HF
21
  # -----------------------------------------------------
22
+ def upload_image(image_b64: str, hf_token: str):
23
  try:
24
  image_bytes = base64.b64decode(image_b64)
25
  size_bytes = len(image_bytes)
 
72
  return None
73
  return None
74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  # -----------------------------------------------------
76
  # Main pipeline: image → VLM → remote tool
77
  # -----------------------------------------------------
78
+ @mcp.tool()
79
+ def robot_watch(payload: Dict[str, Any]) -> Dict[str, Any]:
80
  if isinstance(payload, str):
81
  try:
82
  payload = json.loads(payload)
 
93
  return {"error": "image_b64 missing"}
94
 
95
  # Save + Upload
96
+ _, hf_url, _, size_bytes = upload_image(image_b64, hf_token)
97
  if not hf_url:
98
  return {"error": "Image upload failed"}
99
 
 
101
  system_prompt = f"""
102
  Respond in STRICT JSON ONLY.
103
  Rules:
104
+ Provide a long detail description of what you see
 
 
 
 
 
105
  Output format:
106
  {{
107
  "description": "...",
108
+ "human": brief description of humans if any (eg. a man with glasses)
109
+ "environment": category of the environment (eg. room)
110
  }}
111
  """
112
 
113
  messages = [
114
  {"role": "system", "content": system_prompt},
115
  {"role": "user", "content": [
116
+ {"type": "text", "text": "Analyze the image and provide the description."},
117
  {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"}}
118
  ]}
119
  ]
 
134
  if parsed is None:
135
  return {"status": "model_no_json", "robot_id": robot_id, "vlm_raw": vlm_output, "message": "VLM returned invalid JSON"}
136
 
 
 
 
 
 
 
 
137
  return {
138
  "status": "success",
139
  "robot_id": robot_id,
140
  "file_size_bytes": size_bytes,
141
  "vlm_description": parsed.get("description"),
 
 
 
142
  "vlm_raw": vlm_output
143
  }
144