OppaAI committed on
Commit
87deda2
·
verified ·
1 Parent(s): ff60aab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -27
app.py CHANGED
@@ -6,16 +6,17 @@ from huggingface_hub import HfApi, InferenceClient
6
  from datetime import datetime
7
  import traceback
8
  from typing import Optional, Dict, Any
9
- import asyncio
10
- from fastmcp import Client, FastMCP
11
 
12
  # --- Configuration ---
13
  HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "OppaAI/Robot_MCP")
14
  HF_VLM_MODEL = os.environ.get("HF_VLM_MODEL", "Qwen/Qwen2.5-VL-7B-Instruct")
15
- REMOTE_MCP_URL = os.environ.get("REMOTE_MCP_URL", "https://oppaai-robot-mcp-server.hf.space/gradio_api/mcp/sse")
16
 
 
17
  mcp = FastMCP("Robot_MCP_Server")
18
 
 
19
  # -----------------------------------------------------
20
  # Save and upload image to HF
21
  # -----------------------------------------------------
@@ -42,7 +43,9 @@ def upload_image(image_b64: str, hf_token: str):
42
  token=hf_token
43
  )
44
 
45
- url = f"https://huggingface.co/datasets/{HF_DATASET_REPO}/resolve/main/{filename}"
 
 
46
  return local_path, url, filename, size_bytes
47
 
48
  except Exception as e:
@@ -50,6 +53,7 @@ def upload_image(image_b64: str, hf_token: str):
50
  traceback.print_exc()
51
  return None, None, None, 0
52
 
 
53
  # -----------------------------------------------------
54
  # JSON parsing helper
55
  # -----------------------------------------------------
@@ -60,20 +64,24 @@ def safe_parse_json_from_text(text: str) -> Optional[Dict[str, Any]]:
60
  return json.loads(text)
61
  except:
62
  pass
 
63
  cleaned = text.strip().strip("`").strip()
64
  if cleaned.lower().startswith("json"):
65
  cleaned = cleaned[4:].strip()
 
66
  try:
67
  start = cleaned.find("{")
68
  end = cleaned.rfind("}")
69
  if start >= 0 and end > start:
70
- return json.loads(cleaned[start:end+1])
71
  except:
72
  return None
 
73
  return None
74
 
 
75
  # -----------------------------------------------------
76
- # Main pipeline: image → VLM → remote tool
77
  # -----------------------------------------------------
78
  @mcp.tool()
79
  def robot_watch(payload: Dict[str, Any]) -> Dict[str, Any]:
@@ -92,22 +100,20 @@ def robot_watch(payload: Dict[str, Any]) -> Dict[str, Any]:
92
  if not image_b64:
93
  return {"error": "image_b64 missing"}
94
 
95
- # Save + Upload
96
  _, hf_url, _, size_bytes = upload_image(image_b64, hf_token)
97
  if not hf_url:
98
  return {"error": "Image upload failed"}
99
 
100
- # VLM system prompt: decide MCP tool automatically
101
- system_prompt = f"""
102
  Respond in STRICT JSON ONLY.
103
- Rules:
104
- Provide a long detail description of what you see
105
  Output format:
106
- {{
107
  "description": "...",
108
- "human": brief description of humans if any (eg. a man with glasses)
109
- "environment": category of the environment (eg. room)
110
- }}
111
  """
112
 
113
  messages = [
@@ -119,6 +125,7 @@ Output format:
119
  ]
120
 
121
  client = InferenceClient(token=hf_token)
 
122
  try:
123
  response = client.chat.completions.create(
124
  model=HF_VLM_MODEL,
@@ -131,20 +138,34 @@ Output format:
131
 
132
  vlm_output = response.choices[0].message.content.strip()
133
  parsed = safe_parse_json_from_text(vlm_output)
 
134
  if parsed is None:
135
- return {"status": "model_no_json", "robot_id": robot_id, "vlm_raw": vlm_output, "message": "VLM returned invalid JSON"}
 
 
 
 
 
136
 
137
  return {
138
  "status": "success",
139
  "robot_id": robot_id,
140
  "file_size_bytes": size_bytes,
141
- "vlm_description": parsed.get("description"),
 
 
 
142
  "vlm_raw": vlm_output
143
  }
144
 
145
- # ------------------------------
146
- # Gradio Interface
147
- # ------------------------------
 
 
 
 
 
148
  app = gr.Interface(
149
  fn=process_and_describe,
150
  inputs=gr.JSON(label="Input JSON Payload (must include hf_token & image_b64)"),
@@ -153,12 +174,13 @@ app = gr.Interface(
153
  flagging_mode="never"
154
  )
155
 
156
- # ------------------------------
157
- # Main Entry
158
- # ------------------------------
 
159
  if __name__ == "__main__":
160
- print(f"[Config] HF_DATASET_REPO: {HF_DATASET_REPO}")
161
- print(f"[Config] HF_VLM_MODEL: {HF_VLM_MODEL}")
162
- print(f"[Config] REMOTE_MCP_URL: {REMOTE_MCP_URL}")
163
  print("[Gradio] Launching interface...")
164
- app.launch(mcp_server=True)
 
6
  from datetime import datetime
7
  import traceback
8
  from typing import Optional, Dict, Any
9
+
10
+ from fastmcp import FastMCP
11
 
12
  # --- Configuration ---
13
  HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "OppaAI/Robot_MCP")
14
  HF_VLM_MODEL = os.environ.get("HF_VLM_MODEL", "Qwen/Qwen2.5-VL-7B-Instruct")
 
15
 
16
+ # Create MCP server
17
  mcp = FastMCP("Robot_MCP_Server")
18
 
19
+
20
  # -----------------------------------------------------
21
  # Save and upload image to HF
22
  # -----------------------------------------------------
 
43
  token=hf_token
44
  )
45
 
46
+ # FIXED URL
47
+ url = f"https://huggingface.co/datasets/{HF_DATASET_REPO}/resolve/main/tmp/{filename}"
48
+
49
  return local_path, url, filename, size_bytes
50
 
51
  except Exception as e:
 
53
  traceback.print_exc()
54
  return None, None, None, 0
55
 
56
+
57
  # -----------------------------------------------------
58
  # JSON parsing helper
59
  # -----------------------------------------------------
 
64
  return json.loads(text)
65
  except:
66
  pass
67
+
68
  cleaned = text.strip().strip("`").strip()
69
  if cleaned.lower().startswith("json"):
70
  cleaned = cleaned[4:].strip()
71
+
72
  try:
73
  start = cleaned.find("{")
74
  end = cleaned.rfind("}")
75
  if start >= 0 and end > start:
76
+ return json.loads(cleaned[start:end + 1])
77
  except:
78
  return None
79
+
80
  return None
81
 
82
+
83
  # -----------------------------------------------------
84
+ # MCP Tool: image → VLM → structured JSON
85
  # -----------------------------------------------------
86
  @mcp.tool()
87
  def robot_watch(payload: Dict[str, Any]) -> Dict[str, Any]:
 
100
  if not image_b64:
101
  return {"error": "image_b64 missing"}
102
 
103
+ # 1. Save + Upload
104
  _, hf_url, _, size_bytes = upload_image(image_b64, hf_token)
105
  if not hf_url:
106
  return {"error": "Image upload failed"}
107
 
108
+ # 2. VLM prompt
109
+ system_prompt = """
110
  Respond in STRICT JSON ONLY.
 
 
111
  Output format:
112
+ {
113
  "description": "...",
114
+ "human": "...",
115
+ "environment": "..."
116
+ }
117
  """
118
 
119
  messages = [
 
125
  ]
126
 
127
  client = InferenceClient(token=hf_token)
128
+
129
  try:
130
  response = client.chat.completions.create(
131
  model=HF_VLM_MODEL,
 
138
 
139
  vlm_output = response.choices[0].message.content.strip()
140
  parsed = safe_parse_json_from_text(vlm_output)
141
+
142
  if parsed is None:
143
+ return {
144
+ "status": "model_no_json",
145
+ "robot_id": robot_id,
146
+ "vlm_raw": vlm_output,
147
+ "message": "VLM returned invalid JSON"
148
+ }
149
 
150
  return {
151
  "status": "success",
152
  "robot_id": robot_id,
153
  "file_size_bytes": size_bytes,
154
+ "image_url": hf_url,
155
+ "description": parsed.get("description"),
156
+ "human": parsed.get("human"),
157
+ "environment": parsed.get("environment"),
158
  "vlm_raw": vlm_output
159
  }
160
 
161
+
162
+ # -----------------------------------------------------
163
+ # Gradio Interface wrapper
164
+ # -----------------------------------------------------
165
+ def process_and_describe(payload):
166
+ return robot_watch(payload)
167
+
168
+
169
  app = gr.Interface(
170
  fn=process_and_describe,
171
  inputs=gr.JSON(label="Input JSON Payload (must include hf_token & image_b64)"),
 
174
  flagging_mode="never"
175
  )
176
 
177
+
178
+ # -----------------------------------------------------
179
+ # Entry
180
+ # -----------------------------------------------------
181
  if __name__ == "__main__":
182
+ print("[MCP] Robot MCP Server starting...")
183
+ mcp.run(background=True)
184
+
185
  print("[Gradio] Launching interface...")
186
+ app.launch()