OppaAI committed on
Commit
bdb8def
·
verified ·
1 Parent(s): eaeee99

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -60
app.py CHANGED
@@ -3,26 +3,21 @@ import base64
3
  import json
4
  from datetime import datetime
5
  import traceback
6
- from typing import Optional, Dict, Any
7
 
8
  import gradio as gr
9
  from huggingface_hub import HfApi, InferenceClient
10
  from fastmcp import FastMCP
11
 
12
-
13
  HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "OppaAI/Robot_MCP")
14
  HF_VLM_MODEL = os.environ.get("HF_VLM_MODEL", "Qwen/Qwen2.5-VL-7B-Instruct")
15
 
16
- mcp = FastMCP("Robot_MCP_Server")
17
 
18
 
19
- # -------------------------------
20
- # Upload helper
21
- # -------------------------------
22
  def upload_image(image_b64: str, hf_token: str):
23
  try:
24
  image_bytes = base64.b64decode(image_b64)
25
- size_bytes = len(image_bytes)
26
 
27
  os.makedirs("/tmp", exist_ok=True)
28
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
@@ -32,7 +27,6 @@ def upload_image(image_b64: str, hf_token: str):
32
  f.write(image_bytes)
33
 
34
  filename = f"robot_{timestamp}.jpg"
35
-
36
  api = HfApi()
37
  api.upload_file(
38
  path_or_fileobj=local_path,
@@ -43,16 +37,13 @@ def upload_image(image_b64: str, hf_token: str):
43
  )
44
 
45
  url = f"https://huggingface.co/datasets/{HF_DATASET_REPO}/resolve/main/tmp/{filename}"
46
- return local_path, url, filename, size_bytes
47
 
48
  except Exception:
49
  traceback.print_exc()
50
  return None, None, None, 0
51
 
52
 
53
- # -------------------------------
54
- # Safe JSON parse
55
- # -------------------------------
56
  def safe_parse_json_from_text(text: str):
57
  if not text:
58
  return None
@@ -73,42 +64,34 @@ def safe_parse_json_from_text(text: str):
73
  return None
74
 
75
 
76
- # -------------------------------
77
- # TRUE CORE FUNCTION
78
- # -------------------------------
79
- # -------------------------------
80
- # TRUE CORE FUNCTION (with objects)
81
- # -------------------------------
82
- @mcp.tool(name="robot_watch")
83
- def robot_watch_core(payload: Dict[str, Any]):
84
- if isinstance(payload, str):
85
- try:
86
- payload = json.loads(payload)
87
- except:
88
- return {"error": "Invalid JSON payload"}
89
-
90
  hf_token = payload.get("hf_token")
91
- if not hf_token:
92
- return {"error": "hf_token missing"}
93
-
94
- robot_id = payload.get("robot_id", "unknown")
95
  image_b64 = payload.get("image_b64")
 
 
 
 
96
  if not image_b64:
97
- return {"error": "image_b64 missing"}
98
 
99
- # Upload
100
  _, hf_url, _, size_bytes = upload_image(image_b64, hf_token)
101
  if not hf_url:
102
  return {"error": "Image upload failed"}
103
 
104
- # VLM
105
  system_prompt = """
106
- Respond in STRICT JSON ONLY.
107
  {
108
  "description": "...",
109
  "human": "...",
110
  "environment": "...",
111
- "objects": [] // list of detected objects
112
  }
113
  """
114
 
@@ -121,7 +104,6 @@ Respond in STRICT JSON ONLY.
121
  ]
122
 
123
  client = InferenceClient(token=hf_token)
124
-
125
  try:
126
  resp = client.chat.completions.create(
127
  model=HF_VLM_MODEL,
@@ -133,45 +115,30 @@ Respond in STRICT JSON ONLY.
133
  return {"status": "error", "message": str(e)}
134
 
135
  vlm_output = resp.choices[0].message.content.strip()
136
- parsed = safe_parse_json_from_text(vlm_output)
137
-
138
- if parsed is None:
139
- return {
140
- "status": "model_no_json",
141
- "vlm_raw": vlm_output,
142
- "message": "Invalid JSON returned"
143
- }
144
-
145
- # Ensure "objects" is a list
146
- objects = parsed.get("objects", [])
147
- if not isinstance(objects, list):
148
- objects = []
149
 
150
  return {
151
  "status": "success",
152
  "robot_id": robot_id,
153
  "file_size_bytes": size_bytes,
154
  "image_url": hf_url,
155
- "description": parsed.get("description"),
156
- "human": parsed.get("human"),
157
- "environment": parsed.get("environment"),
158
- "objects": objects, # ← new field
159
  "vlm_raw": vlm_output
160
  }
161
 
162
- # -------------------------------
163
- # Gradio wrapper
164
- # -------------------------------
 
165
  def process_json(payload):
166
- return robot_watch_core(payload)
167
 
168
 
169
  app = gr.Interface(
170
  fn=process_json,
171
- inputs=gr.JSON(label="Input JSON"),
172
- outputs=gr.JSON(label="Result JSON"),
173
  title="Robot MCP Server",
174
- description="JSON endpoint for robot vision pipeline.",
175
  api_name="predict"
176
  )
177
 
 
3
  import json
4
  from datetime import datetime
5
  import traceback
6
+ from typing import Dict, Any
7
 
8
  import gradio as gr
9
  from huggingface_hub import HfApi, InferenceClient
10
  from fastmcp import FastMCP
11
 
 
12
  HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "OppaAI/Robot_MCP")
13
  HF_VLM_MODEL = os.environ.get("HF_VLM_MODEL", "Qwen/Qwen2.5-VL-7B-Instruct")
14
 
15
+ mcp = FastMCP("Robot_MCP_Server") # <-- Important
16
 
17
 
 
 
 
18
  def upload_image(image_b64: str, hf_token: str):
19
  try:
20
  image_bytes = base64.b64decode(image_b64)
 
21
 
22
  os.makedirs("/tmp", exist_ok=True)
23
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
 
27
  f.write(image_bytes)
28
 
29
  filename = f"robot_{timestamp}.jpg"
 
30
  api = HfApi()
31
  api.upload_file(
32
  path_or_fileobj=local_path,
 
37
  )
38
 
39
  url = f"https://huggingface.co/datasets/{HF_DATASET_REPO}/resolve/main/tmp/{filename}"
40
+ return local_path, url, filename, len(image_bytes)
41
 
42
  except Exception:
43
  traceback.print_exc()
44
  return None, None, None, 0
45
 
46
 
 
 
 
47
  def safe_parse_json_from_text(text: str):
48
  if not text:
49
  return None
 
64
  return None
65
 
66
 
67
+ # ---------------------------------------------------
68
+ # MCP tool entry point — this function must be exposed to MCP clients
69
+ # ---------------------------------------------------
70
+ @mcp.tool(
71
+ name="robot_watch",
72
+ description="Analyze a base64 image using Qwen VLM and return structured JSON."
73
+ )
74
+ def robot_watch(payload: Dict[str, Any]):
 
 
 
 
 
 
75
  hf_token = payload.get("hf_token")
 
 
 
 
76
  image_b64 = payload.get("image_b64")
77
+ robot_id = payload.get("robot_id", "unknown")
78
+
79
+ if not hf_token:
80
+ return {"error": "Missing hf_token"}
81
  if not image_b64:
82
+ return {"error": "Missing image_b64"}
83
 
 
84
  _, hf_url, _, size_bytes = upload_image(image_b64, hf_token)
85
  if not hf_url:
86
  return {"error": "Image upload failed"}
87
 
 
88
  system_prompt = """
89
+ Respond in STRICT JSON ONLY:
90
  {
91
  "description": "...",
92
  "human": "...",
93
  "environment": "...",
94
+ "objects": []
95
  }
96
  """
97
 
 
104
  ]
105
 
106
  client = InferenceClient(token=hf_token)
 
107
  try:
108
  resp = client.chat.completions.create(
109
  model=HF_VLM_MODEL,
 
115
  return {"status": "error", "message": str(e)}
116
 
117
  vlm_output = resp.choices[0].message.content.strip()
118
+ parsed = safe_parse_json_from_text(vlm_output) or {}
 
 
 
 
 
 
 
 
 
 
 
 
119
 
120
  return {
121
  "status": "success",
122
  "robot_id": robot_id,
123
  "file_size_bytes": size_bytes,
124
  "image_url": hf_url,
125
+ "result": parsed,
 
 
 
126
  "vlm_raw": vlm_output
127
  }
128
 
129
+
130
+ # ---------------------------------------------------
131
+ # Gradio UI — separate from MCP tool layer
132
+ # ---------------------------------------------------
133
  def process_json(payload):
134
+ return robot_watch(payload)
135
 
136
 
137
  app = gr.Interface(
138
  fn=process_json,
139
+ inputs=gr.JSON(),
140
+ outputs=gr.JSON(),
141
  title="Robot MCP Server",
 
142
  api_name="predict"
143
  )
144