Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,16 +6,17 @@ from huggingface_hub import HfApi, InferenceClient
|
|
| 6 |
from datetime import datetime
|
| 7 |
import traceback
|
| 8 |
from typing import Optional, Dict, Any
|
| 9 |
-
|
| 10 |
-
from fastmcp import
|
| 11 |
|
| 12 |
# --- Configuration ---
|
| 13 |
HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "OppaAI/Robot_MCP")
|
| 14 |
HF_VLM_MODEL = os.environ.get("HF_VLM_MODEL", "Qwen/Qwen2.5-VL-7B-Instruct")
|
| 15 |
-
REMOTE_MCP_URL = os.environ.get("REMOTE_MCP_URL", "https://oppaai-robot-mcp-server.hf.space/gradio_api/mcp/sse")
|
| 16 |
|
|
|
|
| 17 |
mcp = FastMCP("Robot_MCP_Server")
|
| 18 |
|
|
|
|
| 19 |
# -----------------------------------------------------
|
| 20 |
# Save and upload image to HF
|
| 21 |
# -----------------------------------------------------
|
|
@@ -42,7 +43,9 @@ def upload_image(image_b64: str, hf_token: str):
|
|
| 42 |
token=hf_token
|
| 43 |
)
|
| 44 |
|
| 45 |
-
|
|
|
|
|
|
|
| 46 |
return local_path, url, filename, size_bytes
|
| 47 |
|
| 48 |
except Exception as e:
|
|
@@ -50,6 +53,7 @@ def upload_image(image_b64: str, hf_token: str):
|
|
| 50 |
traceback.print_exc()
|
| 51 |
return None, None, None, 0
|
| 52 |
|
|
|
|
| 53 |
# -----------------------------------------------------
|
| 54 |
# JSON parsing helper
|
| 55 |
# -----------------------------------------------------
|
|
@@ -60,20 +64,24 @@ def safe_parse_json_from_text(text: str) -> Optional[Dict[str, Any]]:
|
|
| 60 |
return json.loads(text)
|
| 61 |
except:
|
| 62 |
pass
|
|
|
|
| 63 |
cleaned = text.strip().strip("`").strip()
|
| 64 |
if cleaned.lower().startswith("json"):
|
| 65 |
cleaned = cleaned[4:].strip()
|
|
|
|
| 66 |
try:
|
| 67 |
start = cleaned.find("{")
|
| 68 |
end = cleaned.rfind("}")
|
| 69 |
if start >= 0 and end > start:
|
| 70 |
-
return json.loads(cleaned[start:end+1])
|
| 71 |
except:
|
| 72 |
return None
|
|
|
|
| 73 |
return None
|
| 74 |
|
|
|
|
| 75 |
# -----------------------------------------------------
|
| 76 |
-
#
|
| 77 |
# -----------------------------------------------------
|
| 78 |
@mcp.tool()
|
| 79 |
def robot_watch(payload: Dict[str, Any]) -> Dict[str, Any]:
|
|
@@ -92,22 +100,20 @@ def robot_watch(payload: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 92 |
if not image_b64:
|
| 93 |
return {"error": "image_b64 missing"}
|
| 94 |
|
| 95 |
-
# Save + Upload
|
| 96 |
_, hf_url, _, size_bytes = upload_image(image_b64, hf_token)
|
| 97 |
if not hf_url:
|
| 98 |
return {"error": "Image upload failed"}
|
| 99 |
|
| 100 |
-
# VLM
|
| 101 |
-
system_prompt =
|
| 102 |
Respond in STRICT JSON ONLY.
|
| 103 |
-
Rules:
|
| 104 |
-
Provide a long detail description of what you see
|
| 105 |
Output format:
|
| 106 |
-
{
|
| 107 |
"description": "...",
|
| 108 |
-
"human":
|
| 109 |
-
"environment":
|
| 110 |
-
}
|
| 111 |
"""
|
| 112 |
|
| 113 |
messages = [
|
|
@@ -119,6 +125,7 @@ Output format:
|
|
| 119 |
]
|
| 120 |
|
| 121 |
client = InferenceClient(token=hf_token)
|
|
|
|
| 122 |
try:
|
| 123 |
response = client.chat.completions.create(
|
| 124 |
model=HF_VLM_MODEL,
|
|
@@ -131,20 +138,34 @@ Output format:
|
|
| 131 |
|
| 132 |
vlm_output = response.choices[0].message.content.strip()
|
| 133 |
parsed = safe_parse_json_from_text(vlm_output)
|
|
|
|
| 134 |
if parsed is None:
|
| 135 |
-
return {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
|
| 137 |
return {
|
| 138 |
"status": "success",
|
| 139 |
"robot_id": robot_id,
|
| 140 |
"file_size_bytes": size_bytes,
|
| 141 |
-
"
|
|
|
|
|
|
|
|
|
|
| 142 |
"vlm_raw": vlm_output
|
| 143 |
}
|
| 144 |
|
| 145 |
-
|
| 146 |
-
#
|
| 147 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
app = gr.Interface(
|
| 149 |
fn=process_and_describe,
|
| 150 |
inputs=gr.JSON(label="Input JSON Payload (must include hf_token & image_b64)"),
|
|
@@ -153,12 +174,13 @@ app = gr.Interface(
|
|
| 153 |
flagging_mode="never"
|
| 154 |
)
|
| 155 |
|
| 156 |
-
|
| 157 |
-
#
|
| 158 |
-
#
|
|
|
|
| 159 |
if __name__ == "__main__":
|
| 160 |
-
print(
|
| 161 |
-
|
| 162 |
-
|
| 163 |
print("[Gradio] Launching interface...")
|
| 164 |
-
app.launch(
|
|
|
|
| 6 |
from datetime import datetime
|
| 7 |
import traceback
|
| 8 |
from typing import Optional, Dict, Any
|
| 9 |
+
|
| 10 |
+
from fastmcp import FastMCP
|
| 11 |
|
| 12 |
# --- Configuration ---
|
| 13 |
HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "OppaAI/Robot_MCP")
|
| 14 |
HF_VLM_MODEL = os.environ.get("HF_VLM_MODEL", "Qwen/Qwen2.5-VL-7B-Instruct")
|
|
|
|
| 15 |
|
| 16 |
+
# Create MCP server
|
| 17 |
mcp = FastMCP("Robot_MCP_Server")
|
| 18 |
|
| 19 |
+
|
| 20 |
# -----------------------------------------------------
|
| 21 |
# Save and upload image to HF
|
| 22 |
# -----------------------------------------------------
|
|
|
|
| 43 |
token=hf_token
|
| 44 |
)
|
| 45 |
|
| 46 |
+
# FIXED URL
|
| 47 |
+
url = f"https://huggingface.co/datasets/{HF_DATASET_REPO}/resolve/main/tmp/{filename}"
|
| 48 |
+
|
| 49 |
return local_path, url, filename, size_bytes
|
| 50 |
|
| 51 |
except Exception as e:
|
|
|
|
| 53 |
traceback.print_exc()
|
| 54 |
return None, None, None, 0
|
| 55 |
|
| 56 |
+
|
| 57 |
# -----------------------------------------------------
|
| 58 |
# JSON parsing helper
|
| 59 |
# -----------------------------------------------------
|
|
|
|
| 64 |
return json.loads(text)
|
| 65 |
except:
|
| 66 |
pass
|
| 67 |
+
|
| 68 |
cleaned = text.strip().strip("`").strip()
|
| 69 |
if cleaned.lower().startswith("json"):
|
| 70 |
cleaned = cleaned[4:].strip()
|
| 71 |
+
|
| 72 |
try:
|
| 73 |
start = cleaned.find("{")
|
| 74 |
end = cleaned.rfind("}")
|
| 75 |
if start >= 0 and end > start:
|
| 76 |
+
return json.loads(cleaned[start:end + 1])
|
| 77 |
except:
|
| 78 |
return None
|
| 79 |
+
|
| 80 |
return None
|
| 81 |
|
| 82 |
+
|
| 83 |
# -----------------------------------------------------
|
| 84 |
+
# MCP Tool: image → VLM → structured JSON
|
| 85 |
# -----------------------------------------------------
|
| 86 |
@mcp.tool()
|
| 87 |
def robot_watch(payload: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
|
| 100 |
if not image_b64:
|
| 101 |
return {"error": "image_b64 missing"}
|
| 102 |
|
| 103 |
+
# 1. Save + Upload
|
| 104 |
_, hf_url, _, size_bytes = upload_image(image_b64, hf_token)
|
| 105 |
if not hf_url:
|
| 106 |
return {"error": "Image upload failed"}
|
| 107 |
|
| 108 |
+
# 2. VLM prompt
|
| 109 |
+
system_prompt = """
|
| 110 |
Respond in STRICT JSON ONLY.
|
|
|
|
|
|
|
| 111 |
Output format:
|
| 112 |
+
{
|
| 113 |
"description": "...",
|
| 114 |
+
"human": "...",
|
| 115 |
+
"environment": "..."
|
| 116 |
+
}
|
| 117 |
"""
|
| 118 |
|
| 119 |
messages = [
|
|
|
|
| 125 |
]
|
| 126 |
|
| 127 |
client = InferenceClient(token=hf_token)
|
| 128 |
+
|
| 129 |
try:
|
| 130 |
response = client.chat.completions.create(
|
| 131 |
model=HF_VLM_MODEL,
|
|
|
|
| 138 |
|
| 139 |
vlm_output = response.choices[0].message.content.strip()
|
| 140 |
parsed = safe_parse_json_from_text(vlm_output)
|
| 141 |
+
|
| 142 |
if parsed is None:
|
| 143 |
+
return {
|
| 144 |
+
"status": "model_no_json",
|
| 145 |
+
"robot_id": robot_id,
|
| 146 |
+
"vlm_raw": vlm_output,
|
| 147 |
+
"message": "VLM returned invalid JSON"
|
| 148 |
+
}
|
| 149 |
|
| 150 |
return {
|
| 151 |
"status": "success",
|
| 152 |
"robot_id": robot_id,
|
| 153 |
"file_size_bytes": size_bytes,
|
| 154 |
+
"image_url": hf_url,
|
| 155 |
+
"description": parsed.get("description"),
|
| 156 |
+
"human": parsed.get("human"),
|
| 157 |
+
"environment": parsed.get("environment"),
|
| 158 |
"vlm_raw": vlm_output
|
| 159 |
}
|
| 160 |
|
| 161 |
+
|
| 162 |
+
# -----------------------------------------------------
|
| 163 |
+
# Gradio Interface wrapper
|
| 164 |
+
# -----------------------------------------------------
|
| 165 |
+
def process_and_describe(payload):
    """Gradio callback: hand the JSON payload to ``robot_watch``.

    The payload is passed through untouched and whatever ``robot_watch``
    returns is handed back to the caller unchanged.
    """
    # NOTE(review): robot_watch is decorated with @mcp.tool(); confirm the
    # decorated object is still directly callable with the installed fastmcp.
    result = robot_watch(payload)
    return result
|
| 167 |
+
|
| 168 |
+
|
| 169 |
app = gr.Interface(
|
| 170 |
fn=process_and_describe,
|
| 171 |
inputs=gr.JSON(label="Input JSON Payload (must include hf_token & image_b64)"),
|
|
|
|
| 174 |
flagging_mode="never"
|
| 175 |
)
|
| 176 |
|
| 177 |
+
|
| 178 |
+
# -----------------------------------------------------
|
| 179 |
+
# Entry
|
| 180 |
+
# -----------------------------------------------------
|
| 181 |
if __name__ == "__main__":
    # Script entry point: start the MCP server, then serve the Gradio UI.
    import threading

    print("[MCP] Robot MCP Server starting...")
    # FastMCP.run() has no documented `background` option and blocks the
    # calling thread, so the server is started on a daemon thread instead.
    # That lets execution continue to the Gradio launch below, and the
    # thread does not keep the process alive once Gradio exits.
    mcp_thread = threading.Thread(
        target=mcp.run,
        name="robot-mcp-server",
        daemon=True,
    )
    mcp_thread.start()

    print("[Gradio] Launching interface...")
    app.launch()
|