Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,12 +8,22 @@ from typing import Dict, Any
|
|
| 8 |
import gradio as gr
|
| 9 |
from huggingface_hub import HfApi, InferenceClient
|
| 10 |
from fastmcp import FastMCP
|
|
|
|
| 11 |
|
| 12 |
HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "OppaAI/Robot_MCP")
|
| 13 |
HF_VLM_MODEL = os.environ.get("HF_VLM_MODEL", "Qwen/Qwen2.5-VL-7B-Instruct")
|
| 14 |
|
| 15 |
mcp = FastMCP("Robot_MCP_Server") # <-- Important
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
def upload_image(image_b64: str, hf_token: str):
|
| 19 |
try:
|
|
@@ -69,17 +79,19 @@ def safe_parse_json_from_text(text: str):
|
|
| 69 |
# ---------------------------------------------------
|
| 70 |
@mcp.tool(
|
| 71 |
name="robot_watch",
|
| 72 |
-
description="Analyze a base64 image using Qwen VLM and return structured JSON."
|
|
|
|
| 73 |
)
|
| 74 |
-
def robot_watch(payload:
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
|
|
|
| 78 |
|
| 79 |
if not hf_token:
|
|
|
|
| 80 |
return {"error": "Missing hf_token"}
|
| 81 |
-
|
| 82 |
-
return {"error": "Missing image_b64"}
|
| 83 |
|
| 84 |
_, hf_url, _, size_bytes = upload_image(image_b64, hf_token)
|
| 85 |
if not hf_url:
|
|
@@ -130,8 +142,11 @@ Respond in STRICT JSON ONLY:
|
|
| 130 |
# ---------------------------------------------------
|
| 131 |
# Gradio UI — separate from MCP tool layer
|
| 132 |
# ---------------------------------------------------
|
| 133 |
-
|
| 134 |
-
|
|
|
|
|
|
|
|
|
|
| 135 |
|
| 136 |
|
| 137 |
app = gr.Interface(
|
|
@@ -143,4 +158,5 @@ app = gr.Interface(
|
|
| 143 |
)
|
| 144 |
|
| 145 |
if __name__ == "__main__":
|
|
|
|
| 146 |
app.launch(mcp_server=True)
|
|
|
|
| 8 |
import gradio as gr
|
| 9 |
from huggingface_hub import HfApi, InferenceClient
|
| 10 |
from fastmcp import FastMCP
|
| 11 |
+
from pydantic import BaseModel, Field # Import Pydantic BaseModel and Field
|
| 12 |
|
| 13 |
HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "OppaAI/Robot_MCP")
|
| 14 |
HF_VLM_MODEL = os.environ.get("HF_VLM_MODEL", "Qwen/Qwen2.5-VL-7B-Instruct")
|
| 15 |
|
| 16 |
mcp = FastMCP("Robot_MCP_Server") # <-- Important
|
| 17 |
|
| 18 |
+
# ---------------------------------------------------
|
| 19 |
+
# Define Pydantic Schema for the input payload
|
| 20 |
+
# ---------------------------------------------------
|
| 21 |
+
# This defines the expected structure and automatically generates the valid JSON schema
|
| 22 |
+
class RobotWatchPayload(BaseModel):
    """Validated input for the ``robot_watch`` MCP tool.

    Pydantic derives the tool's JSON input schema from this model, so
    the ``Field`` descriptions below surface directly to MCP clients.
    """

    # Hugging Face API token used for the dataset upload and VLM call.
    hf_token: str = Field(description="Your Hugging Face API token.")
    # Optional robot identifier; falls back to "unknown" when omitted.
    robot_id: str = Field(default="unknown", description="The unique identifier for the robot.")
    # Raw base64-encoded image payload to analyze.
    image_b64: str = Field(description="Base64 encoded image data.")
|
| 26 |
+
|
| 27 |
|
| 28 |
def upload_image(image_b64: str, hf_token: str):
|
| 29 |
try:
|
|
|
|
| 79 |
# ---------------------------------------------------
|
| 80 |
@mcp.tool(
|
| 81 |
name="robot_watch",
|
| 82 |
+
description="Analyze a base64 image using Qwen VLM and return structured JSON.",
|
| 83 |
+
input_schema=RobotWatchPayload # <-- Explicitly use the Pydantic schema here
|
| 84 |
)
|
| 85 |
+
def robot_watch(payload: RobotWatchPayload): # <-- Type hint with Pydantic model
|
| 86 |
+
# The payload is already validated and typed correctly by fastmcp/pydantic
|
| 87 |
+
hf_token = payload.hf_token
|
| 88 |
+
image_b64 = payload.image_b64
|
| 89 |
+
robot_id = payload.robot_id
|
| 90 |
|
| 91 |
if not hf_token:
|
| 92 |
+
# This check is technically redundant if the schema demands it, but safe.
|
| 93 |
return {"error": "Missing hf_token"}
|
| 94 |
+
# image_b64 existence is also guaranteed by the schema
|
|
|
|
| 95 |
|
| 96 |
_, hf_url, _, size_bytes = upload_image(image_b64, hf_token)
|
| 97 |
if not hf_url:
|
|
|
|
| 142 |
# ---------------------------------------------------
|
| 143 |
# Gradio UI — separate from MCP tool layer
|
| 144 |
# ---------------------------------------------------
|
| 145 |
+
# The process_json function will still work with the Pydantic model input
|
| 146 |
+
def process_json(payload: Dict[str, Any]):
    """Gradio-facing wrapper around the ``robot_watch`` MCP tool.

    The Gradio UI supplies a plain dict (unlike the MCP path, where
    fastmcp/pydantic validate the payload), so the dict is run through
    the ``RobotWatchPayload`` schema here before delegating.

    Args:
        payload: Mapping expected to carry ``hf_token`` and ``image_b64``,
            and optionally ``robot_id`` (see ``RobotWatchPayload``).

    Returns:
        The dict produced by ``robot_watch``, or an ``{"error": ...}``
        dict when the payload does not match the schema — consistent
        with the error-dict style ``robot_watch`` itself uses.
    """
    try:
        # pydantic.ValidationError subclasses ValueError; TypeError covers
        # a non-mapping payload (e.g. None) arriving from the UI layer.
        pydantic_payload = RobotWatchPayload(**payload)
    except (TypeError, ValueError) as exc:
        return {"error": f"Invalid payload: {exc}"}
    return robot_watch(pydantic_payload)
|
| 150 |
|
| 151 |
|
| 152 |
app = gr.Interface(
|
|
|
|
| 158 |
)
|
| 159 |
|
| 160 |
if __name__ == "__main__":
    # NOTE(review): mcp_server=True asks Gradio to expose MCP endpoints
    # alongside the UI; whether it picks up the module-level `mcp`
    # FastMCP instance is not visible from this file — confirm against
    # Gradio's MCP integration documentation.
    app.launch(mcp_server=True)
|