Spaces:

OppaAI
/

Robot_MCP_Server

Sleeping

App Files Community

OppaAI committed on Nov 18, 2025

Commit

5410665

verified ·

1 Parent(s): 18adda8

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -22

app.py CHANGED Viewed

@@ -3,9 +3,7 @@ import base64
 import gradio as gr
 from huggingface_hub import upload_file, InferenceClient
 import json
-from fastmcp import MCP, MCPClient
-from playsound import playsound
-from gtts import gTTS
 # --- Config ---
 HF_DATASET_REPO = "OppaAI/Robot_MCP"
@@ -16,16 +14,12 @@ mcp = MCP()  # 用於定義工具
 # --- MCP Tool ---
 @mcp.tools()
-def say_hi(text="Hi, How are you doing?"):
-    # 1️⃣ 生成 mp3
-    tts = gTTS(text=text, lang="en")
-    tmp_path = "/tmp/say_hi.mp3"
-    tts.save(tmp_path)
-    # 2️⃣ 播放音檔
-    playsound(tmp_path)
-    return f"Played: {text}"
 # --- Helper Functions ---
 def save_and_upload_image(image_b64, hf_token):
@@ -58,16 +52,22 @@ def process_and_describe(payload: dict):
         if not image_b64:
             return {"error": "No image provided."}
         local_tmp_path, hf_url, path_in_repo, size_bytes = save_and_upload_image(image_b64, hf_token)
         hf_client = InferenceClient(token=hf_token)
         system_prompt = """
         You are a helpful robot assistant.
         1. Describe the image in detail.
-        2. Suggest what the robot should do next.
-           - Human figure → say 'Hi'.
-        Always respond in JSON:
-        {"description": "...", "action": "say_hi"}
         """
         messages_payload = [
@@ -78,24 +78,31 @@ def process_and_describe(payload: dict):
             ]}
         ]
         chat_completion = hf_client.chat.completions.create(
             model=HF_VLM_MODEL,
             messages=messages_payload,
-            max_tokens=200
         )
         vlm_text = chat_completion.choices[0].message.content.strip()
         action_data = {}
         try:
             action_data = json.loads(vlm_text)
         except Exception:
-            action_data = {"description": vlm_text, "action": "unknown"}
-        # --- Call MCP tool ---
         vlm_action = action_data.get("action")
         tool_result = None
         if vlm_action == "say_hi":
-            tool_result = say_hi(text="Hi!")  # 這裡會生成 /tmp/say_hi.mp3
         return {
             "saved_to_hf_hub": True,
@@ -116,7 +123,7 @@ def process_and_describe(payload: dict):
 # --- Gradio MCP Interface ---
 demo = gr.Interface(
     fn=process_and_describe,
-    inputs=gr.JSON(label="Input Payload"),
     outputs=gr.JSON(label="Reply to Jetson"),
     api_name="predict"
 )

 import gradio as gr
 from huggingface_hub import upload_file, InferenceClient
 import json
+from fastmcp import MCP
 # --- Config ---
 HF_DATASET_REPO = "OppaAI/Robot_MCP"
 # --- MCP Tool ---
 @mcp.tools()
+def say_hi(greeting_text="Hi!"):
+    """Return a greeting command in JSON."""
+    return {
+        "command": "say_hi",
+        "text": greeting_text
+    }
 # --- Helper Functions ---
 def save_and_upload_image(image_b64, hf_token):
         if not image_b64:
             return {"error": "No image provided."}
+        # Save image and upload to HF
         local_tmp_path, hf_url, path_in_repo, size_bytes = save_and_upload_image(image_b64, hf_token)
         hf_client = InferenceClient(token=hf_token)
+        # System prompt for VLM
         system_prompt = """
         You are a helpful robot assistant.
         1. Describe the image in detail.
+        2. Suggest what the robot should do next:
+           - If you see a human figure, suggest saying 'Hi' in a friendly and varied way.
+        Always respond in JSON format:
+        {
+            "description": "...",
+            "action": "say_hi",
+            "greeting_text": "a friendly greeting that can be different each time"
+        }
         """
         messages_payload = [
             ]}
         ]
+        # Call VLM
         chat_completion = hf_client.chat.completions.create(
             model=HF_VLM_MODEL,
             messages=messages_payload,
+            max_tokens=300
         )
+        # Extract VLM text
         vlm_text = chat_completion.choices[0].message.content.strip()
         action_data = {}
         try:
             action_data = json.loads(vlm_text)
         except Exception:
+            action_data = {
+                "description": vlm_text,
+                "action": "unknown",
+                "greeting_text": "Hi!"
+            }
+        # --- Call MCP tool if needed ---
         vlm_action = action_data.get("action")
         tool_result = None
         if vlm_action == "say_hi":
+            greeting_text = action_data.get("greeting_text", "Hi!")
+            tool_result = say_hi(greeting_text=greeting_text)
         return {
             "saved_to_hf_hub": True,
 # --- Gradio MCP Interface ---
 demo = gr.Interface(
     fn=process_and_describe,
+    inputs=gr.JSON(label="Input Payload (Dict format with 'image_b64')"),
     outputs=gr.JSON(label="Reply to Jetson"),
     api_name="predict"
 )