OppaAI commited on
Commit
9c6065d
·
verified ·
1 Parent(s): 5410665

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -43
app.py CHANGED
@@ -3,23 +3,23 @@ import base64
3
  import gradio as gr
4
  from huggingface_hub import upload_file, InferenceClient
5
  import json
6
- from fastmcp import MCP
7
 
8
  # --- Config ---
9
  HF_DATASET_REPO = "OppaAI/Robot_MCP"
10
- HF_VLM_MODEL = "Qwen/Qwen2.5-VL-7B-Instruct"
11
 
12
  # --- MCP server instance ---
13
- mcp = MCP() # used to define tools
 
 
 
14
 
15
  # --- MCP Tool ---
16
  @mcp.tools()
17
- def say_hi(greeting_text="Hi!"):
18
  """Return a greeting command in JSON."""
19
- return {
20
- "command": "say_hi",
21
- "text": greeting_text
22
- }
23
 
24
  # --- Helper Functions ---
25
  def save_and_upload_image(image_b64, hf_token):
@@ -52,22 +52,23 @@ def process_and_describe(payload: dict):
52
  if not image_b64:
53
  return {"error": "No image provided."}
54
 
55
- # Save image and upload to HF
56
  local_tmp_path, hf_url, path_in_repo, size_bytes = save_and_upload_image(image_b64, hf_token)
 
 
57
  hf_client = InferenceClient(token=hf_token)
58
 
59
- # System prompt for VLM
60
- system_prompt = """
61
- You are a helpful robot assistant.
62
- 1. Describe the image in detail.
63
- 2. Suggest what the robot should do next:
64
- - If you see a human figure, suggest saying 'Hi' in a friendly and varied way.
65
- Always respond in JSON format:
66
- {
67
- "description": "...",
68
- "action": "say_hi",
69
- "greeting_text": "a friendly greeting that can be different each time"
70
- }
71
  """
72
 
73
  messages_payload = [
@@ -78,31 +79,17 @@ def process_and_describe(payload: dict):
78
  ]}
79
  ]
80
 
81
- # Call VLM
82
  chat_completion = hf_client.chat.completions.create(
83
  model=HF_VLM_MODEL,
84
  messages=messages_payload,
85
  max_tokens=300
86
  )
87
 
88
- # Extract VLM text
89
  vlm_text = chat_completion.choices[0].message.content.strip()
90
- action_data = {}
91
- try:
92
- action_data = json.loads(vlm_text)
93
- except Exception:
94
- action_data = {
95
- "description": vlm_text,
96
- "action": "unknown",
97
- "greeting_text": "Hi!"
98
- }
99
-
100
- # --- Call MCP tool if needed ---
101
- vlm_action = action_data.get("action")
102
- tool_result = None
103
- if vlm_action == "say_hi":
104
- greeting_text = action_data.get("greeting_text", "Hi!")
105
- tool_result = say_hi(greeting_text=greeting_text)
106
 
107
  return {
108
  "saved_to_hf_hub": True,
@@ -112,9 +99,7 @@ def process_and_describe(payload: dict):
112
  "file_size_bytes": size_bytes,
113
  "robot_id": robot_id,
114
  "vlm_response": vlm_text,
115
- "vlm_action": vlm_action,
116
- "vlm_description": action_data.get("description", ""),
117
- "tool_result": tool_result
118
  }
119
 
120
  except Exception as e:
@@ -123,7 +108,7 @@ def process_and_describe(payload: dict):
123
  # --- Gradio MCP Interface ---
124
  demo = gr.Interface(
125
  fn=process_and_describe,
126
- inputs=gr.JSON(label="Input Payload (Dict format with 'image_b64')"),
127
  outputs=gr.JSON(label="Reply to Jetson"),
128
  api_name="predict"
129
  )
 
3
  import gradio as gr
4
  from huggingface_hub import upload_file, InferenceClient
5
  import json
6
+ from fastmcp import MCP, STIO
7
 
8
  # --- Config ---
9
  HF_DATASET_REPO = "OppaAI/Robot_MCP"
10
+ HF_VLM_MODEL = "Qwen/Qwen2.5-VL-7B-Instruct"
11
 
12
  # --- MCP server instance ---
13
+ mcp = MCP()
14
+
15
+ # --- STIO for the LLM ---
16
+ stio = STIO(mcp) # Bind STIO to MCP tools
17
 
18
  # --- MCP Tool ---
19
  @mcp.tools()
20
+ def say_hi(greeting_text: str = "Hi there!"):
21
  """Return a greeting command in JSON."""
22
+ return {"command": "say_hi", "text": greeting_text}
 
 
 
23
 
24
  # --- Helper Functions ---
25
  def save_and_upload_image(image_b64, hf_token):
 
52
  if not image_b64:
53
  return {"error": "No image provided."}
54
 
55
+ # Save image & upload
56
  local_tmp_path, hf_url, path_in_repo, size_bytes = save_and_upload_image(image_b64, hf_token)
57
+
58
+ # Initialize HF client
59
  hf_client = InferenceClient(token=hf_token)
60
 
61
+ # --- System prompt with STIO instructions ---
62
+ system_prompt = f"""
63
+ You are a helpful robot assistant. You have access to MCP tools via STIO.
64
+ When you receive an image, you must:
65
+ 1️⃣ Describe the image in detail.
66
+ 2️⃣ Decide actions for the robot. Example:
67
+ - Human figure → call `say_hi` tool with a friendly greeting (vary every time)
68
+ 3️⃣ Use STIO to call the tools. Always respond in JSON if calling tools.
69
+
70
+ Available tools:
71
+ {stio.describe_tools()}
 
72
  """
73
 
74
  messages_payload = [
 
79
  ]}
80
  ]
81
 
82
+ # --- Call VLM with STIO ---
83
  chat_completion = hf_client.chat.completions.create(
84
  model=HF_VLM_MODEL,
85
  messages=messages_payload,
86
  max_tokens=300
87
  )
88
 
 
89
  vlm_text = chat_completion.choices[0].message.content.strip()
90
+
91
+ # --- Use STIO to execute tool calls if present ---
92
+ tool_results = stio.run(vlm_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
  return {
95
  "saved_to_hf_hub": True,
 
99
  "file_size_bytes": size_bytes,
100
  "robot_id": robot_id,
101
  "vlm_response": vlm_text,
102
+ "tool_results": tool_results
 
 
103
  }
104
 
105
  except Exception as e:
 
108
  # --- Gradio MCP Interface ---
109
  demo = gr.Interface(
110
  fn=process_and_describe,
111
+ inputs=gr.JSON(label="Input Payload"),
112
  outputs=gr.JSON(label="Reply to Jetson"),
113
  api_name="predict"
114
  )