OppaAI committed on
Commit
a4f7543
·
verified ·
1 Parent(s): 024277f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -81
app.py CHANGED
@@ -18,38 +18,23 @@ PROCESSED_REQUESTS: Dict[str, Dict[str, Any]] = {}
18
  PROCESSED_LOCK = threading.Lock()
19
 
20
  # ==========================================
21
- # Robot Tools (unchanged semantics)
22
  # ==========================================
23
def tool_speak(text: str, emotion: str = "neutral") -> dict:
    """Produce a 'speak' action result carrying the text and emotion to vocalize."""
    payload = {"text": text, "emotion": emotion}
    return {"status": "success", "action_executed": "speak", "payload": payload}
29
 
30
def tool_navigate(direction: str, distance_meters: float) -> dict:
    """Produce a 'navigate' action result, enforcing the 5 m single-move safety cap."""
    if not distance_meters > 5.0:
        payload = {"direction": direction, "distance": distance_meters}
        return {"status": "success", "action_executed": "navigate", "payload": payload}
    return {"status": "error", "message": "Safety limit: Cannot move more than 5m at once."}
38
 
39
def tool_scan_hazard(hazard_type: str, severity: str) -> dict:
    """Record a detected hazard and return the formatted warning log entry."""
    now_iso = datetime.now().isoformat()
    entry = f"[{now_iso}] WARNING: {hazard_type} detected (Severity: {severity})"
    return {"status": "warning_logged", "log": entry}
47
 
48
def tool_analyze_human(clothing_color: str, estimated_action: str) -> dict:
    """Summarize a tracked human by clothing color and inferred activity."""
    summary = f"Human wearing {clothing_color} is likely {estimated_action}."
    return {"status": "human_tracked", "details": summary}
53
 
54
  TOOL_REGISTRY = {
55
  "speak": tool_speak,
@@ -59,33 +44,23 @@ TOOL_REGISTRY = {
59
  }
60
 
61
  # ==========================================
62
- # Helper: Save & Upload (robust)
63
  # ==========================================
64
  def save_and_upload_image(image_b64: str, hf_token: str) -> Tuple[Optional[str], Optional[str], Optional[str], int]:
65
- """
66
- Save a base64 image to a uniquely named /tmp file and upload to HF dataset repo.
67
- Returns: local_tmp_path, hf_url, path_in_repo, size_bytes
68
- """
69
  try:
70
- # decode
71
  image_bytes = base64.b64decode(image_b64)
72
  size_bytes = len(image_bytes)
73
- print("[debug] decoded image bytes:", size_bytes)
74
  if size_bytes < 10:
75
  raise ValueError("Decoded image is too small or invalid base64")
76
 
77
- # unique tmp filename (avoid collision across workers)
78
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
79
  local_tmp_path = f"/tmp/robot_img_{timestamp}.jpg"
80
  with open(local_tmp_path, "wb") as f:
81
  f.write(image_bytes)
82
- print(f"[debug] wrote local tmp file: {local_tmp_path}")
83
 
84
- # Prepare filename in repo (put at repo root to avoid folder permission issues)
85
  filename = f"robot_{timestamp}.jpg"
86
  path_in_repo = filename
87
 
88
- # upload_file might raise. capture exception and show traceback
89
  upload_file(
90
  path_or_fileobj=local_tmp_path,
91
  path_in_repo=path_in_repo,
@@ -95,31 +70,21 @@ def save_and_upload_image(image_b64: str, hf_token: str) -> Tuple[Optional[str],
95
  )
96
 
97
  hf_image_url = f"https://huggingface.co/datasets/{HF_DATASET_REPO}/resolve/main/{path_in_repo}"
98
- print("[debug] upload successful:", hf_image_url)
99
  return local_tmp_path, hf_image_url, path_in_repo, size_bytes
100
 
101
  except Exception as e:
102
- print("[error] save_and_upload_image failed:", e)
103
  traceback.print_exc()
104
  return None, None, None, 0
105
 
106
  # ==========================================
107
- # Main logic
108
  # ==========================================
109
  def safe_parse_json_from_text(text: str) -> Optional[dict]:
110
- """
111
- Try to extract JSON object from model output.
112
- Accepts raw JSON, or a ```json\n{...}``` block, or text with JSON substring.
113
- Returns dict or None.
114
- """
115
  if not text:
116
  return None
117
- # remove markdown fences
118
  t = text.strip()
119
  if t.startswith("```") and "```" in t[3:]:
120
- # remove outer fences
121
  t = t.strip("`")
122
- # find first '{' and last '}' to try to extract JSON substring
123
  start = t.find("{")
124
  end = t.rfind("}")
125
  if start >= 0 and end > start:
@@ -127,7 +92,6 @@ def safe_parse_json_from_text(text: str) -> Optional[dict]:
127
  try:
128
  return json.loads(candidate)
129
  except Exception:
130
- # fallback: try the whole text
131
  try:
132
  return json.loads(t)
133
  except Exception:
@@ -138,44 +102,46 @@ def safe_parse_json_from_text(text: str) -> Optional[dict]:
138
  except Exception:
139
  return None
140
 
 
 
 
141
def validate_and_call_tool(tool_name: str, tool_args: dict):
    """Look up *tool_name* in TOOL_REGISTRY and invoke it with *tool_args*.

    Returns the tool's own result dict on success, or an {"error": ...} dict
    when the name is missing/unknown or the underlying call fails.
    """
    if not tool_name:
        return {"error": "No tool_name provided by VLM."}
    if tool_name not in TOOL_REGISTRY:
        return {"error": f"Tool '{tool_name}' not found in registry."}
    try:
        tool_fn = TOOL_REGISTRY[tool_name]
        return tool_fn(**tool_args)
    except TypeError as e:
        # VLM supplied arguments that do not match the tool's signature.
        return {"error": f"Tool call argument mismatch: {str(e)}"}
    except Exception as e:
        traceback.print_exc()
        return {"error": f"Tool execution failed: {str(e)}"}
155
 
156
- def process_and_describe(payload: dict):
157
- """
158
- payload expects keys:
159
- - hf_token (string)
160
- - image_b64 (base64 str)
161
- - robot_id (optional)
162
- - request_id (optional) # recommended to dedupe retries
163
- """
 
 
 
164
  vlm_text = ""
165
  tool_result = None
166
  action_data = {}
167
 
168
  try:
169
- # basic checks
170
  hf_token = payload.get("hf_token")
171
  if not hf_token:
172
- return {"error": "HF token not provided in payload. Token must have datasets write permission if uploading."}
173
 
174
  request_id = payload.get("request_id") or payload.get("robot_id") or None
175
  if request_id:
176
  with PROCESSED_LOCK:
177
  if request_id in PROCESSED_REQUESTS:
178
- print("[info] duplicate request_id detected; returning cached result")
179
  return PROCESSED_REQUESTS[request_id]
180
 
181
  robot_id = payload.get("robot_id", "unknown")
@@ -183,20 +149,11 @@ def process_and_describe(payload: dict):
183
  if not image_b64:
184
  return {"error": "No image provided in payload."}
185
 
186
- # Save & upload (only once per invocation)
187
  local_tmp_path, hf_url, path_in_repo, size_bytes = save_and_upload_image(image_b64, hf_token)
188
  if not hf_url:
189
- # Upload failed: return error with helpful debug info
190
- return {
191
- "error": "Image upload failed on server.",
192
- "debug": {
193
- "local_tmp_path": local_tmp_path,
194
- "path_in_repo": path_in_repo,
195
- "size_bytes": size_bytes
196
- }
197
- }
198
 
199
- # Build system prompt (kept compact)
200
  tools_desc = json.dumps({
201
  "speak": {"text": "string", "emotion": "string"},
202
  "navigate": {"direction": "forward/left/right", "distance_meters": "float"},
@@ -222,7 +179,6 @@ RESPONSE FORMAT (Strict JSON):
222
  }}
223
  """
224
 
225
- # Build messages payload for VLM - include the uploaded HF URL (some VLMs can fetch it)
226
  messages_payload = [
227
  {"role": "system", "content": system_prompt},
228
  {"role": "user", "content": [
@@ -231,10 +187,7 @@ RESPONSE FORMAT (Strict JSON):
231
  ]}
232
  ]
233
 
234
- # Instantiate HF Inference client and call chat completion
235
  hf_client = InferenceClient(token=hf_token)
236
-
237
- # NOTE: huggingface InferenceClient usage may vary by version. We use the chat completions create call.
238
  chat_completion = hf_client.chat.completions.create(
239
  model=HF_VLM_MODEL,
240
  messages=messages_payload,
@@ -243,12 +196,8 @@ RESPONSE FORMAT (Strict JSON):
243
  )
244
 
245
  vlm_text = chat_completion.choices[0].message.content.strip()
246
- print("[debug] VLM raw output:", vlm_text[:1000])
247
-
248
- # attempt to parse JSON
249
  parsed = safe_parse_json_from_text(vlm_text)
250
  if parsed is None:
251
- # If the model didn't return JSON, return descriptive fallback but do not execute tools
252
  result = {
253
  "status": "model_no_json",
254
  "robot_id": robot_id,
@@ -264,13 +213,9 @@ RESPONSE FORMAT (Strict JSON):
264
  action_data = parsed
265
  tool_name = action_data.get("tool_name")
266
  tool_args = action_data.get("arguments", {}) or {}
267
-
268
- # Validate that arguments is a dict
269
  if not isinstance(tool_args, dict):
270
  tool_args = {}
271
 
272
- # Execute the tool once and capture result
273
- print(f"[info] Executing tool: {tool_name} with args {tool_args}")
274
  tool_result = validate_and_call_tool(tool_name, tool_args)
275
 
276
  result = {
@@ -306,5 +251,4 @@ iface = gr.Interface(
306
  )
307
 
308
  if __name__ == "__main__":
309
- # When deploying to HF Space: set server_name and server_port via env if you need
310
  iface.launch()
 
18
  PROCESSED_LOCK = threading.Lock()
19
 
20
  # ==========================================
21
+ # Robot Tools
22
  # ==========================================
23
def tool_speak(text: str, emotion: str = "neutral") -> dict:
    """Produce a 'speak' action result carrying the text and emotion to vocalize."""
    payload = {"text": text, "emotion": emotion}
    return {"status": "success", "action_executed": "speak", "payload": payload}
 
 
 
 
25
 
26
def tool_navigate(direction: str, distance_meters: float) -> dict:
    """Validate and execute a single navigation command.

    Args:
        direction: Direction to move (e.g. "forward", "left", "right";
            not validated here — presumably constrained by the prompt,
            TODO confirm against the system prompt's tool schema).
        distance_meters: Distance to travel; must be positive and at most 5 m.

    Returns:
        A success dict echoing the requested move, or an error dict when the
        distance violates a safety limit.
    """
    if distance_meters > 5.0:
        return {"status": "error", "message": "Safety limit: Cannot move more than 5m at once."}
    # Reject zero, negative, and NaN distances: previously anything <= 5.0
    # (including -100 or NaN) passed the safety gate unchecked.
    if not distance_meters > 0.0:
        return {"status": "error", "message": "Safety limit: Distance must be a positive number."}
    return {"status": "success", "action_executed": "navigate", "payload": {"direction": direction, "distance": distance_meters}}
 
 
 
 
30
 
31
def tool_scan_hazard(hazard_type: str, severity: str) -> dict:
    """Log a detected hazard and return the warning entry that was recorded."""
    stamp = datetime.now().isoformat()
    return {
        "status": "warning_logged",
        "log": f"[{stamp}] WARNING: {hazard_type} detected (Severity: {severity})",
    }
 
 
 
 
35
 
36
def tool_analyze_human(clothing_color: str, estimated_action: str) -> dict:
    """Summarize a tracked human by clothing color and inferred activity."""
    description = f"Human wearing {clothing_color} is likely {estimated_action}."
    return {"status": "human_tracked", "details": description}
 
 
 
38
 
39
  TOOL_REGISTRY = {
40
  "speak": tool_speak,
 
44
  }
45
 
46
  # ==========================================
47
+ # Helper: Save & Upload
48
  # ==========================================
49
  def save_and_upload_image(image_b64: str, hf_token: str) -> Tuple[Optional[str], Optional[str], Optional[str], int]:
 
 
 
 
50
  try:
 
51
  image_bytes = base64.b64decode(image_b64)
52
  size_bytes = len(image_bytes)
 
53
  if size_bytes < 10:
54
  raise ValueError("Decoded image is too small or invalid base64")
55
 
 
56
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
57
  local_tmp_path = f"/tmp/robot_img_{timestamp}.jpg"
58
  with open(local_tmp_path, "wb") as f:
59
  f.write(image_bytes)
 
60
 
 
61
  filename = f"robot_{timestamp}.jpg"
62
  path_in_repo = filename
63
 
 
64
  upload_file(
65
  path_or_fileobj=local_tmp_path,
66
  path_in_repo=path_in_repo,
 
70
  )
71
 
72
  hf_image_url = f"https://huggingface.co/datasets/{HF_DATASET_REPO}/resolve/main/{path_in_repo}"
 
73
  return local_tmp_path, hf_image_url, path_in_repo, size_bytes
74
 
75
  except Exception as e:
 
76
  traceback.print_exc()
77
  return None, None, None, 0
78
 
79
  # ==========================================
80
+ # JSON parsing helper
81
  # ==========================================
82
  def safe_parse_json_from_text(text: str) -> Optional[dict]:
 
 
 
 
 
83
  if not text:
84
  return None
 
85
  t = text.strip()
86
  if t.startswith("```") and "```" in t[3:]:
 
87
  t = t.strip("`")
 
88
  start = t.find("{")
89
  end = t.rfind("}")
90
  if start >= 0 and end > start:
 
92
  try:
93
  return json.loads(candidate)
94
  except Exception:
 
95
  try:
96
  return json.loads(t)
97
  except Exception:
 
102
  except Exception:
103
  return None
104
 
105
+ # ==========================================
106
+ # Tool executor
107
+ # ==========================================
108
def validate_and_call_tool(tool_name: str, tool_args: dict):
    """Dispatch *tool_name* through TOOL_REGISTRY with keyword args *tool_args*.

    Returns the tool's result dict, or an {"error": ...} dict when the name is
    empty/unknown or the call raises.
    """
    if not tool_name:
        return {"error": "No tool_name provided by VLM."}
    if tool_name not in TOOL_REGISTRY:
        return {"error": f"Tool '{tool_name}' not found in registry."}
    tool_fn = TOOL_REGISTRY[tool_name]
    try:
        return tool_fn(**tool_args)
    except TypeError as e:
        # Arguments did not match the tool's signature.
        return {"error": f"Tool call argument mismatch: {str(e)}"}
    except Exception as e:
        traceback.print_exc()
        return {"error": f"Tool execution failed: {str(e)}"}
120
 
121
+ # ==========================================
122
+ # Main logic
123
+ # ==========================================
124
+ def process_and_describe(payload):
125
+ # If payload is str, try to parse it
126
+ if isinstance(payload, str):
127
+ try:
128
+ payload = json.loads(payload)
129
+ except Exception as e:
130
+ return {"error": f"Invalid JSON string: {str(e)}"}
131
+
132
  vlm_text = ""
133
  tool_result = None
134
  action_data = {}
135
 
136
  try:
 
137
  hf_token = payload.get("hf_token")
138
  if not hf_token:
139
+ return {"error": "HF token not provided in payload."}
140
 
141
  request_id = payload.get("request_id") or payload.get("robot_id") or None
142
  if request_id:
143
  with PROCESSED_LOCK:
144
  if request_id in PROCESSED_REQUESTS:
 
145
  return PROCESSED_REQUESTS[request_id]
146
 
147
  robot_id = payload.get("robot_id", "unknown")
 
149
  if not image_b64:
150
  return {"error": "No image provided in payload."}
151
 
 
152
  local_tmp_path, hf_url, path_in_repo, size_bytes = save_and_upload_image(image_b64, hf_token)
153
  if not hf_url:
154
+ return {"error": "Image upload failed.", "debug": {"local_tmp_path": local_tmp_path, "size_bytes": size_bytes}}
 
 
 
 
 
 
 
 
155
 
156
+ # Build system prompt
157
  tools_desc = json.dumps({
158
  "speak": {"text": "string", "emotion": "string"},
159
  "navigate": {"direction": "forward/left/right", "distance_meters": "float"},
 
179
  }}
180
  """
181
 
 
182
  messages_payload = [
183
  {"role": "system", "content": system_prompt},
184
  {"role": "user", "content": [
 
187
  ]}
188
  ]
189
 
 
190
  hf_client = InferenceClient(token=hf_token)
 
 
191
  chat_completion = hf_client.chat.completions.create(
192
  model=HF_VLM_MODEL,
193
  messages=messages_payload,
 
196
  )
197
 
198
  vlm_text = chat_completion.choices[0].message.content.strip()
 
 
 
199
  parsed = safe_parse_json_from_text(vlm_text)
200
  if parsed is None:
 
201
  result = {
202
  "status": "model_no_json",
203
  "robot_id": robot_id,
 
213
  action_data = parsed
214
  tool_name = action_data.get("tool_name")
215
  tool_args = action_data.get("arguments", {}) or {}
 
 
216
  if not isinstance(tool_args, dict):
217
  tool_args = {}
218
 
 
 
219
  tool_result = validate_and_call_tool(tool_name, tool_args)
220
 
221
  result = {
 
251
  )
252
 
253
  if __name__ == "__main__":
 
254
  iface.launch()