import os
import base64
import tempfile
import gradio as gr
from huggingface_hub import upload_file, InferenceClient
# --- Config ---
HF_DATASET_REPO = "OppaAI/Robot_MCP"
# Model specifically for VLM (image-to-text) tasks on Hugging Face
HF_VLM_MODEL = "Qwen/Qwen2.5-VL-7B-Instruct"
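# Note (added remark, not in the original script): the dataset repo above must
# already exist, and the token sent with each request needs write access to it;
# otherwise upload_file() below will raise.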
# --- Helper Functions ---
def save_and_upload_image(image_b64, hf_token):
    """Decode a base64 image, write it to a temp file, and upload it to the HF dataset."""
    image_bytes = base64.b64decode(image_b64)
    with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as f:
        f.write(image_bytes)
        local_tmp_path = f.name
    path_in_repo = "images/tmp.jpg"
    upload_file(
        path_or_fileobj=local_tmp_path,
        path_in_repo=path_in_repo,
        repo_id=HF_DATASET_REPO,
        token=hf_token,  # use the token supplied in the payload
        repo_type="dataset"
    )
    hf_image_url = f"https://huggingface.co/datasets/{HF_DATASET_REPO}/resolve/main/{path_in_repo}"
    return local_tmp_path, hf_image_url, path_in_repo, len(image_bytes)
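# --- Illustrative client-side sketch (not part of the original script) ---
# How a caller such as the Jetson robot might build the base64 string this
# server expects. The helper name `encode_image_to_b64` is hypothetical.
def encode_image_to_b64(image_path: str) -> str:
    """Read an image file from disk and return it as a base64-encoded string."""
    with open(image_path, "rb") as f:
        return base64.b64encode(f.read()).decode("utf-8")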
# --- Main MCP function ---
def process_and_describe(payload: dict):
    try:
        # 1️⃣ Use the robot-sent token if available, otherwise fall back to the env var
        hf_token = payload.get("hf_token") or os.environ.get("HF_TOKEN")
        if not hf_token:
            return {"error": "HF token not provided in payload or HF_TOKEN environment variable."}
        robot_id = payload.get("robot_id", "unknown")
        image_b64 = payload["image_b64"]
        # 2️⃣ Save the image temporarily and upload it to the dataset (for tracking)
        local_tmp_path, hf_url, path_in_repo, size_bytes = save_and_upload_image(image_b64, hf_token)
        # 3️⃣ Initialize an HF client per request
        hf_client = InferenceClient(token=hf_token)
        # 4️⃣ Prepare the multimodal message payload
        messages_payload = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Describe this image in detail."},
                    {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"}},
                ],
            }
        ]
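        # Note: the image is sent to the VLM inline as a base64 data URL; the
        # uploaded dataset URL (hf_url) is only returned below for tracking.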
        # 5️⃣ Call the VLM
        chat_completion = hf_client.chat.completions.create(
            model=HF_VLM_MODEL,
            messages=messages_payload,
            max_tokens=150,
        )
        vlm_text = chat_completion.choices[0].message.content.strip()
        return {
            "saved_to_hf_hub": True,
            "repo_id": HF_DATASET_REPO,
            "path_in_repo": path_in_repo,
            "image_url": hf_url,
            "file_size_bytes": size_bytes,
            "robot_id": robot_id,
            "vlm_description": vlm_text,
        }
    except Exception as e:
        return {"error": f"An API error occurred: {str(e)}"}
# --- Gradio MCP Interface ---
demo = gr.Interface(
    fn=process_and_describe,
    inputs=gr.JSON(label="Input Payload (Dict format with 'image_b64')"),
    outputs=gr.JSON(label="Reply to Jetson"),
    api_name="predict"
)
if __name__ == "__main__":
    # Ensure up-to-date dependencies: pip install --upgrade huggingface-hub "gradio[mcp]"
    demo.launch(mcp_server=True)
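# A minimal remote-call sketch, assuming the `gradio_client` package and a
# deployed Space (both assumptions; the Space id below is hypothetical):
#
#     from gradio_client import Client
#     client = Client("OppaAI/<space-name>")
#     reply = client.predict(
#         {"hf_token": "hf_...", "robot_id": "jetson-01", "image_b64": "..."},
#         api_name="/predict",
#     )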