Update app.py
app.py
CHANGED
@@ -7,17 +7,10 @@ from huggingface_hub import upload_file, InferenceClient
 from PIL import Image
 
 # --- Config ---
-#HF_TOKEN = os.environ.get("HF_CV_ROBOT_TOKEN")
 HF_DATASET_REPO = "OppaAI/Robot_MCP"
 # Model specifically for VLM (image-to-text) tasks on Hugging Face
 HF_VLM_MODEL = "Qwen/Qwen2.5-VL-7B-Instruct"
 
-#if not HF_TOKEN:
-#    raise ValueError("HF_TOKEN environment variable not set.")
-
-# Initialize the Hugging Face Inference Client
-hf_client = InferenceClient(token=HF_TOKEN)
-
 # --- Helper Functions ---
 def save_and_upload_image(image_b64):
     """Save image to /tmp and upload to HF dataset."""
@@ -42,33 +35,38 @@ def save_and_upload_image(image_b64):
 # --- Main MCP function ---
 def process_and_describe(payload: dict):
     try:
-
+        # 1️⃣ Use robot-sent token if available, otherwise fallback
+        hf_token = payload.get("hf_token")
+        if not hf_token:
+            return {"error": "HF token not provided in payload."}
+
         robot_id = payload.get("robot_id", "unknown")
         image_b64 = payload["image_b64"]
 
-        #
+        # 2️⃣ Save image temporarily (for tracking)
         local_tmp_path, hf_url, path_in_repo, size_bytes = save_and_upload_image(image_b64)
-
-        #
+
+        # 3️⃣ Initialize HF client per request
+        hf_client = InferenceClient(token=hf_token)
+
+        # 4️⃣ Prepare multimodal message payload
         messages_payload = [
             {
                 "role": "user",
                 "content": [
                     {"type": "text", "text": "Describe this image in detail."},
-                    # Pass the original Base64 string directly in the required format
                     {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"}},
                 ],
             }
         ]
 
-        #
+        # 5️⃣ Call VLM
        chat_completion = hf_client.chat.completions.create(
            model=HF_VLM_MODEL,
            messages=messages_payload,
-            max_tokens=150,
+            max_tokens=150,
        )
-
-        # Extract the text content from the response object
+
        vlm_text = chat_completion.choices[0].message.content.strip()
 
        return {
@@ -82,7 +80,6 @@ def process_and_describe(payload: dict):
        }
 
    except Exception as e:
-        # Added better error handling
        return {"error": f"An API error occurred: {str(e)}"}
 
 # --- Gradio MCP Interface ---
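
For context, a minimal caller-side sketch of the payload the updated process_and_describe now expects. The import path, robot_id value, image file name, and placeholder token are illustrative assumptions, not part of this commit.

import base64

from app import process_and_describe  # assumes this Space's app.py is importable locally

# Hypothetical robot-side payload: the HF token now travels with each request.
with open("frame.jpg", "rb") as f:  # assumed local camera capture
    image_b64 = base64.b64encode(f.read()).decode("utf-8")

payload = {
    "robot_id": "robot_01",   # assumed identifier
    "hf_token": "hf_xxx",     # per-request token checked by the new code
    "image_b64": image_b64,
}

result = process_and_describe(payload)
print(result.get("error") or result)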
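
The body of save_and_upload_image is collapsed in this diff. Judging from its docstring, the upload_file import, and the four values unpacked above, it plausibly resembles the sketch below; the file naming, dataset layout, URL pattern, and environment-variable token used for the upload are assumptions, not code from this commit.

import base64
import os
import time

from huggingface_hub import upload_file

HF_DATASET_REPO = "OppaAI/Robot_MCP"

def save_and_upload_image(image_b64):
    """Hypothetical reconstruction: save image to /tmp and upload to the HF dataset."""
    data = base64.b64decode(image_b64)
    filename = f"capture_{int(time.time())}.jpg"          # assumed naming scheme
    local_tmp_path = os.path.join("/tmp", filename)
    with open(local_tmp_path, "wb") as f:
        f.write(data)

    path_in_repo = f"images/{filename}"                   # assumed dataset layout
    upload_file(
        path_or_fileobj=local_tmp_path,
        path_in_repo=path_in_repo,
        repo_id=HF_DATASET_REPO,
        repo_type="dataset",
        token=os.environ.get("HF_TOKEN"),                 # assumed auth source for the upload
    )
    hf_url = f"https://huggingface.co/datasets/{HF_DATASET_REPO}/resolve/main/{path_in_repo}"
    return local_tmp_path, hf_url, path_in_repo, len(data)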