Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,93 +1,65 @@
|
|
| 1 |
-
import gradio as gr
|
| 2 |
-
import json
|
| 3 |
import base64
|
| 4 |
-
from PIL import Image
|
| 5 |
-
import io
|
| 6 |
import requests
|
| 7 |
-
import
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
|
| 13 |
-
#
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
|
|
|
| 18 |
|
| 19 |
-
|
| 20 |
-
#
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
try:
|
| 24 |
-
if not HF_TOKEN:
|
| 25 |
-
return {"error": "Hugging Face token is missing. Please check Space secrets."}
|
| 26 |
-
|
| 27 |
-
robot_id = payload.get("robot_id", "unknown")
|
| 28 |
-
image_b64 = payload["image_b64"]
|
| 29 |
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
# so this part is technically unnecessary for the API call, but harmless.
|
| 34 |
-
# img = Image.open(io.BytesIO(img_bytes)).convert("RGB")
|
| 35 |
|
| 36 |
-
#
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
"model": MODEL,
|
| 40 |
"messages": [
|
| 41 |
{
|
| 42 |
"role": "user",
|
| 43 |
"content": [
|
| 44 |
-
{"type": "text", "text": "Describe this image in detail."},
|
| 45 |
-
{"type": "
|
| 46 |
]
|
| 47 |
}
|
| 48 |
]
|
| 49 |
}
|
| 50 |
|
| 51 |
-
|
| 52 |
-
"
|
| 53 |
-
|
| 54 |
-
json=data,
|
| 55 |
-
timeout=60
|
| 56 |
-
)
|
| 57 |
-
|
| 58 |
-
if resp.status_code != 200:
|
| 59 |
-
# Added more detail to error logging
|
| 60 |
-
print(f"VLM API error: {resp.status_code}, {resp.text}")
|
| 61 |
-
return {"error": f"VLM API error: {resp.status_code}, {resp.text}"}
|
| 62 |
-
|
| 63 |
-
# Check if the expected response structure exists before accessing it
|
| 64 |
-
try:
|
| 65 |
-
vlm_text = resp.json()["choices"][0]["message"]["content"][0]["text"]
|
| 66 |
-
except (KeyError, IndexError, json.JSONDecodeError) as e:
|
| 67 |
-
return {"error": f"Failed to parse VLM response: {e}, Response text: {resp.text}"}
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
return {
|
| 71 |
-
"received": True,
|
| 72 |
-
"robot_id": robot_id,
|
| 73 |
-
"vllm_analysis": vlm_text
|
| 74 |
}
|
| 75 |
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
outputs=gr.JSON(label="Reply to Jetson"),
|
| 88 |
-
api_name="predict"
|
| 89 |
-
)
|
| 90 |
|
|
|
|
|
|
|
|
|
|
| 91 |
if __name__ == "__main__":
|
| 92 |
-
|
|
|
|
|
|
|
| 93 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import base64
import io
import json
import os
import tempfile
import time

import requests
from PIL import Image
|
| 8 |
|
| 9 |
+
# ----------------------------
# OpenRouter API settings
# ----------------------------
# The key is read from the OPENROUTER_API_KEY environment variable so a real
# secret never has to be written into the source; the original placeholder is
# kept as the fallback value so existing behaviour is unchanged when it is unset.
OPENROUTER_KEY = os.environ.get("OPENROUTER_API_KEY", "YOUR_OPENROUTER_API_KEY")
# Vision-language model identifier sent in the request's "model" field.
MODEL_NAME = "qwen/qwen3-vl-32b-instruct"
# Chat-completions endpoint the request is POSTed to.
API_URL = "https://openrouter.ai/api/v1/chat/completions"
|
| 15 |
|
| 16 |
+
def send_image_to_vlm(b64_image: str) -> str:
    """Send a base64-encoded image to the vision model and return its description.

    The image is decoded, converted to RGB, re-encoded as JPEG in memory and
    embedded in the chat-completions request as a ``data:`` URL, so nothing
    that exists only on the local filesystem is referenced in the payload.

    Args:
        b64_image: Base64 text of an image in any format Pillow can open.

    Returns:
        The text produced by the model. If the HTTP status is not 200, the
        string ``"VLM API error <status>: <body>"``. If the decoded JSON does
        not have the expected ``choices[0].message.content`` shape, the
        ``str()`` of the decoded JSON.

    Raises:
        binascii.Error: If ``b64_image`` is not valid base64.
        PIL.UnidentifiedImageError: If the decoded bytes are not an image.
        requests.RequestException: On network failures or timeouts.
    """
    # 1) base64 -> PIL image; RGB so that saving as JPEG cannot fail on
    #    alpha or palette modes.
    img_bytes = base64.b64decode(b64_image)
    img = Image.open(io.BytesIO(img_bytes)).convert("RGB")

    # 2) Re-encode as JPEG in memory and wrap it in a data URL. A local file
    #    path is meaningless to the remote API, so the bytes travel inline.
    jpeg_buffer = io.BytesIO()
    img.save(jpeg_buffer, format="JPEG")
    encoded_jpeg = base64.b64encode(jpeg_buffer.getvalue()).decode("ascii")
    data_url = "data:image/jpeg;base64," + encoded_jpeg

    # 3) Build the chat-completions payload: one user message with a text
    #    prompt followed by the image.
    payload = {
        "model": MODEL_NAME,
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Describe the content of this image in detail."},
                    {"type": "image_url", "image_url": {"url": data_url}},
                ],
            }
        ],
    }

    headers = {
        "Authorization": f"Bearer {OPENROUTER_KEY}",
        "Content-Type": "application/json",
    }

    # 4) Call the API.
    resp = requests.post(API_URL, headers=headers, json=payload, timeout=120)
    if resp.status_code != 200:
        return f"VLM API error {resp.status_code}: {resp.text}"

    result = resp.json()
    try:
        content = result["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError):
        # Unexpected response shape: hand back the raw JSON for inspection.
        return str(result)

    # The message content is normally a plain string ...
    if isinstance(content, str):
        return content
    # ... but a list of typed parts is also accepted; concatenate the text parts.
    if isinstance(content, list):
        texts = [
            part["text"]
            for part in content
            if isinstance(part, dict) and isinstance(part.get("text"), str)
        ]
        if texts:
            return "".join(texts)
    return str(result)
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
+
# ----------------------------
# Manual smoke test
# ----------------------------
if __name__ == "__main__":
    # Load a sample image from disk and base64-encode it, the same form in
    # which a captured frame would be handed to send_image_to_vlm.
    with open("test.jpg", "rb") as image_file:
        raw_bytes = image_file.read()
    encoded_image = base64.b64encode(raw_bytes).decode("utf-8")

    description = send_image_to_vlm(encoded_image)
    print("VLM 分析結果:", description)
|