OppaAI committed on
Commit
3de8d94
·
verified ·
1 Parent(s): e37ca9c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -73
app.py CHANGED
@@ -1,93 +1,65 @@
1
- import gradio as gr
2
- import json
3
  import base64
4
- from PIL import Image
5
- import io
6
  import requests
7
- import os
8
-
9
- # Get token from environment variable
10
- HF_TOKEN = os.environ.get("HF_CV_ROBOT_TOKEN")
11
- MODEL = "Qwen/Qwen2-VL-7B-Instruct"
12
 
13
- # Check if the token is available when the script starts
14
- if not HF_TOKEN:
15
- print("ERROR: HF_CV_ROBOT_TOKEN environment variable not set.")
16
- # In a real app, you might want to stop execution or handle this more gracefully
17
- # For a Gradio app in a Space, it might just fail upon the first request.
 
18
 
19
# -------------------------------
# Main processing function
# -------------------------------
def process(payload: dict):
    """Decode a base64 image from *payload*, send it to the VLM via the
    HF router chat-completions endpoint, and return the analysis.

    Args:
        payload: dict with "image_b64" (required, base64-encoded image)
            and optional "robot_id".

    Returns:
        On success: {"received": True, "robot_id": ..., "vllm_analysis": str}.
        On any failure: {"error": <human-readable message>} — this function
        never raises to the caller.
    """
    try:
        if not HF_TOKEN:
            return {"error": "Hugging Face token is missing. Please check Space secrets."}

        robot_id = payload.get("robot_id", "unknown")
        image_b64 = payload["image_b64"]

        # Decode early so malformed base64 fails before spending an API call.
        base64.b64decode(image_b64)

        headers = {"Authorization": f"Bearer {HF_TOKEN}"}
        # BUG FIX: OpenAI-compatible chat-completions APIs expect images as
        # an "image_url" part carrying a base64 data URL — the previous
        # "image_data"/"b64" shape is not part of that schema.
        data = {
            "model": MODEL,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "Describe this image in detail."},
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"},
                        },
                    ],
                }
            ],
        }

        resp = requests.post(
            "https://router.huggingface.co/v1/chat/completions",
            headers=headers,
            json=data,
            timeout=60,
        )

        if resp.status_code != 200:
            # Log full detail server-side as well as returning it.
            print(f"VLM API error: {resp.status_code}, {resp.text}")
            return {"error": f"VLM API error: {resp.status_code}, {resp.text}"}

        # Guard against unexpected response shapes before indexing into them.
        try:
            content = resp.json()["choices"][0]["message"]["content"]
            # Chat-completions normally return content as a plain string;
            # some providers return a list of parts — accept both.
            if isinstance(content, list):
                vlm_text = content[0]["text"]
            else:
                vlm_text = content
        except (KeyError, IndexError, TypeError, json.JSONDecodeError) as e:
            return {"error": f"Failed to parse VLM response: {e}, Response text: {resp.text}"}

        return {
            "received": True,
            "robot_id": robot_id,
            "vllm_analysis": vlm_text,
        }

    except Exception as e:
        # Top-level boundary: log and convert to an error payload so the
        # Gradio endpoint always returns JSON instead of a traceback.
        print(f"An unexpected error occurred: {e}")
        return {"error": str(e)}
80
-
81
# -------------------------------
# Gradio MCP Server
# -------------------------------
# Bind the I/O components to named locals so the Interface call stays short.
payload_input = gr.JSON(label="Input Payload (Dict format)")
reply_output = gr.JSON(label="Reply to Jetson")

demo = gr.Interface(
    fn=process,
    inputs=payload_input,
    outputs=reply_output,
    api_name="predict",
)


if __name__ == "__main__":
    # Expose the interface as an MCP server endpoint.
    demo.launch(mcp_server=True)
 
 
93
 
 
 
 
 
 
1
import base64
import io
import json
import os
import tempfile
import time

import requests
from PIL import Image

# ----------------------------
# OpenRouter API configuration
# ----------------------------
# SECURITY FIX: never hard-code an API key in source — read it from the
# environment (set OPENROUTER_API_KEY). The old placeholder remains the
# fallback so behavior is unchanged when the variable is unset.
OPENROUTER_KEY = os.environ.get("OPENROUTER_API_KEY", "YOUR_OPENROUTER_API_KEY")
MODEL_NAME = "qwen/qwen3-vl-32b-instruct"
API_URL = "https://openrouter.ai/api/v1/chat/completions"
 
16
def send_image_to_vlm(b64_image: str):
    """Send a base64-encoded image to the OpenRouter VLM and return its
    textual description.

    Args:
        b64_image: base64-encoded image bytes (any format PIL can open).

    Returns:
        The model's description string on success, otherwise a
        human-readable error string — API failures never raise.
    """
    # Decode and re-encode to JPEG so the data URL's MIME type is correct
    # regardless of the input image's original format.
    img_bytes = base64.b64decode(b64_image)
    img = Image.open(io.BytesIO(img_bytes)).convert("RGB")
    buf = io.BytesIO()
    img.save(buf, format="JPEG")
    jpeg_b64 = base64.b64encode(buf.getvalue()).decode("ascii")

    # BUG FIX: the previous version wrote a temp file and sent its local
    # *path* as {"type": "file", "file": tmp_file.name} — the remote API
    # cannot read a path on this machine. OpenAI-compatible APIs expect
    # the image inlined as an "image_url" part with a base64 data URL,
    # which also makes the temp file unnecessary.
    payload = {
        "model": MODEL_NAME,
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Describe the content of this image in detail."},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{jpeg_b64}"},
                    },
                ],
            }
        ],
    }

    headers = {
        "Authorization": f"Bearer {OPENROUTER_KEY}",
        "Content-Type": "application/json",
    }

    # Call the API.
    resp = requests.post(API_URL, headers=headers, data=json.dumps(payload), timeout=120)
    if resp.status_code != 200:
        return f"VLM API error {resp.status_code}: {resp.text}"

    result = resp.json()
    try:
        content = result["choices"][0]["message"]["content"]
        # Chat-completions normally return content as a plain string;
        # some providers return a list of parts — accept both.
        if isinstance(content, list):
            return content[0]["text"]
        return content
    except (KeyError, IndexError, TypeError):
        # Narrow except (replaces the original bare `except:`): on an
        # unexpected response shape, return the raw payload for debugging.
        return str(result)
 
 
 
55
 
56
# ----------------------------
# Smoke test
# ----------------------------
if __name__ == "__main__":
    # Base64-encode a locally captured frame (test.jpg) as sample input.
    with open("test.jpg", "rb") as image_file:
        encoded = base64.b64encode(image_file.read()).decode("utf-8")

    print("VLM 分析結果:", send_image_to_vlm(encoded))