OppaAI committed on
Commit
71865dd
·
verified ·
1 Parent(s): 23f2922

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -40
app.py CHANGED
@@ -8,74 +8,80 @@ import os
8
  HF_TOKEN = os.environ.get("HF_CV_ROBOT_TOKEN")
9
  MODEL = "Qwen/Qwen2.5-VL-7B-Instruct"
10
 
11
- if not HF_TOKEN:
12
- print("ERROR: HF_CV_ROBOT_TOKEN environment variable not set.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
def process(payload: dict):
    """Handle one robot inference request: decode a frame and query the VLM.

    Expected ``payload`` keys:
        robot_id  (optional): identifier echoed back to the caller.
        image_b64 (required): Base64-encoded JPEG frame.

    Returns a dict — either the analysis result or ``{"error": ...}``.
    Never raises: the outer handler converts any failure into an error dict
    so the HTTP endpoint always answers with JSON.
    """
    try:
        if not HF_TOKEN:
            return {"error": "Missing HF token"}

        robot_id = payload.get("robot_id", "unknown")
        image_b64 = payload["image_b64"]  # KeyError is caught below

        # Decode the Base64 frame to a temp file for the multipart upload.
        img_bytes = base64.b64decode(image_b64)
        temp_path = "temp.jpg"
        with open(temp_path, "wb") as f:
            f.write(img_bytes)

        # The router form payload: model + messages as form fields,
        # messages serialized to a JSON string.
        # NOTE(review): chat/completions is normally a JSON-only endpoint;
        # sending form fields plus a multipart file may be rejected —
        # confirm against the HF router documentation.
        messages_json = json.dumps([
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Describe this image in detail."}
                ]
            }
        ])

        # BUG FIX: open the image inside `with` so the file handle is closed
        # even when the request fails (the original leaked the handle passed
        # to `files=`).
        with open(temp_path, "rb") as img_f:
            resp = requests.post(
                "https://router.huggingface.co/v1/chat/completions",
                headers={"Authorization": f"Bearer {HF_TOKEN}"},
                data={
                    "model": MODEL,        # model goes in the form data
                    "messages": messages_json  # must be a string
                },
                files={
                    "file": ("image.jpg", img_f, "image/jpeg")
                },
                timeout=60
            )

        if resp.status_code != 200:
            print(f"VLM API error: {resp.status_code}, {resp.text}")
            return {"error": f"VLM API error: {resp.status_code}, {resp.text}"}

        # Parse the reply. `content` may be a plain string or a list of
        # typed parts depending on the backend — accept both.
        out = resp.json()
        parts = out["choices"][0]["message"]["content"]

        if isinstance(parts, str):
            text = parts
        else:
            text = ""
            for p in parts:
                if p["type"] == "text":
                    text += p["text"]

        return {
            "received": True,
            "robot_id": robot_id,
            "vllm_analysis": text
        }

    except Exception as e:
        # Last-resort guard: report the failure, never crash the endpoint.
        return {"error": str(e)}
74
 
 
75
  demo = gr.Interface(
76
  fn=process,
77
- inputs=gr.JSON(label="Input Payload"),
78
- outputs=gr.JSON(label="Reply"),
79
  api_name="predict"
80
  )
81
 
 
8
  HF_TOKEN = os.environ.get("HF_CV_ROBOT_TOKEN")
9
  MODEL = "Qwen/Qwen2.5-VL-7B-Instruct"
10
 
11
# Endpoint used to host the decoded frame so the VLM can fetch it by URL.
# NOTE(review): this path is not a documented Hugging Face API — confirm it
# actually accepts anonymous binary uploads before relying on it.
HF_UPLOAD_URL = "https://huggingface.co/api/uploads"


def upload_to_hf(bytes_data):
    """Upload raw JPEG bytes and return the hosted image URL.

    Args:
        bytes_data: the raw image bytes to upload.

    Raises:
        RuntimeError: if the upload endpoint answers with a non-200 status.
        KeyError: if the response JSON lacks a "url" field.
    """
    resp = requests.post(
        HF_UPLOAD_URL,
        headers={"Authorization": f"Bearer {HF_TOKEN}"},
        files={"file": ("temp.jpg", bytes_data, "image/jpeg")},
        # BUG FIX: requests has no default timeout — without one a stalled
        # upload hangs the whole request pipeline forever.
        timeout=60,
    )

    if resp.status_code != 200:
        raise RuntimeError(f"HF upload failed: {resp.text}")

    return resp.json()["url"]
27
 
28
def process(payload: dict):
    """Handle one robot inference request: upload the frame, query the VLM.

    Expected ``payload`` keys:
        robot_id  (optional): identifier echoed back to the caller.
        image_b64 (required): Base64-encoded JPEG frame.

    Returns a dict — either the analysis result or ``{"error": ...}``.
    Never raises: the outer handler converts any failure into an error dict
    so the Gradio endpoint always answers with JSON.
    """
    try:
        if not HF_TOKEN:
            return {"error": "Missing HF token."}

        robot_id = payload.get("robot_id", "unknown")

        # --- get image bytes (a missing key raises KeyError, caught below)
        image_b64 = payload["image_b64"]
        img_bytes = base64.b64decode(image_b64)

        # --- upload to HF so the router can fetch the frame by public URL
        image_url = upload_to_hf(img_bytes)

        # --- OpenAI-compatible chat request with an image_url content part
        data = {
            "model": MODEL,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "Describe this image in detail."},
                        {"type": "image_url", "image_url": {"url": image_url}}
                    ]
                }
            ]
        }

        resp = requests.post(
            "https://router.huggingface.co/v1/chat/completions",
            headers={"Authorization": f"Bearer {HF_TOKEN}"},
            # `json=` serializes and sets Content-Type: application/json —
            # no need for manual json.dumps + header.
            json=data,
            timeout=60
        )

        if resp.status_code != 200:
            return {"error": f"VLM API error: {resp.status_code}, {resp.text}"}

        # `content` may be a plain string (the usual OpenAI-compatible shape)
        # or a list of typed parts — accept both instead of crashing on one.
        # BUG FIX: the bare `except:` also swallowed SystemExit and
        # KeyboardInterrupt; catch only the shape errors we expect.
        try:
            content = resp.json()["choices"][0]["message"]["content"]
            if isinstance(content, str):
                vlm_text = content
            else:
                vlm_text = "".join(
                    part.get("text", "")
                    for part in content
                    if part.get("type") == "text"
                )
        except (KeyError, IndexError, TypeError, ValueError):
            return {"error": f"Bad VLM response: {resp.text}"}

        return {
            "received": True,
            "robot_id": robot_id,
            "vllm_analysis": vlm_text
        }

    except Exception as e:
        # Last-resort guard: report the failure, never crash the endpoint.
        return {"error": str(e)}
79
 
80
+
81
# Expose `process` as a JSON-in / JSON-out Gradio endpoint for the Jetson client.
jetson_payload = gr.JSON(label="Input Payload (Dict format)")
jetson_reply = gr.JSON(label="Reply to Jetson")

demo = gr.Interface(
    fn=process,
    inputs=jetson_payload,
    outputs=jetson_reply,
    api_name="predict",
)
87