OppaAI committed on
Commit
938f609
·
verified ·
1 Parent(s): c8fa46a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -58
app.py CHANGED
@@ -1,84 +1,101 @@
1
- import gradio as gr
2
- import json
3
- import base64
4
  import os
 
 
 
5
  import requests
 
 
 
6
  from huggingface_hub import upload_file
 
7
 
8
- HF_TOKEN = os.environ.get("HF_CV_ROBOT_TOKEN")
9
- HF_DATASET_REPO = "OppaAI/Robot_MCP" # Replace with your dataset repo
10
- MODEL = "Qwen/Qwen2.5-VL-7B-Instruct"
 
11
 
12
def process_and_describe(payload: dict):
    """Upload a base64 image to the HF dataset repo and describe it with the VLM.

    Args:
        payload: Dict with a required ``"image_b64"`` entry (base64-encoded
            image) and an optional ``"robot_id"`` (defaults to ``"unknown"``).

    Returns:
        On success, a dict with ``saved_to_hf_hub``, ``repo_id``,
        ``path_in_repo``, ``image_url``, ``file_size_bytes``, ``robot_id`` and
        ``vlm_description``. A failed or unparsable VLM reply is reported as
        text inside ``vlm_description``. Any other failure (including a missing
        token) yields ``{"error": <message>}``; this function does not raise.
    """
    if not HF_TOKEN:
        return {"error": "HF_TOKEN secret not found in Space settings."}

    try:
        robot_id = payload.get("robot_id", "unknown")
        image_b64 = payload["image_b64"]
        image_bytes = base64.b64decode(image_b64)

        # Upload the decoded bytes directly. Going through one fixed temp file
        # let concurrent requests overwrite each other's image, and the file
        # was left behind whenever the upload raised.
        path_in_repo = f"images/uploaded_image_{len(image_bytes)}.jpg"
        upload_file(
            path_or_fileobj=image_bytes,
            path_in_repo=path_in_repo,
            repo_id=HF_DATASET_REPO,
            token=HF_TOKEN,
            repo_type="dataset",
        )

        # Public URL of the file that was just uploaded.
        image_url = f"https://huggingface.co/datasets/{HF_DATASET_REPO}/resolve/main/{path_in_repo}"

        # OpenAI-compatible chat payload: the image part must be an object
        # with a "url" key, not a bare string.
        data = {
            "model": MODEL,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "Describe this image in detail."},
                        {"type": "image_url", "image_url": {"url": image_url}},
                    ],
                }
            ],
        }

        resp = requests.post(
            "https://router.huggingface.co/v1/chat/completions",
            headers={"Authorization": f"Bearer {HF_TOKEN}"},
            json=data,
            timeout=60,
        )

        if resp.status_code != 200:
            vlm_text = f"HF VLM error: {resp.status_code}, {resp.text}"
        else:
            try:
                content = resp.json()["choices"][0]["message"]["content"]
                if isinstance(content, str):
                    # Usual chat-completions shape: content is plain text.
                    vlm_text = content
                else:
                    # Tolerate a list of content parts as well.
                    vlm_text = "".join(
                        part.get("text", "")
                        for part in content
                        if isinstance(part, dict)
                    )
            except Exception as e:
                vlm_text = f"Failed to parse VLM response: {e}, Response={resp.text}"

        return {
            "saved_to_hf_hub": True,
            "repo_id": HF_DATASET_REPO,
            "path_in_repo": path_in_repo,
            "image_url": image_url,
            "file_size_bytes": len(image_bytes),
            "robot_id": robot_id,
            "vlm_description": vlm_text,
        }

    except Exception as e:
        return {"error": f"Failed to upload/describe image: {str(e)}"}
81
 
 
82
  demo = gr.Interface(
83
  fn=process_and_describe,
84
  inputs=gr.JSON(label="Input Payload (Dict format with 'image_b64')"),
 
 
 
 
1
  import os
2
+ os.system("pip install dashscope")
3
+ import copy
4
+ import base64
5
  import requests
6
+ import tempfile
7
+ import secrets
8
+ import gradio as gr
9
  from huggingface_hub import upload_file
10
+ from dashscope import MultiModalConversation
11
 
12
+ # --- Config ---
13
+ HF_TOKEN = os.environ.get("HF_TOKEN")
14
+ HF_DATASET_REPO = "OppaAI/Robot_MCP"
15
+ MODEL = "qwen2.5-vl-7b-instruct"
16
 
17
+ if not HF_TOKEN:
18
+ raise ValueError("HF_TOKEN environment variable not set.")
19
+
20
+ # --- Helper Functions ---
21
def save_and_upload_image(image_b64):
    """Decode a base64 image, save a local copy, and upload it to the HF dataset.

    Args:
        image_b64: Base64-encoded image data.

    Returns:
        A 4-tuple ``(local_tmp_path, hf_image_url, path_in_repo, size_bytes)``:
        the path of the local copy, the ``resolve/main`` URL of the uploaded
        file, its path inside the dataset repo, and the decoded size in bytes.
        The local copy is left on disk for the caller to read; the caller is
        responsible for deleting it afterwards.

    Raises:
        ValueError: If the input decodes to zero bytes. Malformed base64 raises
            ``binascii.Error``, which is a ``ValueError`` subclass.
    """
    import hashlib

    image_bytes = base64.b64decode(image_b64)
    if not image_bytes:
        raise ValueError("image_b64 decoded to an empty image.")

    # One unique file per call: a single fixed path would be overwritten by a
    # concurrent request before this one has finished uploading/reading it.
    fd, local_tmp_path = tempfile.mkstemp(suffix=".jpg")
    with os.fdopen(fd, "wb") as f:
        f.write(image_bytes)

    # Size alone is not unique: two different images with the same byte count
    # would overwrite each other in the repo, so add a content digest.
    digest = hashlib.sha256(image_bytes).hexdigest()[:16]
    path_in_repo = f"images/uploaded_image_{len(image_bytes)}_{digest}.jpg"
    try:
        upload_file(
            path_or_fileobj=local_tmp_path,
            path_in_repo=path_in_repo,
            repo_id=HF_DATASET_REPO,
            token=HF_TOKEN,
            repo_type="dataset",
        )
    except Exception:
        # The caller never receives the path on failure, so clean up here.
        os.remove(local_tmp_path)
        raise

    hf_image_url = f"https://huggingface.co/datasets/{HF_DATASET_REPO}/resolve/main/{path_in_repo}"
    return local_tmp_path, hf_image_url, path_in_repo, len(image_bytes)
39
+
40
def prepare_vlm_message(image_path, text="Describe this image in detail."):
    """Build the DashScope multimodal chat message for a local image.

    The image file is read and inlined as a base64 ``data:`` URL. Content
    parts use the ``{"image": ...}`` / ``{"text": ...}`` keys that
    ``MultiModalConversation`` expects.

    Args:
        image_path: Path of the image file to read.
        text: Prompt sent alongside the image.

    Returns:
        A one-element list holding a single ``"user"`` message whose
        ``"content"`` is ``[{"image": <data URL>}, {"text": text}]``.

    Raises:
        OSError: If ``image_path`` cannot be opened or read.
    """
    import mimetypes

    with open(image_path, "rb") as f:
        image_b64 = base64.b64encode(f.read()).decode("ascii")

    # Derive the MIME type from the file extension; fall back to JPEG when
    # the extension is unknown or not an image type.
    mime_type = mimetypes.guess_type(str(image_path))[0] or ""
    if not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    return [
        {
            "role": "user",
            "content": [
                {"image": f"data:{mime_type};base64,{image_b64}"},
                {"text": text},
            ],
        }
    ]
55
+
56
+ # --- Main MCP function ---
57
def process_and_describe(payload: dict):
    """Upload the payload's image to the HF dataset and describe it with the VLM.

    Args:
        payload: Dict with a required, non-empty ``"image_b64"`` entry
            (base64-encoded image) and an optional ``"robot_id"`` (defaults to
            ``"unknown"``).

    Returns:
        On success, a dict with ``saved_to_hf_hub``, ``repo_id``,
        ``path_in_repo``, ``image_url``, ``file_size_bytes``, ``robot_id`` and
        ``vlm_description``. On any failure, ``{"error": <message>}``; this
        function does not raise.
    """
    if not isinstance(payload, dict) or not payload.get("image_b64"):
        return {"error": "Payload must be a dict with a non-empty 'image_b64' field."}

    local_tmp_path = None
    try:
        robot_id = payload.get("robot_id", "unknown")
        image_b64 = payload["image_b64"]

        # 1. Save & upload image
        local_tmp_path, hf_url, path_in_repo, size_bytes = save_and_upload_image(image_b64)

        # 2. Prepare VLM message
        messages = prepare_vlm_message(local_tmp_path)

        # 3. Call the VLM without streaming. Only the final text is returned,
        # and streamed chunks each repeat the full text generated so far
        # unless incremental output is requested, so appending them would
        # duplicate the description.
        resp = MultiModalConversation.call(model=MODEL, messages=messages)
        if resp.status_code != 200:
            # Include the service's own error code/message when available.
            detail = " ".join(
                str(part)
                for part in (getattr(resp, "code", ""), getattr(resp, "message", ""))
                if part
            )
            suffix = f" ({detail})" if detail else ""
            return {"error": f"VLM call failed: {resp.status_code}{suffix}"}

        content = resp.output.choices[0].message.content
        if isinstance(content, str):
            vlm_text = content
        else:
            # Content is a list of parts; keep only the text parts.
            vlm_text = "".join(ele["text"] for ele in content if "text" in ele)

        return {
            "saved_to_hf_hub": True,
            "repo_id": HF_DATASET_REPO,
            "path_in_repo": path_in_repo,
            "image_url": hf_url,
            "file_size_bytes": size_bytes,
            "robot_id": robot_id,
            "vlm_description": vlm_text,
        }

    except Exception as e:
        return {"error": str(e)}

    finally:
        # The local copy is only needed to build the VLM message; remove it
        # on every exit path. Best-effort: a file that is already gone is fine.
        if local_tmp_path is not None:
            try:
                os.remove(local_tmp_path)
            except OSError:
                pass
97
 
98
+ # --- Gradio MCP Interface ---
99
  demo = gr.Interface(
100
  fn=process_and_describe,
101
  inputs=gr.JSON(label="Input Payload (Dict format with 'image_b64')"),