i0xs0 committed on
Commit
387c3cb
·
verified ·
1 Parent(s): 8f6d933

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -43
app.py CHANGED
@@ -1,44 +1,30 @@
1
  import gradio as gr
2
- import spaces
3
  from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
4
  from qwen_vl_utils import process_vision_info
5
  import torch
6
  from PIL import Image
7
- import subprocess
8
  from datetime import datetime
9
  import numpy as np
10
  import os
11
 
12
-
13
- # subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
14
-
15
- # models = {
16
- # "Qwen/Qwen2-VL-7B-Instruct": AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", trust_remote_code=True, torch_dtype="auto", _attn_implementation="flash_attention_2").cuda().eval()
17
-
18
- # }
19
  def array_to_image_path(image_array):
20
  if image_array is None:
21
  raise ValueError("No image provided. Please upload an image before submitting.")
22
- # Convert numpy array to PIL Image
23
  img = Image.fromarray(np.uint8(image_array))
24
-
25
- # Generate a unique filename using timestamp
26
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
27
  filename = f"image_{timestamp}.png"
28
-
29
- # Save the image
30
  img.save(filename)
31
-
32
- # Get the full path of the saved image
33
- full_path = os.path.abspath(filename)
34
-
35
- return full_path
36
 
37
- device = "mps" if torch.backends.mps.is_available() else "cpu"
 
38
 
39
  models = {
40
- "Qwen/Qwen2-VL-7B-Instruct": Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", trust_remote_code=True, torch_dtype="auto").to(device).eval()
41
-
 
 
 
42
  }
43
 
44
  processors = {
@@ -47,41 +33,31 @@ processors = {
47
 
48
  DESCRIPTION = "[Qwen2-VL-7B Demo](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct)"
49
 
50
- kwargs = {}
51
- kwargs['torch_dtype'] = torch.bfloat16
52
-
53
  user_prompt = '<|user|>\n'
54
  assistant_prompt = '<|assistant|>\n'
55
  prompt_suffix = "<|end|>\n"
56
 
57
- @spaces.GPU
58
  def run_example(image, text_input=None, model_id="Qwen/Qwen2-VL-7B-Instruct"):
59
  image_path = array_to_image_path(image)
60
 
61
- print(image_path)
62
  model = models[model_id]
63
  processor = processors[model_id]
64
 
65
  prompt = f"{user_prompt}<|image_1|>\n{text_input}{prompt_suffix}{assistant_prompt}"
66
  image = Image.fromarray(image).convert("RGB")
67
  messages = [
68
- {
69
  "role": "user",
70
  "content": [
71
- {
72
- "type": "image",
73
- "image": image_path,
74
- },
75
  {"type": "text", "text": text_input},
76
  ],
77
  }
78
  ]
79
 
80
- # Preparation for inference
81
- text = processor.apply_chat_template(
82
- messages, tokenize=False, add_generation_prompt=True
83
- )
84
  image_inputs, video_inputs = process_vision_info(messages)
 
85
  inputs = processor(
86
  text=[text],
87
  images=image_inputs,
@@ -89,16 +65,13 @@ def run_example(image, text_input=None, model_id="Qwen/Qwen2-VL-7B-Instruct"):
89
  padding=True,
90
  return_tensors="pt",
91
  )
92
- inputs = inputs.to("cuda")
93
 
94
- # Inference: Generation of the output
95
- generated_ids = model.generate(**inputs, max_new_tokens=1024)
96
  generated_ids_trimmed = [
97
- out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
98
  ]
99
- output_text = processor.batch_decode(
100
- generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
101
- )
102
 
103
  return output_text[0]
104
 
 
1
  import gradio as gr
 
2
  from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
3
  from qwen_vl_utils import process_vision_info
4
  import torch
5
  from PIL import Image
 
6
  from datetime import datetime
7
  import numpy as np
8
  import os
9
 
 
 
 
 
 
 
 
10
def array_to_image_path(image_array):
    """Persist a numpy image array as a PNG in the working directory.

    Args:
        image_array: Array-like pixel data from the Gradio image input;
            values are cast to uint8 before saving.

    Returns:
        Absolute path of the saved PNG file.

    Raises:
        ValueError: If no image was provided.
    """
    if image_array is None:
        raise ValueError("No image provided. Please upload an image before submitting.")
    img = Image.fromarray(np.uint8(image_array))
    # Include microseconds (%f) so two rapid submissions cannot collide
    # on the same filename (the second-resolution stamp could).
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    filename = f"image_{timestamp}.png"
    img.save(filename)
    return os.path.abspath(filename)
 
 
 
 
18
 
19
# Inference is forced onto the CPU in this deployment.
device = "cpu"

_MODEL_ID = "Qwen/Qwen2-VL-7B-Instruct"


def _load_model(name):
    # Load weights in float32 (plain CPU path), move to the target
    # device, and switch to eval mode for inference.
    model = Qwen2VLForConditionalGeneration.from_pretrained(
        name,
        trust_remote_code=True,
        torch_dtype=torch.float32,
    )
    return model.to(device).eval()


# Map of model id -> ready-to-use model instance.
models = {_MODEL_ID: _load_model(_MODEL_ID)}
29
 
30
  processors = {
 
33
 
34
# Markdown blurb shown at the top of the Gradio demo page.
DESCRIPTION = "[Qwen2-VL-7B Demo](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct)"

# Chat-template delimiter tokens used when composing prompts.
user_prompt = "<|user|>\n"
assistant_prompt = "<|assistant|>\n"
prompt_suffix = "<|end|>\n"
39
 
 
40
def run_example(image, text_input=None, model_id="Qwen/Qwen2-VL-7B-Instruct"):
    """Run one image+text inference round with the selected Qwen2-VL model.

    Args:
        image: numpy image array from the Gradio image component.
        text_input: User question/instruction to pair with the image.
        model_id: Key into the module-level ``models``/``processors`` dicts.

    Returns:
        The decoded model response (first batch element) as a string.

    Raises:
        ValueError: Propagated from ``array_to_image_path`` when *image*
            is ``None``.
    """
    image_path = array_to_image_path(image)

    model = models[model_id]
    processor = processors[model_id]

    # NOTE: the original body also built a Phi-style `prompt` string and a
    # PIL-converted copy of `image`; both were dead locals and are removed.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": image_path},
                {"type": "text", "text": text_input},
            ],
        }
    ]

    # Build the chat-formatted prompt and extract the vision inputs the
    # processor expects.
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    image_inputs, _ = process_vision_info(messages)

    inputs = processor(
        text=[text],
        images=image_inputs,
        padding=True,
        return_tensors="pt",
    )
    # Keep tensors on the same device the model was loaded on (was a
    # hard-coded "cpu", which would silently break if `device` changed).
    inputs = inputs.to(device)

    generated_ids = model.generate(**inputs, max_new_tokens=512)  # modest budget for CPU
    # Strip the echoed prompt tokens so only newly generated tokens decode.
    generated_ids_trimmed = [
        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    output_text = processor.batch_decode(
        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )

    return output_text[0]
77