ankstoo commited on
Commit
32eafc6
·
1 Parent(s): 882b593
Files changed (2) hide show
  1. app.py +35 -20
  2. requirements.txt +3 -1
app.py CHANGED
@@ -17,6 +17,8 @@ import cv2
17
 
18
  from transformers import (
19
  Qwen2_5_VLForConditionalGeneration,
 
 
20
  AutoModel,
21
  AutoTokenizer,
22
  AutoProcessor,
@@ -29,6 +31,9 @@ from llm_json import parse_llm_json
29
  from data_experiments import all_products, all_experiments, filter_experiments, get_experiment
30
  import llm_messages_v1
31
 
 
 
 
32
  llm_messages = llm_messages_v1
33
 
34
  # Constants for text generation
@@ -38,24 +43,32 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
38
 
39
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
40
 
41
- # Load Qwen2.5-VL-7B-Instruct
42
- MODEL_ID_M = "Qwen/Qwen2.5-VL-7B-Instruct"
43
- processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
44
- model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
45
- MODEL_ID_M,
 
 
 
 
 
 
 
46
  trust_remote_code=True,
47
  torch_dtype=torch.float16
48
  ).to(device).eval()
49
 
50
- # Load Qwen2.5-VL-3B-Instruct
51
- MODEL_ID_X = "Qwen/Qwen2.5-VL-3B-Instruct"
52
- processor_x = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True)
53
- model_x = Qwen2_5_VLForConditionalGeneration.from_pretrained(
54
- MODEL_ID_X,
55
  trust_remote_code=True,
56
  torch_dtype=torch.float16
57
  ).to(device).eval()
58
 
 
 
59
  @spaces.GPU
60
  def process_image(
61
  model_name: str,
@@ -167,17 +180,19 @@ def process_image(
167
  full_json_text = json.dumps(full_json, indent=2, ensure_ascii=False)
168
  yield full_text, full_json_text
169
 
170
- def get_processor_and_model(model_name: str) -> tuple[Union[AutoProcessor, None], Union[Qwen2_5_VLForConditionalGeneration, None]]:
171
- if model_name == "Qwen2.5-VL-7B-Instruct":
172
- return processor_m, model_m
173
- elif model_name == "Qwen2.5-VL-3B-Instruct":
174
- return processor_x, model_x
 
 
175
  else:
176
  raise (None, None)
177
 
178
  def process_image_safety_state(
179
  processor: AutoProcessor,
180
- model: Qwen2_5_VLForConditionalGeneration,
181
  image: Image.Image,
182
  max_new_tokens: int,
183
  generateion_config: GenerationConfig,
@@ -202,7 +217,7 @@ def process_image_safety_state(
202
 
203
  def process_image_sharing_state(
204
  processor: AutoProcessor,
205
- model: Qwen2_5_VLForConditionalGeneration,
206
  image: Image.Image,
207
  max_new_tokens: int,
208
  generateion_config: GenerationConfig,
@@ -227,7 +242,7 @@ def process_image_sharing_state(
227
 
228
  def process_image_approval_state(
229
  processor: AutoProcessor,
230
- model: Qwen2_5_VLForConditionalGeneration,
231
  experiment: dict,
232
  image: Image.Image,
233
  additional_text: Union[str, None],
@@ -386,9 +401,9 @@ with gr.Blocks() as demo:
386
  image_upload = gr.Image(type="pil", label="Image")
387
  submit_button = gr.Button("Submit", elem_classes="submit-btn")
388
  model_choice = gr.Radio(
389
- choices=["Qwen2.5-VL-7B-Instruct", "Qwen2.5-VL-3B-Instruct"],
390
  label="Select Model",
391
- value="Qwen2.5-VL-7B-Instruct"
392
  )
393
  with gr.Accordion("Advanced options", open=False):
394
  max_new_tokens = gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS)
 
17
 
18
  from transformers import (
19
  Qwen2_5_VLForConditionalGeneration,
20
+ MllamaForConditionalGeneration,
21
+ GenerationMixin,
22
  AutoModel,
23
  AutoTokenizer,
24
  AutoProcessor,
 
31
  from data_experiments import all_products, all_experiments, filter_experiments, get_experiment
32
  import llm_messages_v1
33
 
34
+ from dotenv import load_dotenv
35
+ load_dotenv()
36
+
37
  llm_messages = llm_messages_v1
38
 
39
  # Constants for text generation
 
43
 
44
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
45
 
46
+ MODEL_ID_QWEN_7B = "Qwen/Qwen2.5-VL-7B-Instruct"
47
+ processor_qwen_7b = AutoProcessor.from_pretrained(MODEL_ID_QWEN_7B, trust_remote_code=True)
48
+ model_qwen_7b = Qwen2_5_VLForConditionalGeneration.from_pretrained(
49
+ MODEL_ID_QWEN_7B,
50
+ trust_remote_code=True,
51
+ torch_dtype=torch.float16
52
+ ).to(device).eval()
53
+
54
+ MODEL_ID_QWEN_3B = "Qwen/Qwen2.5-VL-3B-Instruct"
55
+ processor_qwen_3b = AutoProcessor.from_pretrained(MODEL_ID_QWEN_3B, trust_remote_code=True)
56
+ model_qwen_3b = Qwen2_5_VLForConditionalGeneration.from_pretrained(
57
+ MODEL_ID_QWEN_3B,
58
  trust_remote_code=True,
59
  torch_dtype=torch.float16
60
  ).to(device).eval()
61
 
62
+ MODEL_ID_LLAMA = "meta-llama/Llama-3.2-11B-Vision-Instruct"
63
+ processor_llama = AutoProcessor.from_pretrained(MODEL_ID_LLAMA, trust_remote_code=True)
64
+ model_llama = MllamaForConditionalGeneration.from_pretrained(
65
+ MODEL_ID_LLAMA,
 
66
  trust_remote_code=True,
67
  torch_dtype=torch.float16
68
  ).to(device).eval()
69
 
70
+ openai_api_key = os.getenv("OPENAI_API_KEY")
71
+
72
  @spaces.GPU
73
  def process_image(
74
  model_name: str,
 
180
  full_json_text = json.dumps(full_json, indent=2, ensure_ascii=False)
181
  yield full_text, full_json_text
182
 
183
def get_processor_and_model(model_name: str) -> tuple[AutoProcessor, GenerationMixin]:
    """Look up the preloaded (processor, model) pair for a model identifier.

    Args:
        model_name: A model identifier matching one of the module constants
            MODEL_ID_QWEN_7B, MODEL_ID_QWEN_3B, or MODEL_ID_LLAMA.

    Returns:
        The processor and model instances loaded at module import time.

    Raises:
        ValueError: If ``model_name`` is not one of the known identifiers.
    """
    # Single dispatch table keeps the id -> (processor, model) mapping in one
    # place; adding a new model is a one-line change.
    registry = {
        MODEL_ID_QWEN_7B: (processor_qwen_7b, model_qwen_7b),
        MODEL_ID_QWEN_3B: (processor_qwen_3b, model_qwen_3b),
        MODEL_ID_LLAMA: (processor_llama, model_llama),
    }
    try:
        return registry[model_name]
    except KeyError:
        # BUG FIX: the original did `raise (None, None)`, which raises a
        # TypeError ("exceptions must derive from BaseException") — a tuple is
        # not an exception. Raise a descriptive ValueError instead.
        raise ValueError(f"Unknown model: {model_name!r}") from None
192
 
193
  def process_image_safety_state(
194
  processor: AutoProcessor,
195
+ model: GenerationMixin,
196
  image: Image.Image,
197
  max_new_tokens: int,
198
  generateion_config: GenerationConfig,
 
217
 
218
  def process_image_sharing_state(
219
  processor: AutoProcessor,
220
+ model: GenerationMixin,
221
  image: Image.Image,
222
  max_new_tokens: int,
223
  generateion_config: GenerationConfig,
 
242
 
243
  def process_image_approval_state(
244
  processor: AutoProcessor,
245
+ model: GenerationMixin,
246
  experiment: dict,
247
  image: Image.Image,
248
  additional_text: Union[str, None],
 
401
  image_upload = gr.Image(type="pil", label="Image")
402
  submit_button = gr.Button("Submit", elem_classes="submit-btn")
403
  model_choice = gr.Radio(
404
+ choices=[MODEL_ID_QWEN_7B, MODEL_ID_QWEN_3B, MODEL_ID_LLAMA],
405
  label="Select Model",
406
+ value=MODEL_ID_QWEN_3B
407
  )
408
  with gr.Accordion("Advanced options", open=False):
409
  max_new_tokens = gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS)
requirements.txt CHANGED
@@ -10,4 +10,6 @@ accelerate
10
  pillow
11
  opencv-python
12
  av
13
- demjson3
 
 
 
10
  pillow
11
  opencv-python
12
  av
13
+ demjson3
14
+ python-dotenv
15
+ openai