ankstoo commited on
Commit
32eafc6
·
1 Parent(s): 882b593
Files changed (2) hide show
  1. app.py +35 -20
  2. requirements.txt +3 -1
app.py CHANGED
@@ -17,6 +17,8 @@ import cv2
17
 
18
  from transformers import (
19
  Qwen2_5_VLForConditionalGeneration,
 
 
20
  AutoModel,
21
  AutoTokenizer,
22
  AutoProcessor,
@@ -29,6 +31,9 @@ from llm_json import parse_llm_json
29
  from data_experiments import all_products, all_experiments, filter_experiments, get_experiment
30
  import llm_messages_v1
31
 
 
 
 
32
  llm_messages = llm_messages_v1
33
 
34
  # Constants for text generation
@@ -38,24 +43,32 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
38
 
39
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
40
 
41
- # Load Qwen2.5-VL-7B-Instruct
42
- MODEL_ID_M = "Qwen/Qwen2.5-VL-7B-Instruct"
43
- processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
44
- model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
45
- MODEL_ID_M,
 
 
 
 
 
 
 
46
  trust_remote_code=True,
47
  torch_dtype=torch.float16
48
  ).to(device).eval()
49
 
50
- # Load Qwen2.5-VL-3B-Instruct
51
- MODEL_ID_X = "Qwen/Qwen2.5-VL-3B-Instruct"
52
- processor_x = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True)
53
- model_x = Qwen2_5_VLForConditionalGeneration.from_pretrained(
54
- MODEL_ID_X,
55
  trust_remote_code=True,
56
  torch_dtype=torch.float16
57
  ).to(device).eval()
58
 
 
 
59
  @spaces.GPU
60
  def process_image(
61
  model_name: str,
@@ -167,17 +180,19 @@ def process_image(
167
  full_json_text = json.dumps(full_json, indent=2, ensure_ascii=False)
168
  yield full_text, full_json_text
169
 
170
- def get_processor_and_model(model_name: str) -> tuple[Union[AutoProcessor, None], Union[Qwen2_5_VLForConditionalGeneration, None]]:
171
- if model_name == "Qwen2.5-VL-7B-Instruct":
172
- return processor_m, model_m
173
- elif model_name == "Qwen2.5-VL-3B-Instruct":
174
- return processor_x, model_x
 
 
175
  else:
176
  raise (None, None)
177
 
178
  def process_image_safety_state(
179
  processor: AutoProcessor,
180
- model: Qwen2_5_VLForConditionalGeneration,
181
  image: Image.Image,
182
  max_new_tokens: int,
183
  generateion_config: GenerationConfig,
@@ -202,7 +217,7 @@ def process_image_safety_state(
202
 
203
  def process_image_sharing_state(
204
  processor: AutoProcessor,
205
- model: Qwen2_5_VLForConditionalGeneration,
206
  image: Image.Image,
207
  max_new_tokens: int,
208
  generateion_config: GenerationConfig,
@@ -227,7 +242,7 @@ def process_image_sharing_state(
227
 
228
  def process_image_approval_state(
229
  processor: AutoProcessor,
230
- model: Qwen2_5_VLForConditionalGeneration,
231
  experiment: dict,
232
  image: Image.Image,
233
  additional_text: Union[str, None],
@@ -386,9 +401,9 @@ with gr.Blocks() as demo:
386
  image_upload = gr.Image(type="pil", label="Image")
387
  submit_button = gr.Button("Submit", elem_classes="submit-btn")
388
  model_choice = gr.Radio(
389
- choices=["Qwen2.5-VL-7B-Instruct", "Qwen2.5-VL-3B-Instruct"],
390
  label="Select Model",
391
- value="Qwen2.5-VL-7B-Instruct"
392
  )
393
  with gr.Accordion("Advanced options", open=False):
394
  max_new_tokens = gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS)
 
17
 
18
  from transformers import (
19
  Qwen2_5_VLForConditionalGeneration,
20
+ MllamaForConditionalGeneration,
21
+ GenerationMixin,
22
  AutoModel,
23
  AutoTokenizer,
24
  AutoProcessor,
 
31
  from data_experiments import all_products, all_experiments, filter_experiments, get_experiment
32
  import llm_messages_v1
33
 
34
+ from dotenv import load_dotenv
35
+ load_dotenv()
36
+
37
  llm_messages = llm_messages_v1
38
 
39
  # Constants for text generation
 
43
 
44
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
45
 
46
+ MODEL_ID_QWEN_7B = "Qwen/Qwen2.5-VL-7B-Instruct"
47
+ processor_qwen_7b = AutoProcessor.from_pretrained(MODEL_ID_QWEN_7B, trust_remote_code=True)
48
+ model_qwen_7b = Qwen2_5_VLForConditionalGeneration.from_pretrained(
49
+ MODEL_ID_QWEN_7B,
50
+ trust_remote_code=True,
51
+ torch_dtype=torch.float16
52
+ ).to(device).eval()
53
+
54
+ MODEL_ID_QWEN_3B = "Qwen/Qwen2.5-VL-3B-Instruct"
55
+ processor_qwen_3b = AutoProcessor.from_pretrained(MODEL_ID_QWEN_3B, trust_remote_code=True)
56
+ model_qwen_3b = Qwen2_5_VLForConditionalGeneration.from_pretrained(
57
+ MODEL_ID_QWEN_3B,
58
  trust_remote_code=True,
59
  torch_dtype=torch.float16
60
  ).to(device).eval()
61
 
62
+ MODEL_ID_LLAMA = "meta-llama/Llama-3.2-11B-Vision-Instruct"
63
+ processor_llama = AutoProcessor.from_pretrained(MODEL_ID_LLAMA, trust_remote_code=True)
64
+ model_llama = MllamaForConditionalGeneration.from_pretrained(
65
+ MODEL_ID_LLAMA,
 
66
  trust_remote_code=True,
67
  torch_dtype=torch.float16
68
  ).to(device).eval()
69
 
70
+ openai_api_key = os.getenv("OPENAI_API_KEY")
71
+
72
  @spaces.GPU
73
  def process_image(
74
  model_name: str,
 
180
  full_json_text = json.dumps(full_json, indent=2, ensure_ascii=False)
181
  yield full_text, full_json_text
182
 
183
def get_processor_and_model(model_name: str) -> tuple[AutoProcessor, GenerationMixin]:
    """Look up the preloaded (processor, model) pair for a model identifier.

    Args:
        model_name: A model identifier matching one of the module constants
            MODEL_ID_QWEN_7B, MODEL_ID_QWEN_3B, or MODEL_ID_LLAMA.

    Returns:
        The processor and model instances loaded at module import time.

    Raises:
        ValueError: If ``model_name`` is not one of the known identifiers.
    """
    # Single dispatch table keeps the id -> (processor, model) mapping in one
    # place; adding a new model is a one-line change.
    registry = {
        MODEL_ID_QWEN_7B: (processor_qwen_7b, model_qwen_7b),
        MODEL_ID_QWEN_3B: (processor_qwen_3b, model_qwen_3b),
        MODEL_ID_LLAMA: (processor_llama, model_llama),
    }
    try:
        return registry[model_name]
    except KeyError:
        # BUG FIX: the original did `raise (None, None)`, which raises a
        # TypeError ("exceptions must derive from BaseException") — a tuple is
        # not an exception. Raise a descriptive ValueError instead.
        raise ValueError(f"Unknown model: {model_name!r}") from None
192
 
193
  def process_image_safety_state(
194
  processor: AutoProcessor,
195
+ model: GenerationMixin,
196
  image: Image.Image,
197
  max_new_tokens: int,
198
  generateion_config: GenerationConfig,
 
217
 
218
  def process_image_sharing_state(
219
  processor: AutoProcessor,
220
+ model: GenerationMixin,
221
  image: Image.Image,
222
  max_new_tokens: int,
223
  generateion_config: GenerationConfig,
 
242
 
243
  def process_image_approval_state(
244
  processor: AutoProcessor,
245
+ model: GenerationMixin,
246
  experiment: dict,
247
  image: Image.Image,
248
  additional_text: Union[str, None],
 
401
  image_upload = gr.Image(type="pil", label="Image")
402
  submit_button = gr.Button("Submit", elem_classes="submit-btn")
403
  model_choice = gr.Radio(
404
+ choices=[MODEL_ID_QWEN_7B, MODEL_ID_QWEN_3B, MODEL_ID_LLAMA],
405
  label="Select Model",
406
+ value=MODEL_ID_QWEN_3B
407
  )
408
  with gr.Accordion("Advanced options", open=False):
409
  max_new_tokens = gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS)
requirements.txt CHANGED
@@ -10,4 +10,6 @@ accelerate
10
  pillow
11
  opencv-python
12
  av
13
- demjson3
 
 
 
10
  pillow
11
  opencv-python
12
  av
13
+ demjson3
14
+ python-dotenv
15
+ openai