Spaces:

whyumesh
/

eterniq_vision_to_code

Sleeping

whyumesh committed on Oct 1, 2024

Commit

124292b

verified ·

1 Parent(s): 5c690f1

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -10,8 +10,8 @@ import os
 def load_model():
     model = Qwen2VLForConditionalGeneration.from_pretrained(
         "Qwen/Qwen2-VL-2B-Instruct",
-        torch_dtype=torch.float32
-    )
     processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
     return model, processor
@@ -103,9 +103,10 @@ def generate_response(messages):
         videos=video_inputs,
         padding=True,
         return_tensors="pt",
-    )
     del image_inputs, video_inputs
     with torch.no_grad():
         generated_ids = model.generate(**inputs, max_new_tokens=512)  # Increased token limit for more detailed responses
@@ -130,4 +131,4 @@ iface = gr.Interface(
     description="Upload an image or video of code, diagrams, or technical content. Ask questions about bugs, errors, or explanations of functions.",
 )
-iface.launch()

 def load_model():
     model = Qwen2VLForConditionalGeneration.from_pretrained(
         "Qwen/Qwen2-VL-2B-Instruct",
+        torch_dtype=torch.float16
+    ).to("cuda")
     processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
     return model, processor
         videos=video_inputs,
         padding=True,
         return_tensors="pt",
+    ).to("cuda")
     del image_inputs, video_inputs
+    torch.cuda.empty_cache()
     with torch.no_grad():
         generated_ids = model.generate(**inputs, max_new_tokens=512)  # Increased token limit for more detailed responses
     description="Upload an image or video of code, diagrams, or technical content. Ask questions about bugs, errors, or explanations of functions.",
 )
+iface.launch(share=True)