Spaces:
Running
on
Zero
Running
on
Zero
update app
Browse files
app.py
CHANGED
|
@@ -129,6 +129,7 @@ MODEL_ID_X = "Senqiao/VisionThink-Efficient"
|
|
| 129 |
processor_x = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True, use_fast=False)
|
| 130 |
model_x = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 131 |
MODEL_ID_X,
|
|
|
|
| 132 |
trust_remote_code=True,
|
| 133 |
torch_dtype=torch.float16
|
| 134 |
).to(device).eval()
|
|
@@ -137,6 +138,7 @@ MODEL_ID_T = "scb10x/typhoon-ocr-3b"
|
|
| 137 |
processor_t = AutoProcessor.from_pretrained(MODEL_ID_T, trust_remote_code=True, use_fast=False)
|
| 138 |
model_t = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 139 |
MODEL_ID_T,
|
|
|
|
| 140 |
trust_remote_code=True,
|
| 141 |
torch_dtype=torch.float16
|
| 142 |
).to(device).eval()
|
|
@@ -145,6 +147,7 @@ MODEL_ID_O = "allenai/olmOCR-7B-0225-preview"
|
|
| 145 |
processor_o = AutoProcessor.from_pretrained(MODEL_ID_O, trust_remote_code=True, use_fast=False)
|
| 146 |
model_o = Qwen2VLForConditionalGeneration.from_pretrained(
|
| 147 |
MODEL_ID_O,
|
|
|
|
| 148 |
trust_remote_code=True,
|
| 149 |
torch_dtype=torch.float16
|
| 150 |
).to(device).eval()
|
|
@@ -154,6 +157,7 @@ SUBFOLDER = "think-preview"
|
|
| 154 |
processor_j = AutoProcessor.from_pretrained(MODEL_ID_J, trust_remote_code=True, subfolder=SUBFOLDER, use_fast=False)
|
| 155 |
model_j = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 156 |
MODEL_ID_J,
|
|
|
|
| 157 |
trust_remote_code=True,
|
| 158 |
subfolder=SUBFOLDER,
|
| 159 |
torch_dtype=torch.float16
|
|
@@ -162,6 +166,7 @@ model_j = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
|
| 162 |
MODEL_ID_V4 = 'openbmb/MiniCPM-V-4'
|
| 163 |
model_v4 = AutoModel.from_pretrained(
|
| 164 |
MODEL_ID_V4,
|
|
|
|
| 165 |
trust_remote_code=True,
|
| 166 |
torch_dtype=torch.bfloat16,
|
| 167 |
).eval().to(device)
|
|
@@ -309,7 +314,7 @@ video_examples = [
|
|
| 309 |
["Explain the ad in detail.", "videos/1.mp4"]
|
| 310 |
]
|
| 311 |
|
| 312 |
-
with gr.Blocks(
|
| 313 |
gr.Markdown("# **Multimodal VLM Thinking**", elem_id="main-title")
|
| 314 |
with gr.Row():
|
| 315 |
with gr.Column(scale=2):
|
|
@@ -356,4 +361,4 @@ with gr.Blocks(theme=steel_blue_theme, css=css) as demo:
|
|
| 356 |
)
|
| 357 |
|
| 358 |
if __name__ == "__main__":
|
| 359 |
-
demo.queue(max_size=50).launch(mcp_server=True, ssr_mode=False, show_error=True)
|
|
|
|
| 129 |
processor_x = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True, use_fast=False)
|
| 130 |
model_x = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 131 |
MODEL_ID_X,
|
| 132 |
+
attn_implementation="flash_attention_2",
|
| 133 |
trust_remote_code=True,
|
| 134 |
torch_dtype=torch.float16
|
| 135 |
).to(device).eval()
|
|
|
|
| 138 |
processor_t = AutoProcessor.from_pretrained(MODEL_ID_T, trust_remote_code=True, use_fast=False)
|
| 139 |
model_t = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 140 |
MODEL_ID_T,
|
| 141 |
+
attn_implementation="flash_attention_2",
|
| 142 |
trust_remote_code=True,
|
| 143 |
torch_dtype=torch.float16
|
| 144 |
).to(device).eval()
|
|
|
|
| 147 |
processor_o = AutoProcessor.from_pretrained(MODEL_ID_O, trust_remote_code=True, use_fast=False)
|
| 148 |
model_o = Qwen2VLForConditionalGeneration.from_pretrained(
|
| 149 |
MODEL_ID_O,
|
| 150 |
+
attn_implementation="flash_attention_2",
|
| 151 |
trust_remote_code=True,
|
| 152 |
torch_dtype=torch.float16
|
| 153 |
).to(device).eval()
|
|
|
|
| 157 |
processor_j = AutoProcessor.from_pretrained(MODEL_ID_J, trust_remote_code=True, subfolder=SUBFOLDER, use_fast=False)
|
| 158 |
model_j = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 159 |
MODEL_ID_J,
|
| 160 |
+
attn_implementation="flash_attention_2",
|
| 161 |
trust_remote_code=True,
|
| 162 |
subfolder=SUBFOLDER,
|
| 163 |
torch_dtype=torch.float16
|
|
|
|
| 166 |
MODEL_ID_V4 = 'openbmb/MiniCPM-V-4'
|
| 167 |
model_v4 = AutoModel.from_pretrained(
|
| 168 |
MODEL_ID_V4,
|
| 169 |
+
attn_implementation="flash_attention_2",
|
| 170 |
trust_remote_code=True,
|
| 171 |
torch_dtype=torch.bfloat16,
|
| 172 |
).eval().to(device)
|
|
|
|
| 314 |
["Explain the ad in detail.", "videos/1.mp4"]
|
| 315 |
]
|
| 316 |
|
| 317 |
+
with gr.Blocks(theme=steel_blue_theme, css=css) as demo:
|
| 318 |
gr.Markdown("# **Multimodal VLM Thinking**", elem_id="main-title")
|
| 319 |
with gr.Row():
|
| 320 |
with gr.Column(scale=2):
|
|
|
|
| 361 |
)
|
| 362 |
|
| 363 |
if __name__ == "__main__":
|
| 364 |
+
demo.queue(max_size=50).launch(mcp_server=True, ssr_mode=False, show_error=True)
|