Spaces:
Running
on
Zero
Running
on
Zero
update app
Browse files
app.py
CHANGED
|
@@ -129,6 +129,7 @@ MODEL_ID_X = "Senqiao/VisionThink-Efficient"
|
|
| 129 |
processor_x = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True, use_fast=False)
|
| 130 |
model_x = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 131 |
MODEL_ID_X,
|
|
|
|
| 132 |
trust_remote_code=True,
|
| 133 |
torch_dtype=torch.float16
|
| 134 |
).to(device).eval()
|
|
@@ -137,6 +138,7 @@ MODEL_ID_T = "scb10x/typhoon-ocr-3b"
|
|
| 137 |
processor_t = AutoProcessor.from_pretrained(MODEL_ID_T, trust_remote_code=True, use_fast=False)
|
| 138 |
model_t = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 139 |
MODEL_ID_T,
|
|
|
|
| 140 |
trust_remote_code=True,
|
| 141 |
torch_dtype=torch.float16
|
| 142 |
).to(device).eval()
|
|
@@ -145,6 +147,7 @@ MODEL_ID_O = "allenai/olmOCR-7B-0225-preview"
|
|
| 145 |
processor_o = AutoProcessor.from_pretrained(MODEL_ID_O, trust_remote_code=True, use_fast=False)
|
| 146 |
model_o = Qwen2VLForConditionalGeneration.from_pretrained(
|
| 147 |
MODEL_ID_O,
|
|
|
|
| 148 |
trust_remote_code=True,
|
| 149 |
torch_dtype=torch.float16
|
| 150 |
).to(device).eval()
|
|
@@ -154,6 +157,7 @@ SUBFOLDER = "think-preview"
|
|
| 154 |
processor_j = AutoProcessor.from_pretrained(MODEL_ID_J, trust_remote_code=True, subfolder=SUBFOLDER, use_fast=False)
|
| 155 |
model_j = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 156 |
MODEL_ID_J,
|
|
|
|
| 157 |
trust_remote_code=True,
|
| 158 |
subfolder=SUBFOLDER,
|
| 159 |
torch_dtype=torch.float16
|
|
@@ -162,6 +166,7 @@ model_j = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
|
| 162 |
MODEL_ID_V4 = 'openbmb/MiniCPM-V-4'
|
| 163 |
model_v4 = AutoModel.from_pretrained(
|
| 164 |
MODEL_ID_V4,
|
|
|
|
| 165 |
trust_remote_code=True,
|
| 166 |
torch_dtype=torch.bfloat16,
|
| 167 |
).eval().to(device)
|
|
@@ -309,7 +314,7 @@ video_examples = [
|
|
| 309 |
["Explain the ad in detail.", "videos/1.mp4"]
|
| 310 |
]
|
| 311 |
|
| 312 |
-
with gr.Blocks(
|
| 313 |
gr.Markdown("# **Multimodal VLM Thinking**", elem_id="main-title")
|
| 314 |
with gr.Row():
|
| 315 |
with gr.Column(scale=2):
|
|
@@ -356,4 +361,4 @@ with gr.Blocks(theme=steel_blue_theme, css=css) as demo:
|
|
| 356 |
)
|
| 357 |
|
| 358 |
if __name__ == "__main__":
|
| 359 |
-
demo.queue(max_size=50).launch(mcp_server=True, ssr_mode=False, show_error=True)
|
|
|
|
| 129 |
processor_x = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True, use_fast=False)
|
| 130 |
model_x = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 131 |
MODEL_ID_X,
|
| 132 |
+
attn_implementation="flash_attention_2",
|
| 133 |
trust_remote_code=True,
|
| 134 |
torch_dtype=torch.float16
|
| 135 |
).to(device).eval()
|
|
|
|
| 138 |
processor_t = AutoProcessor.from_pretrained(MODEL_ID_T, trust_remote_code=True, use_fast=False)
|
| 139 |
model_t = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 140 |
MODEL_ID_T,
|
| 141 |
+
attn_implementation="flash_attention_2",
|
| 142 |
trust_remote_code=True,
|
| 143 |
torch_dtype=torch.float16
|
| 144 |
).to(device).eval()
|
|
|
|
| 147 |
processor_o = AutoProcessor.from_pretrained(MODEL_ID_O, trust_remote_code=True, use_fast=False)
|
| 148 |
model_o = Qwen2VLForConditionalGeneration.from_pretrained(
|
| 149 |
MODEL_ID_O,
|
| 150 |
+
attn_implementation="flash_attention_2",
|
| 151 |
trust_remote_code=True,
|
| 152 |
torch_dtype=torch.float16
|
| 153 |
).to(device).eval()
|
|
|
|
| 157 |
processor_j = AutoProcessor.from_pretrained(MODEL_ID_J, trust_remote_code=True, subfolder=SUBFOLDER, use_fast=False)
|
| 158 |
model_j = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 159 |
MODEL_ID_J,
|
| 160 |
+
attn_implementation="flash_attention_2",
|
| 161 |
trust_remote_code=True,
|
| 162 |
subfolder=SUBFOLDER,
|
| 163 |
torch_dtype=torch.float16
|
|
|
|
| 166 |
MODEL_ID_V4 = 'openbmb/MiniCPM-V-4'
|
| 167 |
model_v4 = AutoModel.from_pretrained(
|
| 168 |
MODEL_ID_V4,
|
| 169 |
+
attn_implementation="flash_attention_2",
|
| 170 |
trust_remote_code=True,
|
| 171 |
torch_dtype=torch.bfloat16,
|
| 172 |
).eval().to(device)
|
|
|
|
| 314 |
["Explain the ad in detail.", "videos/1.mp4"]
|
| 315 |
]
|
| 316 |
|
| 317 |
+
with gr.Blocks(theme=steel_blue_theme, css=css) as demo:
|
| 318 |
gr.Markdown("# **Multimodal VLM Thinking**", elem_id="main-title")
|
| 319 |
with gr.Row():
|
| 320 |
with gr.Column(scale=2):
|
|
|
|
| 361 |
)
|
| 362 |
|
| 363 |
if __name__ == "__main__":
|
| 364 |
+
demo.queue(max_size=50).launch(mcp_server=True, ssr_mode=False, show_error=True)
|