Spaces:
Running
on
Zero
Running
on
Zero
John Ho
committed on
Commit
·
96a7d4d
1
Parent(s):
035a7ef
skipping the use of gemma model for now
Browse files
app.py
CHANGED
|
@@ -134,23 +134,23 @@ MODEL_ZOO = {
|
|
| 134 |
"InternVL3-1B-hf": load_model(
|
| 135 |
model_name="OpenGVLab/InternVL3-1B-hf",
|
| 136 |
use_flash_attention=False,
|
| 137 |
-
apply_quantization=
|
| 138 |
),
|
| 139 |
"InternVL3-2B-hf": load_model(
|
| 140 |
model_name="OpenGVLab/InternVL3-2B-hf",
|
| 141 |
use_flash_attention=False,
|
| 142 |
-
apply_quantization=
|
| 143 |
),
|
| 144 |
"InternVL3-8B-hf": load_model(
|
| 145 |
model_name="OpenGVLab/InternVL3-8B-hf",
|
| 146 |
use_flash_attention=False,
|
| 147 |
apply_quantization=True,
|
| 148 |
),
|
| 149 |
-
"gemma-3n-e4b-it": load_model(
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
),
|
| 154 |
}
|
| 155 |
|
| 156 |
PROCESSORS = {
|
|
@@ -160,7 +160,7 @@ PROCESSORS = {
|
|
| 160 |
"InternVL3-1B-hf": load_processor("OpenGVLab/InternVL3-1B-hf"),
|
| 161 |
"InternVL3-2B-hf": load_processor("OpenGVLab/InternVL3-2B-hf"),
|
| 162 |
"InternVL3-8B-hf": load_processor("OpenGVLab/InternVL3-8B-hf"),
|
| 163 |
-
"gemma-3n-e4b-it": load_processor("google/gemma-3n-e4b-it"),
|
| 164 |
}
|
| 165 |
logger.debug("Models and Processors Loaded!")
|
| 166 |
|
|
@@ -276,7 +276,7 @@ demo = gr.Interface(
|
|
| 276 |
gr.Textbox(
|
| 277 |
label="Prompt",
|
| 278 |
lines=3,
|
| 279 |
-
info="[cam motion](https://huggingface.co/chancharikm/qwen2.5-vl-7b-cam-motion-preview)",
|
| 280 |
value="Describe the camera motion in this video.",
|
| 281 |
),
|
| 282 |
gr.Dropdown(label="Model", choices=list(MODEL_ZOO.keys())),
|
|
@@ -306,7 +306,8 @@ demo = gr.Interface(
|
|
| 306 |
# gr.Checkbox(label="Apply Quantization", value=True),
|
| 307 |
],
|
| 308 |
outputs=gr.JSON(label="Output JSON"),
|
| 309 |
-
title="",
|
|
|
|
| 310 |
api_name="video_inference",
|
| 311 |
)
|
| 312 |
demo.launch(
|
|
|
|
| 134 |
"InternVL3-1B-hf": load_model(
|
| 135 |
model_name="OpenGVLab/InternVL3-1B-hf",
|
| 136 |
use_flash_attention=False,
|
| 137 |
+
apply_quantization=False,
|
| 138 |
),
|
| 139 |
"InternVL3-2B-hf": load_model(
|
| 140 |
model_name="OpenGVLab/InternVL3-2B-hf",
|
| 141 |
use_flash_attention=False,
|
| 142 |
+
apply_quantization=False,
|
| 143 |
),
|
| 144 |
"InternVL3-8B-hf": load_model(
|
| 145 |
model_name="OpenGVLab/InternVL3-8B-hf",
|
| 146 |
use_flash_attention=False,
|
| 147 |
apply_quantization=True,
|
| 148 |
),
|
| 149 |
+
# "gemma-3n-e4b-it": load_model(
|
| 150 |
+
# model_name="google/gemma-3n-e4b-it",
|
| 151 |
+
# use_flash_attention=False,
|
| 152 |
+
# apply_quantization=True,
|
| 153 |
+
# ),
|
| 154 |
}
|
| 155 |
|
| 156 |
PROCESSORS = {
|
|
|
|
| 160 |
"InternVL3-1B-hf": load_processor("OpenGVLab/InternVL3-1B-hf"),
|
| 161 |
"InternVL3-2B-hf": load_processor("OpenGVLab/InternVL3-2B-hf"),
|
| 162 |
"InternVL3-8B-hf": load_processor("OpenGVLab/InternVL3-8B-hf"),
|
| 163 |
+
# "gemma-3n-e4b-it": load_processor("google/gemma-3n-e4b-it"),
|
| 164 |
}
|
| 165 |
logger.debug("Models and Processors Loaded!")
|
| 166 |
|
|
|
|
| 276 |
gr.Textbox(
|
| 277 |
label="Prompt",
|
| 278 |
lines=3,
|
| 279 |
+
info="Some models like [cam motion](https://huggingface.co/chancharikm/qwen2.5-vl-7b-cam-motion-preview) are trained specific prompts",
|
| 280 |
value="Describe the camera motion in this video.",
|
| 281 |
),
|
| 282 |
gr.Dropdown(label="Model", choices=list(MODEL_ZOO.keys())),
|
|
|
|
| 306 |
# gr.Checkbox(label="Apply Quantization", value=True),
|
| 307 |
],
|
| 308 |
outputs=gr.JSON(label="Output JSON"),
|
| 309 |
+
title="Video Captioning with VLM",
|
| 310 |
+
description='comparing various "small" VLMs on the task of video captioning',
|
| 311 |
api_name="video_inference",
|
| 312 |
)
|
| 313 |
demo.launch(
|