John Ho committed on
Commit 96a7d4d · 1 Parent(s): 035a7ef

skipping the use of gemma model for now

Files changed (1)
  1. app.py +11 -10
app.py CHANGED
```diff
@@ -134,23 +134,23 @@ MODEL_ZOO = {
     "InternVL3-1B-hf": load_model(
         model_name="OpenGVLab/InternVL3-1B-hf",
         use_flash_attention=False,
-        apply_quantization=True,
+        apply_quantization=False,
     ),
     "InternVL3-2B-hf": load_model(
         model_name="OpenGVLab/InternVL3-2B-hf",
         use_flash_attention=False,
-        apply_quantization=True,
+        apply_quantization=False,
     ),
     "InternVL3-8B-hf": load_model(
         model_name="OpenGVLab/InternVL3-8B-hf",
         use_flash_attention=False,
         apply_quantization=True,
     ),
-    "gemma-3n-e4b-it": load_model(
-        model_name="google/gemma-3n-e4b-it",
-        use_flash_attention=False,
-        apply_quantization=True,
-    ),
+    # "gemma-3n-e4b-it": load_model(
+    #     model_name="google/gemma-3n-e4b-it",
+    #     use_flash_attention=False,
+    #     apply_quantization=True,
+    # ),
 }
 
 PROCESSORS = {
@@ -160,7 +160,7 @@ PROCESSORS = {
     "InternVL3-1B-hf": load_processor("OpenGVLab/InternVL3-1B-hf"),
     "InternVL3-2B-hf": load_processor("OpenGVLab/InternVL3-2B-hf"),
     "InternVL3-8B-hf": load_processor("OpenGVLab/InternVL3-8B-hf"),
-    "gemma-3n-e4b-it": load_processor("google/gemma-3n-e4b-it"),
+    # "gemma-3n-e4b-it": load_processor("google/gemma-3n-e4b-it"),
 }
 logger.debug("Models and Processors Loaded!")
 
@@ -276,7 +276,7 @@ demo = gr.Interface(
         gr.Textbox(
             label="Prompt",
             lines=3,
-            info="[cam motion](https://huggingface.co/chancharikm/qwen2.5-vl-7b-cam-motion-preview)",
+            info="Some models like [cam motion](https://huggingface.co/chancharikm/qwen2.5-vl-7b-cam-motion-preview) are trained on specific prompts",
             value="Describe the camera motion in this video.",
         ),
         gr.Dropdown(label="Model", choices=list(MODEL_ZOO.keys())),
@@ -306,7 +306,8 @@ demo = gr.Interface(
         # gr.Checkbox(label="Apply Quantization", value=True),
     ],
     outputs=gr.JSON(label="Output JSON"),
-    title="",
+    title="Video Captioning with VLM",
+    description='comparing various "small" VLMs on the task of video captioning',
     api_name="video_inference",
 )
 demo.launch(
```
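
The helpers `load_model` and `load_processor` are defined earlier in app.py and are not part of this commit, so their bodies are unknown. A minimal sketch of what they might look like, assuming the stock transformers loading APIs (`AutoModelForImageTextToText`, `AutoProcessor`, `BitsAndBytesConfig`); only the parameter names and checkpoint ids come from the diff, everything else is an assumption:

```python
# Hypothetical reconstruction -- load_model/load_processor are NOT shown in
# this commit. Only the parameter names (model_name, use_flash_attention,
# apply_quantization) and the checkpoint ids come from the diff.
import torch
from transformers import AutoModelForImageTextToText, AutoProcessor, BitsAndBytesConfig


def load_model(model_name: str, use_flash_attention: bool = False,
               apply_quantization: bool = False):
    # 4-bit quantization trades some accuracy for memory; this commit turns
    # it off for the small 1B/2B checkpoints and keeps it for the 8B one.
    quantization_config = (
        BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)
        if apply_quantization
        else None
    )
    return AutoModelForImageTextToText.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        attn_implementation="flash_attention_2" if use_flash_attention else "sdpa",
        quantization_config=quantization_config,
        device_map="auto",
    )


def load_processor(model_name: str):
    # Pairs each checkpoint with its chat template and video/image preprocessing.
    return AutoProcessor.from_pretrained(model_name)
```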
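
Because the interface sets `api_name="video_inference"`, the Space exposes a `/video_inference` endpoint that can be called programmatically. A sketch using `gradio_client`; the Space id is a placeholder and the positional input order (in particular the video input) is not visible in this diff, so treat both as assumptions:

```python
# Hypothetical client call -- the Space id is a placeholder, and the input
# signature is inferred from the visible gr.Interface fields (prompt textbox,
# model dropdown); the remaining inputs are not shown in this diff.
from gradio_client import Client, handle_file

client = Client("user/space-id")  # placeholder, not the real Space id
result = client.predict(
    handle_file("clip.mp4"),                      # assumed video input
    "Describe the camera motion in this video.",  # Prompt (default from the diff)
    "InternVL3-1B-hf",                            # Model (one of MODEL_ZOO's keys)
    api_name="/video_inference",
)
print(result)  # mirrors the gr.JSON output component
```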