John Ho committed on
Commit
25dfae5
·
1 Parent(s): b89bc96

skip loading of internvl3 8b model

Browse files
Files changed (1) hide show
  1. app.py +11 -7
app.py CHANGED
@@ -1,4 +1,4 @@
1
- import spaces, ffmpeg, os, sys, torch
2
  import gradio as gr
3
  from transformers import (
4
  Qwen2_5_VLForConditionalGeneration,
@@ -120,9 +120,9 @@ MODEL_ZOO = {
120
  "InternVL3-2B-hf": AutoModelForImageTextToText.from_pretrained(
121
  "OpenGVLab/InternVL3-2B-hf", device_map=DEVICE, torch_dtype=DTYPE
122
  ),
123
- "InternVL3-8B-hf": AutoModelForImageTextToText.from_pretrained(
124
- "OpenGVLab/InternVL3-8B-hf", device_map=DEVICE, torch_dtype=DTYPE
125
- ),
126
  }
127
 
128
  PROCESSORS = {
@@ -131,7 +131,7 @@ PROCESSORS = {
131
  "qwen2.5-vl-3b-instruct": load_processor("Qwen/Qwen2.5-VL-3B-Instruct"),
132
  "InternVL3-1B-hf": load_processor("OpenGVLab/InternVL3-1B-hf"),
133
  "InternVL3-2B-hf": load_processor("OpenGVLab/InternVL3-2B-hf"),
134
- "InternVL3-8B-hf": load_processor("OpenGVLab/InternVL3-8B-hf"),
135
  }
136
  logger.debug("Models and Processors Loaded!")
137
 
@@ -144,6 +144,7 @@ def inference(
144
  custom_fps: int = 8,
145
  max_tokens: int = 256,
146
  ):
 
147
  # default processor
148
  # processor, model = PROCESSOR, MODEL
149
  # processor = load_processor()
@@ -225,10 +226,13 @@ def inference(
225
  output_text = processor.decode(
226
  output[0, inputs["input_ids"].shape[1] :], skip_special_tokens=True
227
  )
228
- logger.debug(output_text)
229
  case _:
230
  raise ValueError(f"{model_name} is not currently supported")
231
- return output_text
 
 
 
 
232
 
233
 
234
  demo = gr.Interface(
 
1
+ import spaces, ffmpeg, os, sys, torch, time
2
  import gradio as gr
3
  from transformers import (
4
  Qwen2_5_VLForConditionalGeneration,
 
120
  "InternVL3-2B-hf": AutoModelForImageTextToText.from_pretrained(
121
  "OpenGVLab/InternVL3-2B-hf", device_map=DEVICE, torch_dtype=DTYPE
122
  ),
123
+ # "InternVL3-8B-hf": AutoModelForImageTextToText.from_pretrained(
124
+ # "OpenGVLab/InternVL3-8B-hf", device_map=DEVICE, torch_dtype=DTYPE
125
+ # ),
126
  }
127
 
128
  PROCESSORS = {
 
131
  "qwen2.5-vl-3b-instruct": load_processor("Qwen/Qwen2.5-VL-3B-Instruct"),
132
  "InternVL3-1B-hf": load_processor("OpenGVLab/InternVL3-1B-hf"),
133
  "InternVL3-2B-hf": load_processor("OpenGVLab/InternVL3-2B-hf"),
134
+ # "InternVL3-8B-hf": load_processor("OpenGVLab/InternVL3-8B-hf"),
135
  }
136
  logger.debug("Models and Processors Loaded!")
137
 
 
144
  custom_fps: int = 8,
145
  max_tokens: int = 256,
146
  ):
147
+ s_time = time.time()
148
  # default processor
149
  # processor, model = PROCESSOR, MODEL
150
  # processor = load_processor()
 
226
  output_text = processor.decode(
227
  output[0, inputs["input_ids"].shape[1] :], skip_special_tokens=True
228
  )
 
229
  case _:
230
  raise ValueError(f"{model_name} is not currently supported")
231
+ return {
232
+ "output_text": output_text,
233
+ "fps": fps,
234
+ "inference_time": time.time() - s_time,
235
+ }
236
 
237
 
238
  demo = gr.Interface(