echarlaix HF Staff commited on
Commit
6a80cb1
·
1 Parent(s): 27b72ae

replace model

Browse files
Files changed (1) hide show
  1. app.py +3 -6
app.py CHANGED
@@ -13,7 +13,7 @@ from transformers.generation.streamers import TextIteratorStreamer
13
  from optimum.intel import OVModelForVisualCausalLM
14
 
15
 
16
- default_model_id = "echarlaix/SmolVLM2-500M-Video-Instruct-openvino"
17
 
18
  model_cache = {
19
  "model_id" : default_model_id,
@@ -255,12 +255,9 @@ examples = [
255
 
256
 
257
  model_choices = [
258
- #"echarlaix/SmolVLM2-500M-Video-Instruct-openvino",
259
- #"echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-static",
260
- #"echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-woq",
261
  "echarlaix/SmolVLM2-256M-Video-Instruct-openvino",
262
  "echarlaix/SmolVLM2-256M-Video-Instruct-openvino-8bit-woq-data-free",
263
- "echarlaix/SmolVLM2-256M-Video-Instruct-openvino-8bit-static-2",
264
  ]
265
 
266
  demo = gr.ChatInterface(
@@ -279,7 +276,7 @@ demo = gr.ChatInterface(
279
  ],
280
  stop_btn=False,
281
  title="Fast quantized SmolVLM2 ⚡",
282
- description="Play with a [SmolVLM2-500M-Video-Instruct](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino) and its quantized variants : [SmolVLM2-500M-Video-Instruct-openvino-8bit-woq](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-woq) and [SmolVLM2-500M-Video-Instruct-openvino-8bit-static](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-static) both obtained by respectively applying Weight-Only Quantization and Static Quantization using [Optimum Intel](https://github.com/huggingface/optimum-intel) NNCF integration. To get started, upload an image and text or try one of the examples. This demo runs on 4th Generation Intel Xeon (Sapphire Rapids) processors.",
283
  examples=examples,
284
  run_examples_on_click=False,
285
  cache_examples=False,
 
13
  from optimum.intel import OVModelForVisualCausalLM
14
 
15
 
16
+ default_model_id = "echarlaix/SmolVLM2-256M-Video-Instruct-openvino"
17
 
18
  model_cache = {
19
  "model_id" : default_model_id,
 
255
 
256
 
257
  model_choices = [
 
 
 
258
  "echarlaix/SmolVLM2-256M-Video-Instruct-openvino",
259
  "echarlaix/SmolVLM2-256M-Video-Instruct-openvino-8bit-woq-data-free",
260
+ "echarlaix/SmolVLM2-256M-Video-Instruct-openvino-8bit-mixed",
261
  ]
262
 
263
  demo = gr.ChatInterface(
 
276
  ],
277
  stop_btn=False,
278
  title="Fast quantized SmolVLM2 ⚡",
279
+ description="Play with [SmolVLM2-256M-Video-Instruct-openvino](https://huggingface.co/echarlaix/SmolVLM2-256M-Video-Instruct-openvino) and its quantized variants: [SmolVLM2-256M-Video-Instruct-openvino-8bit-woq-data-free](https://huggingface.co/echarlaix/SmolVLM2-256M-Video-Instruct-openvino-8bit-woq-data-free) and [SmolVLM2-256M-Video-Instruct-openvino-8bit-mixed](https://huggingface.co/echarlaix/SmolVLM2-256M-Video-Instruct-openvino-8bit-mixed), obtained by applying Weight-Only Quantization and Static Quantization respectively, using the [Optimum Intel](https://github.com/huggingface/optimum-intel) NNCF integration. To get started, upload an image and text or try one of the examples. This demo runs on 4th Generation Intel Xeon (Sapphire Rapids) processors.",
280
  examples=examples,
281
  run_examples_on_click=False,
282
  cache_examples=False,