Spaces: Running on CPU Spr
replace model
Browse files
app.py
CHANGED
|
@@ -13,7 +13,7 @@ from transformers.generation.streamers import TextIteratorStreamer
|
|
| 13 |
from optimum.intel import OVModelForVisualCausalLM
|
| 14 |
|
| 15 |
|
| 16 |
-
default_model_id = "echarlaix/SmolVLM2-
|
| 17 |
|
| 18 |
model_cache = {
|
| 19 |
"model_id" : default_model_id,
|
|
@@ -255,12 +255,9 @@ examples = [
|
|
| 255 |
|
| 256 |
|
| 257 |
model_choices = [
|
| 258 |
-
#"echarlaix/SmolVLM2-500M-Video-Instruct-openvino",
|
| 259 |
-
#"echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-static",
|
| 260 |
-
#"echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-woq",
|
| 261 |
"echarlaix/SmolVLM2-256M-Video-Instruct-openvino",
|
| 262 |
"echarlaix/SmolVLM2-256M-Video-Instruct-openvino-8bit-woq-data-free",
|
| 263 |
-
"echarlaix/SmolVLM2-256M-Video-Instruct-openvino-8bit-
|
| 264 |
]
|
| 265 |
|
| 266 |
demo = gr.ChatInterface(
|
|
@@ -279,7 +276,7 @@ demo = gr.ChatInterface(
|
|
| 279 |
],
|
| 280 |
stop_btn=False,
|
| 281 |
title="Fast quantized SmolVLM2 ⚡",
|
| 282 |
-
description="Play with a [SmolVLM2-
|
| 283 |
examples=examples,
|
| 284 |
run_examples_on_click=False,
|
| 285 |
cache_examples=False,
|
|
|
|
| 13 |
from optimum.intel import OVModelForVisualCausalLM
|
| 14 |
|
| 15 |
|
| 16 |
+
default_model_id = "echarlaix/SmolVLM2-256M-Video-Instruct-openvino"
|
| 17 |
|
| 18 |
model_cache = {
|
| 19 |
"model_id" : default_model_id,
|
|
|
|
| 255 |
|
| 256 |
|
| 257 |
model_choices = [
|
|
|
|
|
|
|
|
|
|
| 258 |
"echarlaix/SmolVLM2-256M-Video-Instruct-openvino",
|
| 259 |
"echarlaix/SmolVLM2-256M-Video-Instruct-openvino-8bit-woq-data-free",
|
| 260 |
+
"echarlaix/SmolVLM2-256M-Video-Instruct-openvino-8bit-mixed",
|
| 261 |
]
|
| 262 |
|
| 263 |
demo = gr.ChatInterface(
|
|
|
|
| 276 |
],
|
| 277 |
stop_btn=False,
|
| 278 |
title="Fast quantized SmolVLM2 ⚡",
|
| 279 |
+
description="Play with a [SmolVLM2-256M-Video-Instruct-openvino](https://huggingface.co/echarlaix/SmolVLM2-256M-Video-Instruct-openvino) and its quantized variants : [SmolVLM2-256M-Video-Instruct-openvino-8bit-woq-data-free](https://huggingface.co/echarlaix/SmolVLM2-256M-Video-Instruct-openvino-8bit-woq-data-free) and [SmolVLM2-256M-Video-Instruct-openvino-8bit-mixed](https://huggingface.co/echarlaix/SmolVLM2-256M-Video-Instruct-openvino-8bit-mixed) both obtained by respectively applying Weight-Only Quantization and Static Quantization using [Optimum Intel](https://github.com/huggingface/optimum-intel) NNCF integration. To get started, upload an image and text or try one of the examples. This demo runs on 4th Generation Intel Xeon (Sapphire Rapids) processors.",
|
| 280 |
examples=examples,
|
| 281 |
run_examples_on_click=False,
|
| 282 |
cache_examples=False,
|