echarlaix HF Staff commited on
Commit
6a80cb1
·
1 Parent(s): 27b72ae

replace model

Browse files
Files changed (1) hide show
  1. app.py +3 -6
app.py CHANGED
@@ -13,7 +13,7 @@ from transformers.generation.streamers import TextIteratorStreamer
13
  from optimum.intel import OVModelForVisualCausalLM
14
 
15
 
16
- default_model_id = "echarlaix/SmolVLM2-500M-Video-Instruct-openvino"
17
 
18
  model_cache = {
19
  "model_id" : default_model_id,
@@ -255,12 +255,9 @@ examples = [
255
 
256
 
257
  model_choices = [
258
- #"echarlaix/SmolVLM2-500M-Video-Instruct-openvino",
259
- #"echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-static",
260
- #"echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-woq",
261
  "echarlaix/SmolVLM2-256M-Video-Instruct-openvino",
262
  "echarlaix/SmolVLM2-256M-Video-Instruct-openvino-8bit-woq-data-free",
263
- "echarlaix/SmolVLM2-256M-Video-Instruct-openvino-8bit-static-2",
264
  ]
265
 
266
  demo = gr.ChatInterface(
@@ -279,7 +276,7 @@ demo = gr.ChatInterface(
279
  ],
280
  stop_btn=False,
281
  title="Fast quantized SmolVLM2 ⚡",
282
- description="Play with a [SmolVLM2-500M-Video-Instruct](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino) and its quantized variants : [SmolVLM2-500M-Video-Instruct-openvino-8bit-woq](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-woq) and [SmolVLM2-500M-Video-Instruct-openvino-8bit-static](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-static) both obtained by respectively applying Weight-Only Quantization and Static Quantization using [Optimum Intel](https://github.com/huggingface/optimum-intel) NNCF integration. To get started, upload an image and text or try one of the examples. This demo runs on 4th Generation Intel Xeon (Sapphire Rapids) processors.",
283
  examples=examples,
284
  run_examples_on_click=False,
285
  cache_examples=False,
 
13
  from optimum.intel import OVModelForVisualCausalLM
14
 
15
 
16
+ default_model_id = "echarlaix/SmolVLM2-256M-Video-Instruct-openvino"
17
 
18
  model_cache = {
19
  "model_id" : default_model_id,
 
255
 
256
 
257
  model_choices = [
 
 
 
258
  "echarlaix/SmolVLM2-256M-Video-Instruct-openvino",
259
  "echarlaix/SmolVLM2-256M-Video-Instruct-openvino-8bit-woq-data-free",
260
+ "echarlaix/SmolVLM2-256M-Video-Instruct-openvino-8bit-mixed",
261
  ]
262
 
263
  demo = gr.ChatInterface(
 
276
  ],
277
  stop_btn=False,
278
  title="Fast quantized SmolVLM2 ⚡",
279
+ description="Play with [SmolVLM2-256M-Video-Instruct-openvino](https://huggingface.co/echarlaix/SmolVLM2-256M-Video-Instruct-openvino) and its quantized variants: [SmolVLM2-256M-Video-Instruct-openvino-8bit-woq-data-free](https://huggingface.co/echarlaix/SmolVLM2-256M-Video-Instruct-openvino-8bit-woq-data-free) and [SmolVLM2-256M-Video-Instruct-openvino-8bit-mixed](https://huggingface.co/echarlaix/SmolVLM2-256M-Video-Instruct-openvino-8bit-mixed), obtained by applying Weight-Only Quantization and Static Quantization respectively, using the [Optimum Intel](https://github.com/huggingface/optimum-intel) NNCF integration. To get started, upload an image and text or try one of the examples. This demo runs on 4th Generation Intel Xeon (Sapphire Rapids) processors.",
280
  examples=examples,
281
  run_examples_on_click=False,
282
  cache_examples=False,