Spaces:
Sleeping
Sleeping
update space description
Browse files
README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
colorFrom: blue
|
| 5 |
colorTo: green
|
| 6 |
sdk: gradio
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Fast quantized SmolVLM2
|
| 3 |
+
emoji: ⚡
|
| 4 |
colorFrom: blue
|
| 5 |
colorTo: green
|
| 6 |
sdk: gradio
|
app.py
CHANGED
|
@@ -162,7 +162,8 @@ def process_history(history: list[dict]) -> list[dict]:
|
|
| 162 |
|
| 163 |
|
| 164 |
@torch.inference_mode()
|
| 165 |
-
def generate(message: dict, history: list[dict], model_id: str,
|
|
|
|
| 166 |
|
| 167 |
update_model(model_id)
|
| 168 |
processor = model_cache["processor"]
|
|
@@ -215,8 +216,8 @@ def generate(message: dict, history: list[dict], model_id: str, system_prompt: s
|
|
| 215 |
examples = [
|
| 216 |
[
|
| 217 |
{
|
| 218 |
-
"text": "What is the
|
| 219 |
-
"files": [],
|
| 220 |
}
|
| 221 |
],
|
| 222 |
[
|
|
@@ -246,23 +247,17 @@ examples = [
|
|
| 246 |
],
|
| 247 |
[
|
| 248 |
{
|
| 249 |
-
"text": "What is
|
| 250 |
-
"files": [
|
| 251 |
}
|
| 252 |
],
|
| 253 |
]
|
| 254 |
|
| 255 |
|
| 256 |
model_choices = [
|
| 257 |
-
# "echarlaix/SmolVLM2-2.2B-Instruct-openvino",
|
| 258 |
-
# "echarlaix/SmolVLM-256M-Instruct-openvino",
|
| 259 |
"echarlaix/SmolVLM2-500M-Video-Instruct-openvino",
|
| 260 |
-
# "echarlaix/SmolVLM2-256M-Video-Instruct-openvino",
|
| 261 |
-
# "echarlaix/SmolVLM2-256M-Video-Instruct-openvino-8bit-woq-data-free",
|
| 262 |
-
# "echarlaix/SmolVLM2-256M-Video-Instruct-openvino-8bit-static",
|
| 263 |
"echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-static",
|
| 264 |
"echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-woq-data-free",
|
| 265 |
-
# "echarlaix/SmolVLM2-2.2B-Instruct-openvino-8bit-static",
|
| 266 |
]
|
| 267 |
|
| 268 |
demo = gr.ChatInterface(
|
|
@@ -276,12 +271,12 @@ demo = gr.ChatInterface(
|
|
| 276 |
multimodal=True,
|
| 277 |
additional_inputs=[
|
| 278 |
gr.Dropdown(model_choices, value=model_choices[0], label="Model ID"),
|
| 279 |
-
gr.Textbox(label="System Prompt", value="You are a helpful assistant."),
|
| 280 |
gr.Slider(label="Max New Tokens", minimum=100, maximum=2000, step=10, value=700),
|
| 281 |
],
|
| 282 |
stop_btn=False,
|
| 283 |
title="Fast quantized SmolVLM2 ⚡",
|
| 284 |
-
description="Play with a [SmolVLM2-500M-Video-Instruct](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino) and its quantized
|
| 285 |
examples=examples,
|
| 286 |
run_examples_on_click=False,
|
| 287 |
cache_examples=False,
|
|
|
|
| 162 |
|
| 163 |
|
| 164 |
@torch.inference_mode()
|
| 165 |
+
def generate(message: dict, history: list[dict], model_id: str, max_new_tokens: int = 512) -> Iterator[str]:
|
| 166 |
+
system_prompt = "You are a helpful assistant."
|
| 167 |
|
| 168 |
update_model(model_id)
|
| 169 |
processor = model_cache["processor"]
|
|
|
|
| 216 |
examples = [
|
| 217 |
[
|
| 218 |
{
|
| 219 |
+
"text": "What is on the flower?",
|
| 220 |
+
"files": ["assets/bee.jpg"],
|
| 221 |
}
|
| 222 |
],
|
| 223 |
[
|
|
|
|
| 247 |
],
|
| 248 |
[
|
| 249 |
{
|
| 250 |
+
"text": "What is the capital of France?",
|
| 251 |
+
"files": [],
|
| 252 |
}
|
| 253 |
],
|
| 254 |
]
|
| 255 |
|
| 256 |
|
| 257 |
model_choices = [
|
|
|
|
|
|
|
| 258 |
"echarlaix/SmolVLM2-500M-Video-Instruct-openvino",
|
|
|
|
|
|
|
|
|
|
| 259 |
"echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-static",
|
| 260 |
"echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-woq-data-free",
|
|
|
|
| 261 |
]
|
| 262 |
|
| 263 |
demo = gr.ChatInterface(
|
|
|
|
| 271 |
multimodal=True,
|
| 272 |
additional_inputs=[
|
| 273 |
gr.Dropdown(model_choices, value=model_choices[0], label="Model ID"),
|
| 274 |
+
# gr.Textbox(label="System Prompt", value="You are a helpful assistant."),
|
| 275 |
gr.Slider(label="Max New Tokens", minimum=100, maximum=2000, step=10, value=700),
|
| 276 |
],
|
| 277 |
stop_btn=False,
|
| 278 |
title="Fast quantized SmolVLM2 ⚡",
|
| 279 |
+
description="Play with a [SmolVLM2-500M-Video-Instruct](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino) and its quantized variants : [SmolVLM2-500M-Video-Instruct-openvino-8bit-woq-data-free](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-woq-data-free) and [SmolVLM2-500M-Video-Instruct-openvino-8bit-static](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-static) both obtained by respectively applying Weight-Only Quantization and Static Quantization using [Optimum Intel](https://github.com/huggingface/optimum-intel) NNCF integration. To get started, upload an image and text or try one of the examples. This demo runs on 4th Generation Intel Xeon (Sapphire Rapids) processors.",
|
| 280 |
examples=examples,
|
| 281 |
run_examples_on_click=False,
|
| 282 |
cache_examples=False,
|