---
language:
- multilingual
base_model:
- nanonets/Nanonets-OCR2-3B
tags:
- OCR
- image-to-text
- pdf2markdown
- VQA
pipeline_tag: image-text-to-text
library_name: transformers
---
|
|
|
|
|
## Creation Code
|
|
|
|
|
```python
|
|
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration

from llmcompressor import oneshot
from llmcompressor.modifiers.quantization import QuantizationModifier

MODEL_ID = "nanonets/Nanonets-OCR2-3B"
# Output directory: model name suffixed with the quantization scheme.
SAVE_DIR = MODEL_ID.rstrip("/").split("/")[-1] + "-FP8-Dynamic"

# Load model and processor.
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(MODEL_ID, torch_dtype="auto")
processor = AutoProcessor.from_pretrained(MODEL_ID)

# Configure the quantization algorithm and scheme.
# In this case, we:
# * quantize the weights to fp8 with per-channel scales via PTQ
# * quantize the activations to fp8 with dynamic per-token scales
# The lm_head and the vision tower are kept in full precision (quantizing
# them degrades OCR quality for little size benefit).
recipe = QuantizationModifier(
    targets="Linear",
    scheme="FP8_DYNAMIC",
    ignore=["lm_head", "re:visual.*", "re:model.visual.*"],
)

# Apply quantization in place (FP8_DYNAMIC needs no calibration data).
oneshot(model=model, recipe=recipe)

# Save to disk in compressed-tensors format so vLLM/transformers can
# load the quantized checkpoint directly.
model.save_pretrained(SAVE_DIR, save_compressed=True)
processor.save_pretrained(SAVE_DIR)
|
|
|
|
|
```