Commit
·
9795db4
1
Parent(s):
449853b
Update README
Browse files
README.md
CHANGED
|
@@ -1,3 +1,41 @@
|
|
| 1 |
---
|
| 2 |
license: apache-2.0
|
| 3 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
license: apache-2.0
|
| 3 |
---
|
| 4 |
+
# Quantized BERT-base model with 90% unstructured sparsity
|
| 5 |
+
This is the pruned and quantized model in OpenVINO IR format. The pruned model was taken from this source (`neuralmagic/oBERT-12-downstream-pruned-unstructured-90-mnli`) and quantized with the code below using HF Optimum for OpenVINO:
|
| 6 |
+
|
| 7 |
+
```python
|
| 8 |
+
from functools import partial
|
| 9 |
+
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
| 10 |
+
from optimum.intel.openvino import OVConfig, OVQuantizer
|
| 11 |
+
|
| 12 |
+
model_id = "neuralmagic/oBERT-12-downstream-pruned-unstructured-90-mnli" #"typeform/distilbert-base-uncased-mnli"
|
| 13 |
+
model = AutoModelForSequenceClassification.from_pretrained(model_id)
|
| 14 |
+
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
| 15 |
+
save_dir = "./nm_mnli_90"
|
| 16 |
+
|
| 17 |
+
def preprocess_function(examples, tokenizer):
|
| 18 |
+
return tokenizer(examples["premise"], examples["hypothesis"], padding="max_length", max_length=128, truncation=True)
|
| 19 |
+
|
| 20 |
+
# Load the default quantization configuration detailing the quantization we wish to apply
|
| 21 |
+
quantization_config = OVConfig()
|
| 22 |
+
# Instantiate our OVQuantizer using the desired configuration
|
| 23 |
+
quantizer = OVQuantizer.from_pretrained(model)
|
| 24 |
+
# Create the calibration dataset used to perform static quantization
|
| 25 |
+
|
| 26 |
+
calibration_dataset = quantizer.get_calibration_dataset(
|
| 27 |
+
"glue",
|
| 28 |
+
dataset_config_name="mnli",
|
| 29 |
+
preprocess_function=partial(preprocess_function, tokenizer=tokenizer),
|
| 30 |
+
num_samples=100,
|
| 31 |
+
dataset_split="train",
|
| 32 |
+
)
|
| 33 |
+
# Apply static quantization and export the resulting quantized model to OpenVINO IR format
|
| 34 |
+
quantizer.quantize(
|
| 35 |
+
quantization_config=quantization_config,
|
| 36 |
+
calibration_dataset=calibration_dataset,
|
| 37 |
+
save_directory=save_dir,
|
| 38 |
+
)
|
| 39 |
+
# Save the tokenizer
|
| 40 |
+
tokenizer.save_pretrained(save_dir)
|
| 41 |
+
```
|