Update README.md
Browse files
README.md
CHANGED
|
@@ -116,11 +116,13 @@ Although AutoGPTQ was used for this particular model, Neural Magic is transition
|
|
| 116 |
```python
|
| 117 |
from transformers import AutoTokenizer
|
| 118 |
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
|
|
|
|
| 119 |
import random
|
| 120 |
|
| 121 |
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
|
| 122 |
|
| 123 |
num_samples = 512
|
|
|
|
| 124 |
|
| 125 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
| 126 |
|
|
@@ -131,7 +133,7 @@ dataset = load_dataset(dataset_name, split="train")
|
|
| 131 |
ds = dataset.shuffle().select(range(num_samples))
|
| 132 |
ds = ds.map(preprocess_fn)
|
| 133 |
|
| 134 |
-
|
| 135 |
tokenizer(
|
| 136 |
example["text"], padding=False, max_length=max_seq_len, truncation=True,
|
| 137 |
) for example in ds
|
|
|
|
| 116 |
```python
|
| 117 |
from transformers import AutoTokenizer
|
| 118 |
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
|
| 119 |
+ from datasets import load_dataset
|
| 120 |
import random
|
| 121 |
|
| 122 |
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
|
| 123 |
|
| 124 |
num_samples = 512
|
| 125 |
+ max_seq_len = 4096
|
| 126 |
|
| 127 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
| 128 |
|
|
|
|
| 133 |
ds = dataset.shuffle().select(range(num_samples))
|
| 134 |
ds = ds.map(preprocess_fn)
|
| 135 |
|
| 136 |
+ examples = [
|
| 137 |
tokenizer(
|
| 138 |
example["text"], padding=False, max_length=max_seq_len, truncation=True,
|
| 139 |
) for example in ds
|