Update README.md
Browse files
README.md
CHANGED
|
@@ -65,14 +65,14 @@ python3 scripts/cross_tokenizer_distill.py \
|
|
| 65 |
output_embeddings_mode=untie \
|
| 66 |
eval.tasks=[arc_easy,arc_challenge,piqa,boolq,arithmetic,mmlu,ifeval,agieval_en,agieval_cn] \
|
| 67 |
data.batch_size=32 \
|
| 68 |
-
student.pretrained_model_name_or_path=benjamin/
|
| 69 |
-
student.tokenizer_name=
|
| 70 |
-
target_tokenizer_name=
|
| 71 |
n_model_parallel=4 \
|
| 72 |
n_data_parallel=4 \
|
| 73 |
data.num_workers=16 \
|
| 74 |
num_workers=16 \
|
| 75 |
-
name=
|
| 76 |
```
|
| 77 |
|
| 78 |
## Future Work
|
|
|
|
| 65 |
output_embeddings_mode=untie \
|
| 66 |
eval.tasks=[arc_easy,arc_challenge,piqa,boolq,arithmetic,mmlu,ifeval,agieval_en,agieval_cn] \
|
| 67 |
data.batch_size=32 \
|
| 68 |
+
student.pretrained_model_name_or_path=benjamin/Llama-3.2-3B-Instruct-flax \
|
| 69 |
+
student.tokenizer_name=meta-llama/Llama-3.2-3B-Instruct:source=Llama3 \
|
| 70 |
+
target_tokenizer_name=meta-llama/Llama-3.2-3B-Instruct:source=Llama3:target=Llama3:conversion=byte \
|
| 71 |
n_model_parallel=4 \
|
| 72 |
n_data_parallel=4 \
|
| 73 |
data.num_workers=16 \
|
| 74 |
num_workers=16 \
|
| 75 |
+
name=llama3_to_byte_20k
|
| 76 |
```
|
| 77 |
|
| 78 |
## Future Work
|