benjamin committed on
Commit
ec20474
·
verified ·
1 Parent(s): 5ec211b

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +4 -4
README.md CHANGED
@@ -65,14 +65,14 @@ python3 scripts/cross_tokenizer_distill.py \
65
  output_embeddings_mode=untie \
66
  eval.tasks=[arc_easy,arc_challenge,piqa,boolq,arithmetic,mmlu,ifeval,agieval_en,agieval_cn] \
67
  data.batch_size=32 \
68
- student.pretrained_model_name_or_path=benjamin/gemma-2-2b-it-flax \
69
- student.tokenizer_name=google/gemma-2-2b-it:source=Gemma2 \
70
- target_tokenizer_name=google/gemma-2-2b-it:source=Gemma2:target=Gemma2:conversion=byte \
71
  n_model_parallel=4 \
72
  n_data_parallel=4 \
73
  data.num_workers=16 \
74
  num_workers=16 \
75
- name=gemma2_to_byte_20k
76
  ```
77
 
78
  ## Future Work
 
65
  output_embeddings_mode=untie \
66
  eval.tasks=[arc_easy,arc_challenge,piqa,boolq,arithmetic,mmlu,ifeval,agieval_en,agieval_cn] \
67
  data.batch_size=32 \
68
+ student.pretrained_model_name_or_path=benjamin/Llama-3.2-3B-Instruct-flax \
69
+ student.tokenizer_name=meta-llama/Llama-3.2-3B-Instruct:source=Llama3 \
70
+ target_tokenizer_name=meta-llama/Llama-3.2-3B-Instruct:source=Llama3:target=Llama3:conversion=byte \
71
  n_model_parallel=4 \
72
  n_data_parallel=4 \
73
  data.num_workers=16 \
74
  num_workers=16 \
75
+ name=llama3_to_byte_20k
76
  ```
77
 
78
  ## Future Work