giovo17 commited on
Commit
5d1bf21
·
verified ·
1 Parent(s): f4020c7

Upload config-lock.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. config-lock.yaml +118 -0
config-lock.yaml ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ seed: 42
2
+ log_every_iters: 1000
3
+ save_every_iters: 10000
4
+ eval_every_iters: 10000
5
+ update_pbar_every_iters: 100
6
+ time_limit_sec: -1
7
+ checkpoints_retain_n: 5
8
+ model_base_name: tfs_mt
9
+ model_parameters:
10
+ dropout: 0.1
11
+ model_configs:
12
+ pretrained_word_embeddings: GloVe
13
+ positional_embeddings: sinusoidal
14
+ nano:
15
+ num_encoder_layers: 4
16
+ num_decoder_layers: 4
17
+ d_model: 50
18
+ num_heads: 4
19
+ d_ff: 200
20
+ norm_type: postnorm
21
+ glove_version: glove.2024.wikigiga.50d
22
+ glove_filename: wiki_giga_2024_50_MFT20_vectors_seed_123_alpha_0.75_eta_0.075_combined
23
+ small:
24
+ num_encoder_layers: 6
25
+ num_decoder_layers: 6
26
+ d_model: 100
27
+ num_heads: 6
28
+ d_ff: 400
29
+ norm_type: postnorm
30
+ glove_version: glove.2024.wikigiga.100d
31
+ glove_filename: wiki_giga_2024_100_MFT20_vectors_seed_2024_alpha_0.75_eta_0.05.050_combined
32
+ base:
33
+ num_encoder_layers: 8
34
+ num_decoder_layers: 8
35
+ d_model: 300
36
+ num_heads: 8
37
+ d_ff: 800
38
+ norm_type: postnorm
39
+ glove_version: glove.2024.wikigiga.300d
40
+ glove_filename: wiki_giga_2024_300_MFT20_vectors_seed_2024_alpha_0.75_eta_0.05_combined
41
+ original:
42
+ num_encoder_layers: 6
43
+ num_decoder_layers: 6
44
+ d_model: 512
45
+ num_heads: 8
46
+ d_ff: 2048
47
+ norm_type: postnorm
48
+ training_hp:
49
+ num_epochs: 2
50
+ use_amp: true
51
+ amp_dtype: bfloat16
52
+ torch_compile_mode: max-autotune
53
+ loss:
54
+ type: crossentropy
55
+ label_smoothing: 0.1
56
+ optimizer:
57
+ type: AdamW
58
+ weight_decay: 0.0001
59
+ beta1: 0.9
60
+ beta2: 0.999
61
+ eps: 1.0e-08
62
+ lr_scheduler:
63
+ type: original
64
+ min_lr: 0.0003
65
+ max_lr: 0.001
66
+ warmup_iters: 25000
67
+ stable_iters_prop: 0.7
68
+ max_gradient_norm: 5.0
69
+ early_stopping:
70
+ enabled: false
71
+ patience: 40000
72
+ min_delta: 1.0e-05
73
+ tokenizer:
74
+ type: word
75
+ sos_token: <s>
76
+ eos_token: </s>
77
+ pad_token: <PAD>
78
+ unk_token: <UNK>
79
+ max_seq_len: 131
80
+ max_vocab_size: 70000
81
+ vocab_min_freq: 2
82
+ src_sos_token_idx: 60932
83
+ src_eos_token_idx: 60854
84
+ src_pad_token_idx: 18895
85
+ src_unk_token_idx: 3358
86
+ tgt_sos_token_idx: 60933
87
+ tgt_eos_token_idx: 60860
88
+ tgt_pad_token_idx: 18800
89
+ tgt_unk_token_idx: 3289
90
+ dataset:
91
+ dataset_task: machine-translation
92
+ dataset_id: Helsinki-NLP/europarl
93
+ dataset_name: en-it
94
+ train_split: 0.95
95
+ src_lang: en
96
+ tgt_lang: it
97
+ max_len: -1
98
+ train_dataloader:
99
+ batch_size: 64
100
+ num_workers: 4
101
+ shuffle: true
102
+ drop_last: true
103
+ prefetch_factor: 2
104
+ pad_all_to_max_len: true
105
+ test_dataloader:
106
+ batch_size: 128
107
+ num_workers: 4
108
+ shuffle: false
109
+ drop_last: false
110
+ prefetch_factor: 2
111
+ pad_all_to_max_len: true
112
+ chosen_model_size: small
113
+ model_name: tfs_mt_small_260207-0915
114
+ exec_mode: dev
115
+ src_tokenizer_vocab_size: 70000
116
+ tgt_tokenizer_vocab_size: 70000
117
+ num_train_iters_per_epoch: 28889
118
+ num_test_iters_per_epoch: 761