madiedgar commited on
Commit
c0ea238
·
verified ·
1 Parent(s): 7137d71

Upload folder using huggingface_hub

Browse files
tiny-aya-base/condition-5-es-5k-c4ai-aya-expanse-32b-seed42/adapter_config.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": {
6
+ "base_model_class": "Cohere2ForCausalLM",
7
+ "parent_library": "transformers.models.cohere2.modeling_cohere2",
8
+ "unsloth_fixed": true
9
+ },
10
+ "base_model_name_or_path": "CohereLabs/tiny-aya-base",
11
+ "bias": "none",
12
+ "corda_config": null,
13
+ "ensure_weight_tying": false,
14
+ "eva_config": null,
15
+ "exclude_modules": null,
16
+ "fan_in_fan_out": false,
17
+ "inference_mode": true,
18
+ "init_lora_weights": true,
19
+ "layer_replication": null,
20
+ "layers_pattern": null,
21
+ "layers_to_transform": null,
22
+ "loftq_config": {},
23
+ "lora_alpha": 32,
24
+ "lora_bias": false,
25
+ "lora_dropout": 0.0,
26
+ "megatron_config": null,
27
+ "megatron_core": "megatron.core",
28
+ "modules_to_save": null,
29
+ "peft_type": "LORA",
30
+ "peft_version": "0.18.1",
31
+ "qalora_group_size": 16,
32
+ "r": 16,
33
+ "rank_pattern": {},
34
+ "revision": null,
35
+ "target_modules": [
36
+ "gate_proj",
37
+ "down_proj",
38
+ "v_proj",
39
+ "up_proj",
40
+ "o_proj",
41
+ "k_proj",
42
+ "q_proj"
43
+ ],
44
+ "target_parameters": null,
45
+ "task_type": "CAUSAL_LM",
46
+ "trainable_token_indices": null,
47
+ "use_dora": false,
48
+ "use_qalora": false,
49
+ "use_rslora": false
50
+ }
tiny-aya-base/condition-5-es-5k-c4ai-aya-expanse-32b-seed42/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb2b3994335ce977a423649970594e4a4e921273282b36e4bd4d86b961189ecb
3
+ size 120981200
tiny-aya-base/condition-5-es-5k-c4ai-aya-expanse-32b-seed42/training_metrics.json ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_config_name": "tiny-aya-base",
3
+ "condition_name": "condition-5-es-5k-c4ai-aya-expanse-32b",
4
+ "seed": 42,
5
+ "output_name": "condition-5-es-5k-c4ai-aya-expanse-32b-seed42",
6
+ "train_result": {
7
+ "train_runtime": 3242.2877,
8
+ "train_samples_per_second": 1.244,
9
+ "train_steps_per_second": 0.078,
10
+ "total_flos": 7.04182494471127e+16,
11
+ "train_loss": 0.9694745190559871,
12
+ "epoch": 1.0
13
+ },
14
+ "log_history": [
15
+ {
16
+ "loss": 1.5913,
17
+ "grad_norm": 0.14458343386650085,
18
+ "learning_rate": 0.00013846153846153847,
19
+ "epoch": 0.03968253968253968,
20
+ "step": 10
21
+ },
22
+ {
23
+ "loss": 1.3842,
24
+ "grad_norm": 0.2105778157711029,
25
+ "learning_rate": 0.00019968914967963337,
26
+ "epoch": 0.07936507936507936,
27
+ "step": 20
28
+ },
29
+ {
30
+ "loss": 1.1885,
31
+ "grad_norm": 0.1847052276134491,
32
+ "learning_rate": 0.00019779650288536058,
33
+ "epoch": 0.11904761904761904,
34
+ "step": 30
35
+ },
36
+ {
37
+ "loss": 1.0691,
38
+ "grad_norm": 0.19913208484649658,
39
+ "learning_rate": 0.000194216519005939,
40
+ "epoch": 0.15873015873015872,
41
+ "step": 40
42
+ },
43
+ {
44
+ "loss": 1.0204,
45
+ "grad_norm": 0.21405866742134094,
46
+ "learning_rate": 0.00018901096548075305,
47
+ "epoch": 0.1984126984126984,
48
+ "step": 50
49
+ },
50
+ {
51
+ "loss": 0.9831,
52
+ "grad_norm": 0.22955435514450073,
53
+ "learning_rate": 0.00018226965660021836,
54
+ "epoch": 0.23809523809523808,
55
+ "step": 60
56
+ },
57
+ {
58
+ "loss": 0.9517,
59
+ "grad_norm": 0.2320040464401245,
60
+ "learning_rate": 0.00017410890389021736,
61
+ "epoch": 0.2777777777777778,
62
+ "step": 70
63
+ },
64
+ {
65
+ "loss": 0.9813,
66
+ "grad_norm": 0.24627013504505157,
67
+ "learning_rate": 0.0001646695093250953,
68
+ "epoch": 0.31746031746031744,
69
+ "step": 80
70
+ },
71
+ {
72
+ "loss": 0.9455,
73
+ "grad_norm": 0.24273602664470673,
74
+ "learning_rate": 0.00015411433599342038,
75
+ "epoch": 0.35714285714285715,
76
+ "step": 90
77
+ },
78
+ {
79
+ "loss": 0.9037,
80
+ "grad_norm": 0.22931845486164093,
81
+ "learning_rate": 0.0001426254981311545,
82
+ "epoch": 0.3968253968253968,
83
+ "step": 100
84
+ },
85
+ {
86
+ "loss": 0.8979,
87
+ "grad_norm": 0.2520875632762909,
88
+ "learning_rate": 0.0001304012190041437,
89
+ "epoch": 0.4365079365079365,
90
+ "step": 110
91
+ },
92
+ {
93
+ "loss": 0.9242,
94
+ "grad_norm": 0.2471681386232376,
95
+ "learning_rate": 0.00011765241085261802,
96
+ "epoch": 0.47619047619047616,
97
+ "step": 120
98
+ },
99
+ {
100
+ "loss": 0.8773,
101
+ "grad_norm": 0.2511087954044342,
102
+ "learning_rate": 0.00010459903590580706,
103
+ "epoch": 0.5158730158730159,
104
+ "step": 130
105
+ },
106
+ {
107
+ "loss": 0.9526,
108
+ "grad_norm": 0.25003281235694885,
109
+ "learning_rate": 9.146631125209607e-05,
110
+ "epoch": 0.5555555555555556,
111
+ "step": 140
112
+ },
113
+ {
114
+ "loss": 0.8778,
115
+ "grad_norm": 0.2581188678741455,
116
+ "learning_rate": 7.848082304419478e-05,
117
+ "epoch": 0.5952380952380952,
118
+ "step": 150
119
+ },
120
+ {
121
+ "loss": 0.8969,
122
+ "grad_norm": 0.2540159225463867,
123
+ "learning_rate": 6.586661708308272e-05,
124
+ "epoch": 0.6349206349206349,
125
+ "step": 160
126
+ },
127
+ {
128
+ "loss": 0.872,
129
+ "grad_norm": 0.23870062828063965,
130
+ "learning_rate": 5.3841333232046654e-05,
131
+ "epoch": 0.6746031746031746,
132
+ "step": 170
133
+ },
134
+ {
135
+ "loss": 0.9307,
136
+ "grad_norm": 0.257695734500885,
137
+ "learning_rate": 4.261245035589917e-05,
138
+ "epoch": 0.7142857142857143,
139
+ "step": 180
140
+ },
141
+ {
142
+ "loss": 0.8996,
143
+ "grad_norm": 0.25319522619247437,
144
+ "learning_rate": 3.2373706573516794e-05,
145
+ "epoch": 0.753968253968254,
146
+ "step": 190
147
+ },
148
+ {
149
+ "loss": 0.8269,
150
+ "grad_norm": 0.28425028920173645,
151
+ "learning_rate": 2.3301756587057987e-05,
152
+ "epoch": 0.7936507936507936,
153
+ "step": 200
154
+ },
155
+ {
156
+ "loss": 0.8738,
157
+ "grad_norm": 0.2507912218570709,
158
+ "learning_rate": 1.5553123760806143e-05,
159
+ "epoch": 0.8333333333333334,
160
+ "step": 210
161
+ },
162
+ {
163
+ "loss": 0.8959,
164
+ "grad_norm": 0.24545952677726746,
165
+ "learning_rate": 9.261499537105177e-06,
166
+ "epoch": 0.873015873015873,
167
+ "step": 220
168
+ },
169
+ {
170
+ "loss": 0.8416,
171
+ "grad_norm": 0.26205167174339294,
172
+ "learning_rate": 4.535436784056269e-06,
173
+ "epoch": 0.9126984126984127,
174
+ "step": 230
175
+ },
176
+ {
177
+ "loss": 0.8351,
178
+ "grad_norm": 0.2631818652153015,
179
+ "learning_rate": 1.4564768729220412e-06,
180
+ "epoch": 0.9523809523809523,
181
+ "step": 240
182
+ },
183
+ {
184
+ "loss": 0.8215,
185
+ "grad_norm": 0.27556470036506653,
186
+ "learning_rate": 7.774279980626853e-08,
187
+ "epoch": 0.9920634920634921,
188
+ "step": 250
189
+ },
190
+ {
191
+ "train_runtime": 3242.2877,
192
+ "train_samples_per_second": 1.244,
193
+ "train_steps_per_second": 0.078,
194
+ "total_flos": 7.04182494471127e+16,
195
+ "train_loss": 0.9694745190559871,
196
+ "epoch": 1.0,
197
+ "step": 252
198
+ }
199
+ ]
200
+ }