aixk commited on
Commit
3d372d1
·
1 Parent(s): 787cd03

final purge backups 1/1

Browse files
shared/auto_workers/kosine/00913a92a2d4-2805.json DELETED
@@ -1 +0,0 @@
1
- {"worker_id": "00913a92a2d4-2805", "stage": 1, "model_name": "kosine", "updated_at": 1777216755}
 
 
shared/auto_workers/kosine/09572e9649ff-14424.json DELETED
@@ -1 +0,0 @@
1
- {"worker_id": "09572e9649ff-14424", "stage": 1, "model_name": "kosine", "updated_at": 1777214974}
 
 
shared/auto_workers/kosine/1f24e698ef4b-2772.json DELETED
@@ -1 +0,0 @@
1
- {"worker_id": "1f24e698ef4b-2772", "stage": 1, "model_name": "kosine", "updated_at": 1777195166}
 
 
shared/auto_workers/kosine/326e386af23c-3264.json DELETED
@@ -1 +0,0 @@
1
- {"worker_id": "326e386af23c-3264", "stage": 1, "model_name": "kosine", "updated_at": 1777214972}
 
 
shared/auto_workers/kosine/616f62fcd3a6-5480.json DELETED
@@ -1 +0,0 @@
1
- {"worker_id": "616f62fcd3a6-5480", "stage": 1, "model_name": "kosine", "updated_at": 1777198863}
 
 
shared/auto_workers/kosine/7cf5e805e860-2347.json DELETED
@@ -1 +0,0 @@
1
- {"worker_id": "7cf5e805e860-2347", "stage": 1, "model_name": "kosine", "updated_at": 1777198867}
 
 
shared/auto_workers/kosine/9da8c8a9b5e0-6107.json DELETED
@@ -1 +0,0 @@
1
- {"worker_id": "9da8c8a9b5e0-6107", "stage": 1, "model_name": "kosine", "updated_at": 1777198863}
 
 
shared/auto_workers/kosine/bf14fb62a2d5-908.json DELETED
@@ -1 +0,0 @@
1
- {"worker_id": "bf14fb62a2d5-908", "stage": 1, "model_name": "kosine", "updated_at": 1777195165}
 
 
shared/auto_workers/kosine/e67068e0850e-497.json DELETED
@@ -1 +0,0 @@
1
- {"worker_id": "e67068e0850e-497", "stage": 1, "model_name": "kosine", "updated_at": 1777195168}
 
 
shared/checkpoints/checkpoint-490/config.json DELETED
@@ -1,24 +0,0 @@
1
- {
2
- "architectures": [
3
- "KosineForCausalLM"
4
- ],
5
- "attention_dropout": 0.0,
6
- "dtype": "float32",
7
- "hidden_dropout": 0.0,
8
- "hidden_size": 384,
9
- "initializer_range": 0.02,
10
- "intermediate_size": 1536,
11
- "max_position_embeddings": 512,
12
- "model_type": "fasty",
13
- "neftune_alpha": 0.0,
14
- "num_attention_heads": 8,
15
- "num_hidden_layers": 24,
16
- "num_key_value_heads": 2,
17
- "qk_norm": true,
18
- "rezero_init": 1.0,
19
- "rms_norm_eps": 1e-06,
20
- "rope_theta": 10000.0,
21
- "transformers_version": "5.0.0",
22
- "use_cache": false,
23
- "vocab_size": 32000
24
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
shared/checkpoints/checkpoint-490/trainer_state.json DELETED
@@ -1,209 +0,0 @@
1
- {
2
- "best_global_step": null,
3
- "best_metric": null,
4
- "best_model_checkpoint": null,
5
- "epoch": 2.788524492589727,
6
- "eval_steps": 500,
7
- "global_step": 490,
8
- "is_hyper_param_search": false,
9
- "is_local_process_zero": true,
10
- "is_world_process_zero": true,
11
- "log_history": [
12
- {
13
- "epoch": 0.005713945598476281,
14
- "grad_norm": Infinity,
15
- "learning_rate": 5e-05,
16
- "loss": 1003.1537475585938,
17
- "step": 1
18
- },
19
- {
20
- "epoch": 0.11427891196952562,
21
- "grad_norm": 166.87982177734375,
22
- "learning_rate": 4.9914347503344016e-05,
23
- "loss": 933.5017475328947,
24
- "step": 20
25
- },
26
- {
27
- "epoch": 0.22855782393905125,
28
- "grad_norm": 152.6066131591797,
29
- "learning_rate": 4.9595127708329773e-05,
30
- "loss": 876.15302734375,
31
- "step": 40
32
- },
33
- {
34
- "epoch": 0.3428367359085769,
35
- "grad_norm": 162.4434814453125,
36
- "learning_rate": 4.904269857960207e-05,
37
- "loss": 826.28603515625,
38
- "step": 60
39
- },
40
- {
41
- "epoch": 0.4571156478781025,
42
- "grad_norm": 198.8935546875,
43
- "learning_rate": 4.82622982127123e-05,
44
- "loss": 790.21640625,
45
- "step": 80
46
- },
47
- {
48
- "epoch": 0.5713945598476281,
49
- "grad_norm": 292.5304260253906,
50
- "learning_rate": 4.7316475236404454e-05,
51
- "loss": 758.520751953125,
52
- "step": 100
53
- },
54
- {
55
- "epoch": 0.6856734718171538,
56
- "grad_norm": 161.1084747314453,
57
- "learning_rate": 4.611472024403491e-05,
58
- "loss": 732.02783203125,
59
- "step": 120
60
- },
61
- {
62
- "epoch": 0.7999523837866793,
63
- "grad_norm": 476.92889404296875,
64
- "learning_rate": 4.471275690560381e-05,
65
- "loss": 707.32529296875,
66
- "step": 140
67
- },
68
- {
69
- "epoch": 0.914231295756205,
70
- "grad_norm": 232.22218322753906,
71
- "learning_rate": 4.312387854245201e-05,
72
- "loss": 689.078271484375,
73
- "step": 160
74
- },
75
- {
76
- "epoch": 1.022855782393905,
77
- "grad_norm": 324.50347900390625,
78
- "learning_rate": 4.136315079151446e-05,
79
- "loss": 637.46904296875,
80
- "step": 180
81
- },
82
- {
83
- "epoch": 1.1371346943634308,
84
- "grad_norm": 290.04522705078125,
85
- "learning_rate": 3.944726875397202e-05,
86
- "loss": 652.662060546875,
87
- "step": 200
88
- },
89
- {
90
- "epoch": 1.2514136063329564,
91
- "grad_norm": 144.04925537109375,
92
- "learning_rate": 3.73943986934338e-05,
93
- "loss": 641.387353515625,
94
- "step": 220
95
- },
96
- {
97
- "epoch": 1.365692518302482,
98
- "grad_norm": 269.99884033203125,
99
- "learning_rate": 3.5224005784657e-05,
100
- "loss": 619.291015625,
101
- "step": 240
102
- },
103
- {
104
- "epoch": 1.4799714302720077,
105
- "grad_norm": 263.09124755859375,
106
- "learning_rate": 3.295666954607935e-05,
107
- "loss": 601.540673828125,
108
- "step": 260
109
- },
110
- {
111
- "epoch": 1.5942503422415333,
112
- "grad_norm": 612.1905517578125,
113
- "learning_rate": 3.0613888706220336e-05,
114
- "loss": 589.492138671875,
115
- "step": 280
116
- },
117
- {
118
- "epoch": 1.7085292542110588,
119
- "grad_norm": 428.7916564941406,
120
- "learning_rate": 2.8217877354194643e-05,
121
- "loss": 577.317236328125,
122
- "step": 300
123
- },
124
- {
125
- "epoch": 1.8228081661805846,
126
- "grad_norm": 413.7802734375,
127
- "learning_rate": 2.5791354307225036e-05,
128
- "loss": 568.5466796875,
129
- "step": 320
130
- },
131
- {
132
- "epoch": 1.9370870781501102,
133
- "grad_norm": 602.0125732421875,
134
- "learning_rate": 2.335732769235743e-05,
135
- "loss": 556.334326171875,
136
- "step": 340
137
- },
138
- {
139
- "epoch": 2.04571156478781,
140
- "grad_norm": 396.1126403808594,
141
- "learning_rate": 2.093887678495993e-05,
142
- "loss": 526.214013671875,
143
- "step": 360
144
- },
145
- {
146
- "epoch": 2.159990476757336,
147
- "grad_norm": 345.9240417480469,
148
- "learning_rate": 1.8558933172598236e-05,
149
- "loss": 544.178369140625,
150
- "step": 380
151
- },
152
- {
153
- "epoch": 2.2742693887268617,
154
- "grad_norm": 389.1020812988281,
155
- "learning_rate": 1.6240063319276767e-05,
156
- "loss": 536.914501953125,
157
- "step": 400
158
- },
159
- {
160
- "epoch": 2.388548300696387,
161
- "grad_norm": 534.6370239257812,
162
- "learning_rate": 1.400425459175672e-05,
163
- "loss": 534.46005859375,
164
- "step": 420
165
- },
166
- {
167
- "epoch": 2.502827212665913,
168
- "grad_norm": 430.8108215332031,
169
- "learning_rate": 1.1872706776834838e-05,
170
- "loss": 528.34111328125,
171
- "step": 440
172
- },
173
- {
174
- "epoch": 2.6171061246354386,
175
- "grad_norm": 357.02899169921875,
176
- "learning_rate": 9.865631066402137e-06,
177
- "loss": 524.981494140625,
178
- "step": 460
179
- },
180
- {
181
- "epoch": 2.731385036604964,
182
- "grad_norm": 286.9407043457031,
183
- "learning_rate": 8.002058416292329e-06,
184
- "loss": 522.376416015625,
185
- "step": 480
186
- }
187
- ],
188
- "logging_steps": 20,
189
- "max_steps": 645,
190
- "num_input_tokens_seen": 0,
191
- "num_train_epochs": 4,
192
- "save_steps": 1000000000,
193
- "stateful_callbacks": {
194
- "TrainerControl": {
195
- "args": {
196
- "should_epoch_stop": false,
197
- "should_evaluate": false,
198
- "should_log": false,
199
- "should_save": true,
200
- "should_training_stop": false
201
- },
202
- "attributes": {}
203
- }
204
- },
205
- "total_flos": 9672588933120000.0,
206
- "train_batch_size": 1,
207
- "trial_name": null,
208
- "trial_params": null
209
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
shared/checkpoints/owner_backup_latest.json DELETED
@@ -1 +0,0 @@
1
- {"owner_worker_id": "616f62fcd3a6-5480", "checkpoint": "shared/checkpoints/checkpoint-490", "checkpoint_name": "checkpoint-490", "step": 490, "updated_at": 1777212552, "status": "owner_backup_ok"}
 
 
shared/resume/latest.json DELETED
@@ -1 +0,0 @@
1
- {"worker_id": "616f62fcd3a6-5480", "checkpoint": "shared/checkpoints/checkpoint-490", "step": 490, "updated_at": 1777212552}
 
 
shared/slot_plan/kosine/stage1.json DELETED
@@ -1 +0,0 @@
1
- {"model_name": "kosine", "stage": 1, "slot_total": 20, "worker_id": "00913a92a2d4-2805", "updated_at": 1777215332, "slot_workers": ["e67068e0850e-497", "1f24e698ef4b-2772", "bf14fb62a2d5-908", "7cf5e805e860-2347", "616f62fcd3a6-5480", "9da8c8a9b5e0-6107", "09572e9649ff-14424", "326e386af23c-3264", "00913a92a2d4-2805"]}