Emil7018 committed on
Commit
c52d45c
·
verified ·
1 Parent(s): a4c60ba

Training in progress, epoch 2

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d05da4069fc52d58f023e29df4dcdd61eef9ec8db8396ec64e56917963bbb9d9
3
  size 598898116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf658bcb3d6e926ee8e4606c9b8822870aeb69f28f4d04ca4eeb4ef7b23b422c
3
  size 598898116
run-0/checkpoint-636/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d34bdf3ceb775f7abffb8b735fee41b5a8dd68734961c8b0d8b0ea59b85b35c
3
  size 598898116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf658bcb3d6e926ee8e4606c9b8822870aeb69f28f4d04ca4eeb4ef7b23b422c
3
  size 598898116
run-0/checkpoint-636/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e806e352a23a7f796f9affbdeff3dde5a2ae4aa62f4d76ab0613a13166ed188d
3
  size 1197886411
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa9b8bf2bab7a8a5591b66774d44bd2a63a1855204d0243b34371f380038a837
3
  size 1197886411
run-0/checkpoint-636/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8edf381b4ea3ddb40e833d7627fcf7989bb19d7093095cfc21fcbe490bc46ba5
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f46bd5e01fd050f0b60b6f6b624c2b4bbdecaf3740e9bbb0744d423f91c08a4b
3
  size 1383
run-0/checkpoint-636/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:897f0481850fa5341e40153cf6dcf6d03b540ee8cbb8de088c3f76164e5df9c4
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fff711eebd4297f4d30bcd2f761d370446defe74842dbd45de3421b9f4e097b3
3
  size 1465
run-0/checkpoint-636/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 636,
3
- "best_metric": 0.919790351277775,
4
  "best_model_checkpoint": "ModernBERT-base-distilled/run-0/checkpoint-636",
5
  "epoch": 2.0,
6
  "eval_steps": 500,
@@ -11,43 +11,43 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "grad_norm": 6.882817268371582,
15
- "learning_rate": 1.0031446540880504e-05,
16
- "loss": 3.7921,
17
  "step": 318
18
  },
19
  {
20
  "epoch": 1.0,
21
- "eval_accuracy": 0.8806451612903226,
22
- "eval_f1": 0.8782099185536261,
23
- "eval_loss": 2.21335768699646,
24
- "eval_runtime": 19.6251,
25
- "eval_samples_per_second": 157.961,
26
- "eval_steps_per_second": 3.312,
27
  "step": 318
28
  },
29
  {
30
  "epoch": 2.0,
31
- "grad_norm": 5.356029510498047,
32
- "learning_rate": 3.1446540880503146e-08,
33
- "loss": 1.8385,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 2.0,
38
- "eval_accuracy": 0.9209677419354839,
39
- "eval_f1": 0.919790351277775,
40
- "eval_loss": 1.6662155389785767,
41
- "eval_runtime": 16.6015,
42
- "eval_samples_per_second": 186.73,
43
- "eval_steps_per_second": 3.915,
44
  "step": 636
45
  }
46
  ],
47
  "logging_steps": 500,
48
- "max_steps": 636,
49
  "num_input_tokens_seen": 0,
50
- "num_train_epochs": 2,
51
  "save_steps": 500,
52
  "stateful_callbacks": {
53
  "EarlyStoppingCallback": {
@@ -65,17 +65,17 @@
65
  "should_evaluate": false,
66
  "should_log": false,
67
  "should_save": true,
68
- "should_training_stop": true
69
  },
70
  "attributes": {}
71
  }
72
  },
73
- "total_flos": 450992510607528.0,
74
  "train_batch_size": 48,
75
  "trial_name": null,
76
  "trial_params": {
77
- "alpha": 0.2238473023876273,
78
- "num_train_epochs": 2,
79
- "temperature": 16
80
  }
81
  }
 
1
  {
2
  "best_global_step": 636,
3
+ "best_metric": 0.9414720571915698,
4
  "best_model_checkpoint": "ModernBERT-base-distilled/run-0/checkpoint-636",
5
  "epoch": 2.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "grad_norm": 7.095359802246094,
15
+ "learning_rate": 1.5015723270440253e-05,
16
+ "loss": 4.2443,
17
  "step": 318
18
  },
19
  {
20
  "epoch": 1.0,
21
+ "eval_accuracy": 0.9070967741935484,
22
+ "eval_f1": 0.9051373620810793,
23
+ "eval_loss": 2.06101655960083,
24
+ "eval_runtime": 16.6455,
25
+ "eval_samples_per_second": 186.236,
26
+ "eval_steps_per_second": 3.905,
27
  "step": 318
28
  },
29
  {
30
  "epoch": 2.0,
31
+ "grad_norm": 6.176209449768066,
32
+ "learning_rate": 1.0015723270440252e-05,
33
+ "loss": 1.4635,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 2.0,
38
+ "eval_accuracy": 0.9425806451612904,
39
+ "eval_f1": 0.9414720571915698,
40
+ "eval_loss": 1.1604701280593872,
41
+ "eval_runtime": 16.5771,
42
+ "eval_samples_per_second": 187.005,
43
+ "eval_steps_per_second": 3.921,
44
  "step": 636
45
  }
46
  ],
47
  "logging_steps": 500,
48
+ "max_steps": 1272,
49
  "num_input_tokens_seen": 0,
50
+ "num_train_epochs": 4,
51
  "save_steps": 500,
52
  "stateful_callbacks": {
53
  "EarlyStoppingCallback": {
 
65
  "should_evaluate": false,
66
  "should_log": false,
67
  "should_save": true,
68
+ "should_training_stop": false
69
  },
70
  "attributes": {}
71
  }
72
  },
73
+ "total_flos": 556619304143304.0,
74
  "train_batch_size": 48,
75
  "trial_name": null,
76
  "trial_params": {
77
+ "alpha": 0.362336107066899,
78
+ "num_train_epochs": 4,
79
+ "temperature": 10
80
  }
81
  }
run-0/checkpoint-636/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c288a49e5d462511afdcb18951b1fb2d83ef9f1ece8ef0108b7a3182e94fe32
3
  size 5905
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e57990bfcffc7508b1aa41cf9c14f9a33fd6a76ae0e5b564184c21c4e65e1398
3
  size 5905
runs/Oct11_19-55-54_cd07aeb3aeb4/events.out.tfevents.1760213687.cd07aeb3aeb4.2948.1 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8347db70977538ed9d9e433e8a4c4278244d3788ddfb49687a6c6c6eb814aae
3
- size 27492
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4325dfbf291448b9a1983f1cfad1c5aec6e8ea31c0fdf2bbb3ee2ec29d3515ed
3
+ size 28072