Emil7018 committed on
Commit
46fd3ad
·
verified ·
1 Parent(s): 5d18158

Training in progress, epoch 2

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a21ae3c9e3db79090f4ff9bbb5ab67d6c8d6949686493fa499c05e35af8cda2
3
  size 598898116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07e441f4986c665ba05c1436f3931b684f8a579a6f815d79e1e3ed27cb33e05a
3
  size 598898116
run-2/checkpoint-636/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5bd7bdef26f6b65507508637483b1b5872daf94543631344f6052c1b96e2b7c3
3
  size 598898116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07e441f4986c665ba05c1436f3931b684f8a579a6f815d79e1e3ed27cb33e05a
3
  size 598898116
run-2/checkpoint-636/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a1f2b004904d99befa65b7cb0796986401ec5e2ccecea20e7e2540742c96b5b
3
  size 1197886411
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb4e7ac45a45c238809fdc443a4013aa6512c6bc836699fc456ab3178a533410
3
  size 1197886411
run-2/checkpoint-636/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5d1227ec0ded90cf65dfb5e83b9af42af4a01a04951bd873a30191e8c9579bf4
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b342b565cc113c3c0d6c74f4134edc5cbad007e754b2134bf57179bf442ad650
3
  size 1383
run-2/checkpoint-636/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 636,
3
- "best_metric": 0.9402146858730125,
4
  "best_model_checkpoint": "ModernBERT-base-distilled/run-2/checkpoint-636",
5
  "epoch": 2.0,
6
  "eval_steps": 500,
@@ -11,38 +11,36 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "grad_norm": 6.1552348136901855,
15
  "learning_rate": 1.3354297693920338e-05,
16
- "loss": 2.9367,
17
  "step": 318
18
  },
19
  {
20
  "epoch": 1.0,
21
- "eval_accuracy": 0.9012903225806451,
22
- "eval_f1": 0.8990202628613445,
23
- "eval_loss": 1.5702729225158691,
24
- "eval_model_preparation_time": 0.0029,
25
- "eval_runtime": 17.5772,
26
- "eval_samples_per_second": 176.365,
27
- "eval_steps_per_second": 3.698,
28
  "step": 318
29
  },
30
  {
31
  "epoch": 2.0,
32
- "grad_norm": 3.8856751918792725,
33
  "learning_rate": 6.6876310272536696e-06,
34
- "loss": 1.1765,
35
  "step": 636
36
  },
37
  {
38
  "epoch": 2.0,
39
- "eval_accuracy": 0.9412903225806452,
40
- "eval_f1": 0.9402146858730125,
41
- "eval_loss": 1.036224126815796,
42
- "eval_model_preparation_time": 0.0029,
43
- "eval_runtime": 17.3187,
44
- "eval_samples_per_second": 178.997,
45
- "eval_steps_per_second": 3.753,
46
  "step": 636
47
  }
48
  ],
@@ -76,8 +74,8 @@
76
  "train_batch_size": 48,
77
  "trial_name": null,
78
  "trial_params": {
79
- "alpha": 0.4894773949849175,
80
  "num_train_epochs": 3,
81
- "temperature": 20
82
  }
83
  }
 
1
  {
2
  "best_global_step": 636,
3
+ "best_metric": 0.9386540826458253,
4
  "best_model_checkpoint": "ModernBERT-base-distilled/run-2/checkpoint-636",
5
  "epoch": 2.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "grad_norm": 7.241408824920654,
15
  "learning_rate": 1.3354297693920338e-05,
16
+ "loss": 3.1039,
17
  "step": 318
18
  },
19
  {
20
  "epoch": 1.0,
21
+ "eval_accuracy": 0.9058064516129032,
22
+ "eval_f1": 0.9039413673060775,
23
+ "eval_loss": 1.5589689016342163,
24
+ "eval_runtime": 16.8256,
25
+ "eval_samples_per_second": 184.243,
26
+ "eval_steps_per_second": 3.863,
 
27
  "step": 318
28
  },
29
  {
30
  "epoch": 2.0,
31
+ "grad_norm": 4.324426651000977,
32
  "learning_rate": 6.6876310272536696e-06,
33
+ "loss": 1.1524,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 2.0,
38
+ "eval_accuracy": 0.94,
39
+ "eval_f1": 0.9386540826458253,
40
+ "eval_loss": 1.0111862421035767,
41
+ "eval_runtime": 16.5402,
42
+ "eval_samples_per_second": 187.422,
43
+ "eval_steps_per_second": 3.93,
 
44
  "step": 636
45
  }
46
  ],
 
74
  "train_batch_size": 48,
75
  "trial_name": null,
76
  "trial_params": {
77
+ "alpha": 0.5616085539831922,
78
  "num_train_epochs": 3,
79
+ "temperature": 14
80
  }
81
  }
run-2/checkpoint-636/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d7b3769acf391cea03ef15fd35cf3a1a8ad375b715605c557f3506a2acf1661
3
  size 5905
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac0ba789da4d8d3fec25ab5f556d37cdc9662d151e6cf3587f40da34600391df
3
  size 5905
runs/Oct11_19-55-54_cd07aeb3aeb4/events.out.tfevents.1760215466.cd07aeb3aeb4.2948.3 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:85145d834b697b1bed0f4173a589361272285753515c031a2627b376ccd44b2c
3
- size 13790
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57c2daa5b8175b293830d0200cf907aedd89aa33ed4e69310d8d97d7fc73923d
3
+ size 14370