Emil7018 committed on
Commit
62584b3
·
verified ·
1 Parent(s): 46fd3ad

Training in progress, epoch 1

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07e441f4986c665ba05c1436f3931b684f8a579a6f815d79e1e3ed27cb33e05a
3
  size 598898116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f456d0ed5f5b7415ebb134592fcc08a7b745e4e3ab9df5aab72bcf1fb2849cdc
3
  size 598898116
run-2/checkpoint-954/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47be2248f485aff0a35e94afcd646ca83b8673d6cb2ca7d25fd55d7d6e73c92a
3
  size 598898116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e43d911a8ecdd349e67fd8eadb50aab03d3ecdaf6b68ada27b35e9435ed9b8de
3
  size 598898116
run-2/checkpoint-954/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4f586bbdb940858b5985f8b53c63cc841de46c96a4492542fc261557dd1121a2
3
  size 1197886411
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82cf219a11787f54352b5f528ff3e4b5280d8cd4d55302a0da0e3d869088130d
3
  size 1197886411
run-2/checkpoint-954/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3cb0104f24fd5b1c74b1de8bb979af0837a0c8451e509740f57bcbac9292de81
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ca29ceaa1ac87639bc24d09680a228e1ce164b6f4aa0b5f1189196aad30d93f
3
  size 1383
run-2/checkpoint-954/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 954,
3
- "best_metric": 0.943495053065245,
4
  "best_model_checkpoint": "ModernBERT-base-distilled/run-2/checkpoint-954",
5
  "epoch": 3.0,
6
  "eval_steps": 500,
@@ -11,56 +11,53 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "grad_norm": 6.1552348136901855,
15
  "learning_rate": 1.3354297693920338e-05,
16
- "loss": 2.9367,
17
  "step": 318
18
  },
19
  {
20
  "epoch": 1.0,
21
- "eval_accuracy": 0.9012903225806451,
22
- "eval_f1": 0.8990202628613445,
23
- "eval_loss": 1.5702729225158691,
24
- "eval_model_preparation_time": 0.0029,
25
- "eval_runtime": 17.5772,
26
- "eval_samples_per_second": 176.365,
27
- "eval_steps_per_second": 3.698,
28
  "step": 318
29
  },
30
  {
31
  "epoch": 2.0,
32
- "grad_norm": 3.8856751918792725,
33
  "learning_rate": 6.6876310272536696e-06,
34
- "loss": 1.1765,
35
  "step": 636
36
  },
37
  {
38
  "epoch": 2.0,
39
- "eval_accuracy": 0.9412903225806452,
40
- "eval_f1": 0.9402146858730125,
41
- "eval_loss": 1.036224126815796,
42
- "eval_model_preparation_time": 0.0029,
43
- "eval_runtime": 17.3187,
44
- "eval_samples_per_second": 178.997,
45
- "eval_steps_per_second": 3.753,
46
  "step": 636
47
  },
48
  {
49
  "epoch": 3.0,
50
- "grad_norm": 2.4737651348114014,
51
  "learning_rate": 2.0964360587002098e-08,
52
- "loss": 0.8183,
53
  "step": 954
54
  },
55
  {
56
  "epoch": 3.0,
57
- "eval_accuracy": 0.944516129032258,
58
- "eval_f1": 0.943495053065245,
59
- "eval_loss": 0.8997413516044617,
60
- "eval_model_preparation_time": 0.0029,
61
- "eval_runtime": 17.4751,
62
- "eval_samples_per_second": 177.395,
63
- "eval_steps_per_second": 3.72,
64
  "step": 954
65
  }
66
  ],
@@ -94,8 +91,8 @@
94
  "train_batch_size": 48,
95
  "trial_name": null,
96
  "trial_params": {
97
- "alpha": 0.4894773949849175,
98
  "num_train_epochs": 3,
99
- "temperature": 20
100
  }
101
  }
 
1
  {
2
  "best_global_step": 954,
3
+ "best_metric": 0.9451672573240332,
4
  "best_model_checkpoint": "ModernBERT-base-distilled/run-2/checkpoint-954",
5
  "epoch": 3.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "grad_norm": 7.241408824920654,
15
  "learning_rate": 1.3354297693920338e-05,
16
+ "loss": 3.1039,
17
  "step": 318
18
  },
19
  {
20
  "epoch": 1.0,
21
+ "eval_accuracy": 0.9058064516129032,
22
+ "eval_f1": 0.9039413673060775,
23
+ "eval_loss": 1.5589689016342163,
24
+ "eval_runtime": 16.8256,
25
+ "eval_samples_per_second": 184.243,
26
+ "eval_steps_per_second": 3.863,
 
27
  "step": 318
28
  },
29
  {
30
  "epoch": 2.0,
31
+ "grad_norm": 4.324426651000977,
32
  "learning_rate": 6.6876310272536696e-06,
33
+ "loss": 1.1524,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 2.0,
38
+ "eval_accuracy": 0.94,
39
+ "eval_f1": 0.9386540826458253,
40
+ "eval_loss": 1.0111862421035767,
41
+ "eval_runtime": 16.5402,
42
+ "eval_samples_per_second": 187.422,
43
+ "eval_steps_per_second": 3.93,
 
44
  "step": 636
45
  },
46
  {
47
  "epoch": 3.0,
48
+ "grad_norm": 2.4562952518463135,
49
  "learning_rate": 2.0964360587002098e-08,
50
+ "loss": 0.7791,
51
  "step": 954
52
  },
53
  {
54
  "epoch": 3.0,
55
+ "eval_accuracy": 0.9461290322580646,
56
+ "eval_f1": 0.9451672573240332,
57
+ "eval_loss": 0.8669379949569702,
58
+ "eval_runtime": 16.7212,
59
+ "eval_samples_per_second": 185.393,
60
+ "eval_steps_per_second": 3.887,
 
61
  "step": 954
62
  }
63
  ],
 
91
  "train_batch_size": 48,
92
  "trial_name": null,
93
  "trial_params": {
94
+ "alpha": 0.5616085539831922,
95
  "num_train_epochs": 3,
96
+ "temperature": 14
97
  }
98
  }
run-2/checkpoint-954/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d7b3769acf391cea03ef15fd35cf3a1a8ad375b715605c557f3506a2acf1661
3
  size 5905
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac0ba789da4d8d3fec25ab5f556d37cdc9662d151e6cf3587f40da34600391df
3
  size 5905
run-3/checkpoint-318/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dcb271e93031ae8b58b911b87fcc3f4c4180a86b75d33df2fb9492973d3fbff4
3
  size 598898116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f456d0ed5f5b7415ebb134592fcc08a7b745e4e3ab9df5aab72bcf1fb2849cdc
3
  size 598898116
run-3/checkpoint-318/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5a8626969a07ac753378a4efe9517ee8a28bf0f6570521bd12c99844f5db425
3
  size 1197886411
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b932a20b2993303a88772f0eb02d764870588e7d0fb3d96228531f8522c0e74c
3
  size 1197886411
run-3/checkpoint-318/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60c346c7dfa879ca55dd907360767222bc10e5f61875eced577253aa33298d1a
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6ab09cf38d9cc45768ad6a2e8380f2fba62e9a29d4d1df08af0eb463f18f7c7
3
  size 1383
run-3/checkpoint-318/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 318,
3
- "best_metric": 0.9011780696610004,
4
  "best_model_checkpoint": "ModernBERT-base-distilled/run-3/checkpoint-318",
5
  "epoch": 1.0,
6
  "eval_steps": 500,
@@ -11,20 +11,19 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "grad_norm": 5.834390640258789,
15
  "learning_rate": 1.3354297693920338e-05,
16
- "loss": 2.6495,
17
  "step": 318
18
  },
19
  {
20
  "epoch": 1.0,
21
- "eval_accuracy": 0.9035483870967742,
22
- "eval_f1": 0.9011780696610004,
23
- "eval_loss": 1.3007111549377441,
24
- "eval_model_preparation_time": 0.0029,
25
- "eval_runtime": 17.5576,
26
- "eval_samples_per_second": 176.561,
27
- "eval_steps_per_second": 3.702,
28
  "step": 318
29
  }
30
  ],
@@ -58,8 +57,8 @@
58
  "train_batch_size": 48,
59
  "trial_name": null,
60
  "trial_params": {
61
- "alpha": 0.6526114010549311,
62
  "num_train_epochs": 3,
63
- "temperature": 19
64
  }
65
  }
 
1
  {
2
  "best_global_step": 318,
3
+ "best_metric": 0.840301912678485,
4
  "best_model_checkpoint": "ModernBERT-base-distilled/run-3/checkpoint-318",
5
  "epoch": 1.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "grad_norm": 30.5030517578125,
15
  "learning_rate": 1.3354297693920338e-05,
16
+ "loss": 13.5958,
17
  "step": 318
18
  },
19
  {
20
  "epoch": 1.0,
21
+ "eval_accuracy": 0.8512903225806452,
22
+ "eval_f1": 0.840301912678485,
23
+ "eval_loss": 4.8386430740356445,
24
+ "eval_runtime": 16.7421,
25
+ "eval_samples_per_second": 185.162,
26
+ "eval_steps_per_second": 3.882,
 
27
  "step": 318
28
  }
29
  ],
 
57
  "train_batch_size": 48,
58
  "trial_name": null,
59
  "trial_params": {
60
+ "alpha": 0.009270166823335702,
61
  "num_train_epochs": 3,
62
+ "temperature": 5
63
  }
64
  }
run-3/checkpoint-318/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e1f35db374dcdc060b0bc938245b3236dd35b1439b812f5c776c2f2f93e0792
3
  size 5905
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfb3c5fcdf213574e08d6db11e83b561a9f50bdee78fb3ae2e8aeae12175b619
3
  size 5905
runs/Oct11_19-55-54_cd07aeb3aeb4/events.out.tfevents.1760215466.cd07aeb3aeb4.2948.3 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57c2daa5b8175b293830d0200cf907aedd89aa33ed4e69310d8d97d7fc73923d
3
- size 14370
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd3cf32b264bff3341e0c311d9240fb12a345f71e43c7dec01fd6a2ba78ac6bd
3
+ size 15304
runs/Oct11_19-55-54_cd07aeb3aeb4/events.out.tfevents.1760216161.cd07aeb3aeb4.2948.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90b86ff7f94da9a7ca742b19ff9e85d89d0f876837882df45bf6bd4ce4757308
3
+ size 13790
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac0ba789da4d8d3fec25ab5f556d37cdc9662d151e6cf3587f40da34600391df
3
  size 5905
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfb3c5fcdf213574e08d6db11e83b561a9f50bdee78fb3ae2e8aeae12175b619
3
  size 5905