DanielDanielDanielDanielDanielDaniel commited on
Commit
eb82d37
·
verified ·
1 Parent(s): 32c032a

Training in progress, epoch 1

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f41dc7464f89e4f301f003180d87873095484861a9ac7fb83cf4b55052647bf
3
  size 598898116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e9a93c96af72d82d1948b211536371c8b90e993f97655333941c27153619b48
3
  size 598898116
run-2/checkpoint-240/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:add493ece107411bdcdf2294dbdc0f4f0d7a1165e8057c6e5ef4dea90267963d
3
  size 598898116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d19f751c835e4a83f407bd290c3cfa629eb5b7d69cc9a6c4aef931fd56b42d58
3
  size 598898116
run-2/checkpoint-240/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:890e9d45f3ad55cedb6a849cd4fdcba49e36803ff16da0356abb847f1076bf32
3
  size 1197886411
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43bc5a5d7df840cb2bcd11c9964b930c621fdcbdaca88129d582aa99bc165459
3
  size 1197886411
run-2/checkpoint-240/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:efbb971914d1f6b2141ee488b084ac87c0ce2e72c0fc9ba081492393d2add9f3
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d17eed1de60c5b0d63a7107d97b5f756179afc2b9ed6fa9240b6c0f51933a5ca
3
  size 1465
run-2/checkpoint-240/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e64dd41b8f1f02a59dec0ad0b327aa33b067da9f7bae29bbb31cf106ac5ff4ff
3
  size 5905
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30763bea4e95fddafc920e08c1ea1c5e03c960715d174ba50f005d578e068b60
3
  size 5905
run-2/checkpoint-360/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8952930d503dac89d9e0f19712ed66893f71a7af7c5a6cc0e7d68163e67e2242
3
  size 598898116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e71774b6250dccc404d7a43eb99909369c29006c636d7d5f020a1936f152985
3
  size 598898116
run-2/checkpoint-360/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b098c8db4180bad4a9ea8ec40059e465a21b283c496de2c55a9d81b605f46b78
3
  size 1197886411
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6aeb750fd063d0897295ed9b3c704e8ca8affaccbfe391748bb41367334f2ec9
3
  size 1197886411
run-2/checkpoint-360/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b1a14b452e048ebab57fc7a59cdeb90d9da45ecb36b9e88f4865e0bfdef7240
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc5e6ebd5938282b4ca3e8958f53e1e7f1aab10a1a01fad3653bf04d73b15d61
3
  size 1465
run-2/checkpoint-360/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 360,
3
- "best_metric": 0.93,
4
  "best_model_checkpoint": "student_modernBERT_clinc_oos/run-2/checkpoint-360",
5
  "epoch": 3.0,
6
  "eval_steps": 500,
@@ -11,36 +11,36 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "eval_accuracy": 0.6835483870967742,
15
- "eval_loss": 3.5096893310546875,
16
- "eval_runtime": 8.6532,
17
- "eval_samples_per_second": 358.247,
18
- "eval_steps_per_second": 11.21,
19
  "step": 120
20
  },
21
  {
22
  "epoch": 2.0,
23
- "eval_accuracy": 0.9,
24
- "eval_loss": 2.182756185531616,
25
- "eval_runtime": 9.7422,
26
- "eval_samples_per_second": 318.203,
27
- "eval_steps_per_second": 9.957,
28
  "step": 240
29
  },
30
  {
31
  "epoch": 3.0,
32
- "eval_accuracy": 0.93,
33
- "eval_loss": 1.5653125047683716,
34
- "eval_runtime": 9.6218,
35
- "eval_samples_per_second": 322.184,
36
- "eval_steps_per_second": 10.081,
37
  "step": 360
38
  }
39
  ],
40
  "logging_steps": 500,
41
- "max_steps": 960,
42
  "num_input_tokens_seen": 0,
43
- "num_train_epochs": 8,
44
  "save_steps": 500,
45
  "stateful_callbacks": {
46
  "TrainerControl": {
@@ -58,8 +58,8 @@
58
  "train_batch_size": 32,
59
  "trial_name": null,
60
  "trial_params": {
61
- "alpha": 0.65741975255739,
62
- "num_train_epochs": 8,
63
- "temperature": 18
64
  }
65
  }
 
1
  {
2
  "best_global_step": 360,
3
+ "best_metric": 0.922258064516129,
4
  "best_model_checkpoint": "student_modernBERT_clinc_oos/run-2/checkpoint-360",
5
  "epoch": 3.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "eval_accuracy": 0.6767741935483871,
15
+ "eval_loss": 7.750784873962402,
16
+ "eval_runtime": 8.6464,
17
+ "eval_samples_per_second": 358.531,
18
+ "eval_steps_per_second": 11.219,
19
  "step": 120
20
  },
21
  {
22
  "epoch": 2.0,
23
+ "eval_accuracy": 0.8777419354838709,
24
+ "eval_loss": 3.922584056854248,
25
+ "eval_runtime": 8.802,
26
+ "eval_samples_per_second": 352.193,
27
+ "eval_steps_per_second": 11.02,
28
  "step": 240
29
  },
30
  {
31
  "epoch": 3.0,
32
+ "eval_accuracy": 0.922258064516129,
33
+ "eval_loss": 2.6575753688812256,
34
+ "eval_runtime": 9.5831,
35
+ "eval_samples_per_second": 323.485,
36
+ "eval_steps_per_second": 10.122,
37
  "step": 360
38
  }
39
  ],
40
  "logging_steps": 500,
41
+ "max_steps": 720,
42
  "num_input_tokens_seen": 0,
43
+ "num_train_epochs": 6,
44
  "save_steps": 500,
45
  "stateful_callbacks": {
46
  "TrainerControl": {
 
58
  "train_batch_size": 32,
59
  "trial_name": null,
60
  "trial_params": {
61
+ "alpha": 0.7203792274973846,
62
+ "num_train_epochs": 6,
63
+ "temperature": 7
64
  }
65
  }
run-2/checkpoint-360/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e64dd41b8f1f02a59dec0ad0b327aa33b067da9f7bae29bbb31cf106ac5ff4ff
3
  size 5905
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30763bea4e95fddafc920e08c1ea1c5e03c960715d174ba50f005d578e068b60
3
  size 5905
run-2/checkpoint-480/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf56298b886714900509880dc080fa5aa05ab7d1ecc79bcb35bf380777b70ac2
3
  size 598898116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1e82e0c12aba607099b4d2be138de8f321c2124b057f6f454b315485344be22
3
  size 598898116
run-2/checkpoint-480/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:271e68778d003fb75fba98848095273b837ea34599e55a9a2b4abc3d884c85e1
3
  size 1197886411
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f795c607132939ca56cee0f6003a8a8eea7b7e4bb0953a26f65ba50ea431cec8
3
  size 1197886411
run-2/checkpoint-480/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8cbcab650b1a67db624b2b9fea764b147dc79e5517b7a9119f143346d1168919
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddd0552961f51613cb77af24bc0e3fccce21e8083c3d1c6f93f74ab36b5046da
3
  size 1465
run-2/checkpoint-480/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 480,
3
- "best_metric": 0.9406451612903226,
4
  "best_model_checkpoint": "student_modernBERT_clinc_oos/run-2/checkpoint-480",
5
  "epoch": 4.0,
6
  "eval_steps": 500,
@@ -11,45 +11,45 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "eval_accuracy": 0.6835483870967742,
15
- "eval_loss": 3.5096893310546875,
16
- "eval_runtime": 8.6532,
17
- "eval_samples_per_second": 358.247,
18
- "eval_steps_per_second": 11.21,
19
  "step": 120
20
  },
21
  {
22
  "epoch": 2.0,
23
- "eval_accuracy": 0.9,
24
- "eval_loss": 2.182756185531616,
25
- "eval_runtime": 9.7422,
26
- "eval_samples_per_second": 318.203,
27
- "eval_steps_per_second": 9.957,
28
  "step": 240
29
  },
30
  {
31
  "epoch": 3.0,
32
- "eval_accuracy": 0.93,
33
- "eval_loss": 1.5653125047683716,
34
- "eval_runtime": 9.6218,
35
- "eval_samples_per_second": 322.184,
36
- "eval_steps_per_second": 10.081,
37
  "step": 360
38
  },
39
  {
40
  "epoch": 4.0,
41
- "eval_accuracy": 0.9406451612903226,
42
- "eval_loss": 1.2410972118377686,
43
- "eval_runtime": 8.8214,
44
- "eval_samples_per_second": 351.416,
45
- "eval_steps_per_second": 10.996,
46
  "step": 480
47
  }
48
  ],
49
  "logging_steps": 500,
50
- "max_steps": 960,
51
  "num_input_tokens_seen": 0,
52
- "num_train_epochs": 8,
53
  "save_steps": 500,
54
  "stateful_callbacks": {
55
  "TrainerControl": {
@@ -67,8 +67,8 @@
67
  "train_batch_size": 32,
68
  "trial_name": null,
69
  "trial_params": {
70
- "alpha": 0.65741975255739,
71
- "num_train_epochs": 8,
72
- "temperature": 18
73
  }
74
  }
 
1
  {
2
  "best_global_step": 480,
3
+ "best_metric": 0.9312903225806451,
4
  "best_model_checkpoint": "student_modernBERT_clinc_oos/run-2/checkpoint-480",
5
  "epoch": 4.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "eval_accuracy": 0.6767741935483871,
15
+ "eval_loss": 7.750784873962402,
16
+ "eval_runtime": 8.6464,
17
+ "eval_samples_per_second": 358.531,
18
+ "eval_steps_per_second": 11.219,
19
  "step": 120
20
  },
21
  {
22
  "epoch": 2.0,
23
+ "eval_accuracy": 0.8777419354838709,
24
+ "eval_loss": 3.922584056854248,
25
+ "eval_runtime": 8.802,
26
+ "eval_samples_per_second": 352.193,
27
+ "eval_steps_per_second": 11.02,
28
  "step": 240
29
  },
30
  {
31
  "epoch": 3.0,
32
+ "eval_accuracy": 0.922258064516129,
33
+ "eval_loss": 2.6575753688812256,
34
+ "eval_runtime": 9.5831,
35
+ "eval_samples_per_second": 323.485,
36
+ "eval_steps_per_second": 10.122,
37
  "step": 360
38
  },
39
  {
40
  "epoch": 4.0,
41
+ "eval_accuracy": 0.9312903225806451,
42
+ "eval_loss": 2.112471342086792,
43
+ "eval_runtime": 8.867,
44
+ "eval_samples_per_second": 349.611,
45
+ "eval_steps_per_second": 10.939,
46
  "step": 480
47
  }
48
  ],
49
  "logging_steps": 500,
50
+ "max_steps": 720,
51
  "num_input_tokens_seen": 0,
52
+ "num_train_epochs": 6,
53
  "save_steps": 500,
54
  "stateful_callbacks": {
55
  "TrainerControl": {
 
67
  "train_batch_size": 32,
68
  "trial_name": null,
69
  "trial_params": {
70
+ "alpha": 0.7203792274973846,
71
+ "num_train_epochs": 6,
72
+ "temperature": 7
73
  }
74
  }
run-2/checkpoint-480/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e64dd41b8f1f02a59dec0ad0b327aa33b067da9f7bae29bbb31cf106ac5ff4ff
3
  size 5905
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30763bea4e95fddafc920e08c1ea1c5e03c960715d174ba50f005d578e068b60
3
  size 5905
run-2/checkpoint-600/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10f1f7fb2f8c6a6c1d672d93b06b4e09ffaf4ea4c0341d51088624523fb8b551
3
  size 598898116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1dfaeca3a10b2e05d4644ba854079a0f41d2c936606f2375dfbb165a8632c62
3
  size 598898116
run-2/checkpoint-600/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3658a1f590a94c5f2031545d819d6f7ac07c65dd32c9cd8b5a3aaf56ccab8157
3
  size 1197886411
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a5e880c068ce9544975471f48cc4c92dc9f5d68252ba51d7a6ac95e64ec8bdc
3
  size 1197886411
run-2/checkpoint-600/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5474f27733acd307abb3442c0e732ad487e4808c510184cc818db64a9ae46cd
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bf3fb53f5149438e86367cf7dc1034ffdf5899cb1e2f8a4da236e4f899094c7
3
  size 1465
run-2/checkpoint-600/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 600,
3
- "best_metric": 0.9451612903225807,
4
  "best_model_checkpoint": "student_modernBERT_clinc_oos/run-2/checkpoint-600",
5
  "epoch": 5.0,
6
  "eval_steps": 500,
@@ -11,61 +11,61 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "eval_accuracy": 0.6835483870967742,
15
- "eval_loss": 3.5096893310546875,
16
- "eval_runtime": 8.6532,
17
- "eval_samples_per_second": 358.247,
18
- "eval_steps_per_second": 11.21,
19
  "step": 120
20
  },
21
  {
22
  "epoch": 2.0,
23
- "eval_accuracy": 0.9,
24
- "eval_loss": 2.182756185531616,
25
- "eval_runtime": 9.7422,
26
- "eval_samples_per_second": 318.203,
27
- "eval_steps_per_second": 9.957,
28
  "step": 240
29
  },
30
  {
31
  "epoch": 3.0,
32
- "eval_accuracy": 0.93,
33
- "eval_loss": 1.5653125047683716,
34
- "eval_runtime": 9.6218,
35
- "eval_samples_per_second": 322.184,
36
- "eval_steps_per_second": 10.081,
37
  "step": 360
38
  },
39
  {
40
  "epoch": 4.0,
41
- "eval_accuracy": 0.9406451612903226,
42
- "eval_loss": 1.2410972118377686,
43
- "eval_runtime": 8.8214,
44
- "eval_samples_per_second": 351.416,
45
- "eval_steps_per_second": 10.996,
46
  "step": 480
47
  },
48
  {
49
  "epoch": 4.167714884696017,
50
- "grad_norm": 13.684679985046387,
51
- "learning_rate": 9.604166666666669e-06,
52
- "loss": 10.4217,
53
  "step": 500
54
  },
55
  {
56
  "epoch": 5.0,
57
- "eval_accuracy": 0.9451612903225807,
58
- "eval_loss": 1.077558159828186,
59
- "eval_runtime": 9.0857,
60
- "eval_samples_per_second": 341.194,
61
- "eval_steps_per_second": 10.676,
62
  "step": 600
63
  }
64
  ],
65
  "logging_steps": 500,
66
- "max_steps": 960,
67
  "num_input_tokens_seen": 0,
68
- "num_train_epochs": 8,
69
  "save_steps": 500,
70
  "stateful_callbacks": {
71
  "TrainerControl": {
@@ -83,8 +83,8 @@
83
  "train_batch_size": 32,
84
  "trial_name": null,
85
  "trial_params": {
86
- "alpha": 0.65741975255739,
87
- "num_train_epochs": 8,
88
- "temperature": 18
89
  }
90
  }
 
1
  {
2
  "best_global_step": 600,
3
+ "best_metric": 0.9393548387096774,
4
  "best_model_checkpoint": "student_modernBERT_clinc_oos/run-2/checkpoint-600",
5
  "epoch": 5.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "eval_accuracy": 0.6767741935483871,
15
+ "eval_loss": 7.750784873962402,
16
+ "eval_runtime": 8.6464,
17
+ "eval_samples_per_second": 358.531,
18
+ "eval_steps_per_second": 11.219,
19
  "step": 120
20
  },
21
  {
22
  "epoch": 2.0,
23
+ "eval_accuracy": 0.8777419354838709,
24
+ "eval_loss": 3.922584056854248,
25
+ "eval_runtime": 8.802,
26
+ "eval_samples_per_second": 352.193,
27
+ "eval_steps_per_second": 11.02,
28
  "step": 240
29
  },
30
  {
31
  "epoch": 3.0,
32
+ "eval_accuracy": 0.922258064516129,
33
+ "eval_loss": 2.6575753688812256,
34
+ "eval_runtime": 9.5831,
35
+ "eval_samples_per_second": 323.485,
36
+ "eval_steps_per_second": 10.122,
37
  "step": 360
38
  },
39
  {
40
  "epoch": 4.0,
41
+ "eval_accuracy": 0.9312903225806451,
42
+ "eval_loss": 2.112471342086792,
43
+ "eval_runtime": 8.867,
44
+ "eval_samples_per_second": 349.611,
45
+ "eval_steps_per_second": 10.939,
46
  "step": 480
47
  },
48
  {
49
  "epoch": 4.167714884696017,
50
+ "grad_norm": 28.72196388244629,
51
+ "learning_rate": 6.13888888888889e-06,
52
+ "loss": 22.9799,
53
  "step": 500
54
  },
55
  {
56
  "epoch": 5.0,
57
+ "eval_accuracy": 0.9393548387096774,
58
+ "eval_loss": 1.863680362701416,
59
+ "eval_runtime": 8.8973,
60
+ "eval_samples_per_second": 348.42,
61
+ "eval_steps_per_second": 10.902,
62
  "step": 600
63
  }
64
  ],
65
  "logging_steps": 500,
66
+ "max_steps": 720,
67
  "num_input_tokens_seen": 0,
68
+ "num_train_epochs": 6,
69
  "save_steps": 500,
70
  "stateful_callbacks": {
71
  "TrainerControl": {
 
83
  "train_batch_size": 32,
84
  "trial_name": null,
85
  "trial_params": {
86
+ "alpha": 0.7203792274973846,
87
+ "num_train_epochs": 6,
88
+ "temperature": 7
89
  }
90
  }
run-2/checkpoint-600/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e64dd41b8f1f02a59dec0ad0b327aa33b067da9f7bae29bbb31cf106ac5ff4ff
3
  size 5905
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30763bea4e95fddafc920e08c1ea1c5e03c960715d174ba50f005d578e068b60
3
  size 5905
run-2/checkpoint-720/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0cc75c7f8e37b8dd524825dfcde256ad68c5d057195c3a3cc8cfb0147be7caba
3
  size 598898116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6590962f40d2c0c783a5746d100afd9db7ec6b03610c2894af9467c18e00f0d3
3
  size 598898116
run-2/checkpoint-720/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3eced8bff649fe140520dac12170e52c052403db9886807c475b5848318d3457
3
- size 767230411
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aba6bd0191facb8da3d8c41285b6d8db14979919284c22f722d03d1e981bab38
3
+ size 1197886411
run-2/checkpoint-720/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06eef7f954d1b09437a102c64a6d35db565f4aaad01b684ed37a17f3c3f88e86
3
+ size 14645
run-2/checkpoint-720/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3863a446d67309a219068d04b8e7029d8b680b52900fd8ef93439bde5094d2a4
3
+ size 1465
run-2/checkpoint-720/trainer_state.json ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 720,
3
+ "best_metric": 0.9425806451612904,
4
+ "best_model_checkpoint": "student_modernBERT_clinc_oos/run-2/checkpoint-720",
5
+ "epoch": 6.0,
6
+ "eval_steps": 500,
7
+ "global_step": 720,
8
+ "is_hyper_param_search": true,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 1.0,
14
+ "eval_accuracy": 0.6767741935483871,
15
+ "eval_loss": 7.750784873962402,
16
+ "eval_runtime": 8.6464,
17
+ "eval_samples_per_second": 358.531,
18
+ "eval_steps_per_second": 11.219,
19
+ "step": 120
20
+ },
21
+ {
22
+ "epoch": 2.0,
23
+ "eval_accuracy": 0.8777419354838709,
24
+ "eval_loss": 3.922584056854248,
25
+ "eval_runtime": 8.802,
26
+ "eval_samples_per_second": 352.193,
27
+ "eval_steps_per_second": 11.02,
28
+ "step": 240
29
+ },
30
+ {
31
+ "epoch": 3.0,
32
+ "eval_accuracy": 0.922258064516129,
33
+ "eval_loss": 2.6575753688812256,
34
+ "eval_runtime": 9.5831,
35
+ "eval_samples_per_second": 323.485,
36
+ "eval_steps_per_second": 10.122,
37
+ "step": 360
38
+ },
39
+ {
40
+ "epoch": 4.0,
41
+ "eval_accuracy": 0.9312903225806451,
42
+ "eval_loss": 2.112471342086792,
43
+ "eval_runtime": 8.867,
44
+ "eval_samples_per_second": 349.611,
45
+ "eval_steps_per_second": 10.939,
46
+ "step": 480
47
+ },
48
+ {
49
+ "epoch": 4.167714884696017,
50
+ "grad_norm": 28.72196388244629,
51
+ "learning_rate": 6.13888888888889e-06,
52
+ "loss": 22.9799,
53
+ "step": 500
54
+ },
55
+ {
56
+ "epoch": 5.0,
57
+ "eval_accuracy": 0.9393548387096774,
58
+ "eval_loss": 1.863680362701416,
59
+ "eval_runtime": 8.8973,
60
+ "eval_samples_per_second": 348.42,
61
+ "eval_steps_per_second": 10.902,
62
+ "step": 600
63
+ },
64
+ {
65
+ "epoch": 6.0,
66
+ "eval_accuracy": 0.9425806451612904,
67
+ "eval_loss": 1.7904150485992432,
68
+ "eval_runtime": 9.4998,
69
+ "eval_samples_per_second": 326.321,
70
+ "eval_steps_per_second": 10.211,
71
+ "step": 720
72
+ }
73
+ ],
74
+ "logging_steps": 500,
75
+ "max_steps": 720,
76
+ "num_input_tokens_seen": 0,
77
+ "num_train_epochs": 6,
78
+ "save_steps": 500,
79
+ "stateful_callbacks": {
80
+ "TrainerControl": {
81
+ "args": {
82
+ "should_epoch_stop": false,
83
+ "should_evaluate": false,
84
+ "should_log": false,
85
+ "should_save": true,
86
+ "should_training_stop": true
87
+ },
88
+ "attributes": {}
89
+ }
90
+ },
91
+ "total_flos": 891058846689456.0,
92
+ "train_batch_size": 32,
93
+ "trial_name": null,
94
+ "trial_params": {
95
+ "alpha": 0.7203792274973846,
96
+ "num_train_epochs": 6,
97
+ "temperature": 7
98
+ }
99
+ }
run-2/checkpoint-720/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e64dd41b8f1f02a59dec0ad0b327aa33b067da9f7bae29bbb31cf106ac5ff4ff
3
  size 5905
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30763bea4e95fddafc920e08c1ea1c5e03c960715d174ba50f005d578e068b60
3
  size 5905
runs/Oct22_10-42-10_5515f8f51c79/events.out.tfevents.1761132371.5515f8f51c79.36495.3 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:051644a8854a7926e4fd126c0305876abb89e7a1002331587a805575b6c6b959
3
- size 13861
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e39c74d3b7afbaf88687b5fce085a0d250458d5b92a90bd2bbd77e86e72eb2f4
3
+ size 15718
runs/Oct22_10-42-10_5515f8f51c79/events.out.tfevents.1761133175.5515f8f51c79.36495.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4f6be3cc29f8c83b7ddef83f92deaf61b64c8828e775002346da9c16c147ae4
3
+ size 15725
runs/Oct22_15-09-25_5515f8f51c79/events.out.tfevents.1761145777.5515f8f51c79.102745.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:615a882739da2feb430a50d5b9acff8d7bab3eaf80a93bed537e2f1af4450667
3
+ size 13861
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:30763bea4e95fddafc920e08c1ea1c5e03c960715d174ba50f005d578e068b60
3
  size 5905
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a9027474d38a9f6a2b10e091b1562daaa0163abd36a73ab4256da426ba345b2
3
  size 5905