OskarMosberg committed on
Commit
c323985
·
verified ·
1 Parent(s): 4fb03b3

Training in progress, step 1908

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:518331da269c662ee8830603381cbda2d35ec5ecdd797b4d2687723047940dab
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae6703ccdf17348312313e50fadfbcd524f63c2c308cf6108eea38721f3dcc4b
3
  size 268290900
run-0/checkpoint-1908/config.json CHANGED
@@ -325,6 +325,6 @@
325
  "seq_classif_dropout": 0.2,
326
  "sinusoidal_pos_embds": false,
327
  "tie_weights_": true,
328
- "transformers_version": "4.57.0",
329
  "vocab_size": 30522
330
  }
 
325
  "seq_classif_dropout": 0.2,
326
  "sinusoidal_pos_embds": false,
327
  "tie_weights_": true,
328
+ "transformers_version": "4.57.1",
329
  "vocab_size": 30522
330
  }
run-0/checkpoint-1908/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa1ae5e2c4fd0dbdc858eb943503d5d02d6bdcd7cf73efb5f11d893bfd0fe90b
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae6703ccdf17348312313e50fadfbcd524f63c2c308cf6108eea38721f3dcc4b
3
  size 268290900
run-0/checkpoint-1908/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dff0a9409988dc578fad267022fc1473bd315a7d1f3baef1e34ae7138fd8c74e
3
  size 536645835
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f749146330a15d2c3dab070f988f3cde7e87b636f5278eaa8ed6a374e0a9f71
3
  size 536645835
run-0/checkpoint-1908/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41515894a907f055957ca1bcf7cd465bce98c3eb7dbcbbe063af0c31a41d495d
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57c5c828e912fe4f6472cbb9b4d4e6ff70cfdd888b55d7a6b18a985c7d794d58
3
  size 14645
run-0/checkpoint-1908/trainer_state.json CHANGED
@@ -9,78 +9,99 @@
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
 
 
 
 
 
 
 
12
  {
13
  "epoch": 1.0,
14
- "eval_accuracy": 0.69,
15
- "eval_loss": 0.20655198395252228,
16
- "eval_runtime": 1.0895,
17
- "eval_samples_per_second": 2845.214,
18
- "eval_steps_per_second": 59.658,
19
  "step": 318
20
  },
21
  {
22
- "epoch": 1.5723270440251573,
23
- "grad_norm": 0.6757088303565979,
24
- "learning_rate": 1.4769392033542977e-05,
25
- "loss": 0.394,
26
- "step": 500
27
  },
28
  {
29
  "epoch": 2.0,
30
- "eval_accuracy": 0.8238709677419355,
31
- "eval_loss": 0.054807424545288086,
32
- "eval_runtime": 1.1062,
33
- "eval_samples_per_second": 2802.484,
34
- "eval_steps_per_second": 58.762,
35
  "step": 636
36
  },
 
 
 
 
 
 
 
37
  {
38
  "epoch": 3.0,
39
- "eval_accuracy": 0.8574193548387097,
40
- "eval_loss": 0.020307132974267006,
41
- "eval_runtime": 1.0941,
42
- "eval_samples_per_second": 2833.344,
43
- "eval_steps_per_second": 59.409,
44
  "step": 954
45
  },
46
  {
47
- "epoch": 3.1446540880503147,
48
- "grad_norm": 0.37207934260368347,
49
- "learning_rate": 9.528301886792455e-06,
50
- "loss": 0.0889,
51
- "step": 1000
52
  },
53
  {
54
  "epoch": 4.0,
55
- "eval_accuracy": 0.8745161290322581,
56
- "eval_loss": 0.011259685270488262,
57
- "eval_runtime": 1.0904,
58
- "eval_samples_per_second": 2842.954,
59
- "eval_steps_per_second": 59.61,
60
  "step": 1272
61
  },
62
  {
63
- "epoch": 4.716981132075472,
64
- "grad_norm": 0.2620565891265869,
65
- "learning_rate": 4.287211740041929e-06,
66
- "loss": 0.0498,
67
- "step": 1500
68
  },
69
  {
70
  "epoch": 5.0,
71
- "eval_accuracy": 0.8835483870967742,
72
- "eval_loss": 0.008391091600060463,
73
- "eval_runtime": 1.1151,
74
- "eval_samples_per_second": 2780.007,
75
- "eval_steps_per_second": 58.29,
76
  "step": 1590
 
 
 
 
 
 
 
77
  }
78
  ],
79
- "logging_steps": 500,
80
  "max_steps": 1908,
81
  "num_input_tokens_seen": 0,
82
  "num_train_epochs": 6,
83
- "save_steps": 500,
84
  "stateful_callbacks": {
85
  "TrainerControl": {
86
  "args": {
@@ -93,12 +114,12 @@
93
  "attributes": {}
94
  }
95
  },
96
- "total_flos": 391368939443328.0,
97
  "train_batch_size": 48,
98
  "trial_name": null,
99
  "trial_params": {
100
- "alpha": 0.8970714832139004,
101
  "num_train_epochs": 6,
102
- "temperature": 3
103
  }
104
  }
 
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
+ {
13
+ "epoch": 0.9968553459119497,
14
+ "grad_norm": 8.542065620422363,
15
+ "learning_rate": 1.668763102725367e-05,
16
+ "loss": 19.3867,
17
+ "step": 317
18
+ },
19
  {
20
  "epoch": 1.0,
21
+ "eval_accuracy": 0.0070967741935483875,
22
+ "eval_loss": 15.982450485229492,
23
+ "eval_runtime": 7.4965,
24
+ "eval_samples_per_second": 413.525,
25
+ "eval_steps_per_second": 8.671,
26
  "step": 318
27
  },
28
  {
29
+ "epoch": 1.9937106918238994,
30
+ "grad_norm": 15.621566772460938,
31
+ "learning_rate": 1.3364779874213839e-05,
32
+ "loss": 14.5452,
33
+ "step": 634
34
  },
35
  {
36
  "epoch": 2.0,
37
+ "eval_accuracy": 0.004838709677419355,
38
+ "eval_loss": 13.407492637634277,
39
+ "eval_runtime": 7.6188,
40
+ "eval_samples_per_second": 406.887,
41
+ "eval_steps_per_second": 8.532,
42
  "step": 636
43
  },
44
+ {
45
+ "epoch": 2.990566037735849,
46
+ "grad_norm": 26.90778350830078,
47
+ "learning_rate": 1.0041928721174005e-05,
48
+ "loss": 12.1177,
49
+ "step": 951
50
+ },
51
  {
52
  "epoch": 3.0,
53
+ "eval_accuracy": 0.003870967741935484,
54
+ "eval_loss": 11.682502746582031,
55
+ "eval_runtime": 7.556,
56
+ "eval_samples_per_second": 410.27,
57
+ "eval_steps_per_second": 8.602,
58
  "step": 954
59
  },
60
  {
61
+ "epoch": 3.9874213836477987,
62
+ "grad_norm": 22.79652976989746,
63
+ "learning_rate": 6.719077568134172e-06,
64
+ "loss": 10.4838,
65
+ "step": 1268
66
  },
67
  {
68
  "epoch": 4.0,
69
+ "eval_accuracy": 0.004193548387096774,
70
+ "eval_loss": 10.62924861907959,
71
+ "eval_runtime": 7.5805,
72
+ "eval_samples_per_second": 408.945,
73
+ "eval_steps_per_second": 8.575,
74
  "step": 1272
75
  },
76
  {
77
+ "epoch": 4.984276729559748,
78
+ "grad_norm": 33.079402923583984,
79
+ "learning_rate": 3.3962264150943395e-06,
80
+ "loss": 9.5574,
81
+ "step": 1585
82
  },
83
  {
84
  "epoch": 5.0,
85
+ "eval_accuracy": 0.004516129032258065,
86
+ "eval_loss": 10.126119613647461,
87
+ "eval_runtime": 7.5983,
88
+ "eval_samples_per_second": 407.986,
89
+ "eval_steps_per_second": 8.555,
90
  "step": 1590
91
+ },
92
+ {
93
+ "epoch": 5.981132075471698,
94
+ "grad_norm": 33.83201217651367,
95
+ "learning_rate": 7.337526205450734e-08,
96
+ "loss": 9.0404,
97
+ "step": 1902
98
  }
99
  ],
100
+ "logging_steps": 317,
101
  "max_steps": 1908,
102
  "num_input_tokens_seen": 0,
103
  "num_train_epochs": 6,
104
+ "save_steps": 1000000000.0,
105
  "stateful_callbacks": {
106
  "TrainerControl": {
107
  "args": {
 
114
  "attributes": {}
115
  }
116
  },
117
+ "total_flos": 495439677408900.0,
118
  "train_batch_size": 48,
119
  "trial_name": null,
120
  "trial_params": {
121
+ "alpha": 0.807815004222285,
122
  "num_train_epochs": 6,
123
+ "temperature": 4
124
  }
125
  }
run-0/checkpoint-1908/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3c42abe9309d4e02d824bbbc39bab4b3e2453bce489d5394f8d3bb9943956d8
3
  size 5841
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:204994af76901ff236c7a1bf65cfa8e9d7882677ca49f5ac718276495bae9a6b
3
  size 5841
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7593fed01b1106bf27d4c4add3956e6289406e36eb30a57ac51ad6262116582c
3
  size 5841
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:204994af76901ff236c7a1bf65cfa8e9d7882677ca49f5ac718276495bae9a6b
3
  size 5841