simon-mellergaard commited on
Commit
e250d67
·
verified ·
1 Parent(s): 80c32c1

Training in progress, step 1000

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e457f27461b2483b852da76fe894d68c3544c2ae66ba9545672834917c367150
3
  size 598898116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c93d681696ed3445c446a7b2e66b7998d98302e5f7b397e53af0ecf449cee1e
3
  size 598898116
run-0/checkpoint-1000/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3968e1b921312e34b6af204e89f578af594570e7f013fb0b5d7a0cc0f48eed92
3
  size 598898116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c93d681696ed3445c446a7b2e66b7998d98302e5f7b397e53af0ecf449cee1e
3
  size 598898116
run-0/checkpoint-1000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf78e2c6f4f050744c7babc85326d5ad15003ef16d976da1de97fceea9812414
3
  size 1197884026
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:212305b2b0de4dcf577037010c70ea29a5181579846dccb1f434c7565db13925
3
  size 1197884026
run-0/checkpoint-1000/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f125fa33a5ddf9a6263576bd35ad82a77abf349001c5cbe06d69a6e3bbe3bf28
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c32bdbd7d3ec7581d6659ed0a44478218265d8d53be00c083550f24c6010954b
3
  size 1064
run-0/checkpoint-1000/trainer_state.json CHANGED
@@ -11,98 +11,98 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "eval_accuracy": 0.7422580645161291,
15
- "eval_loss": 2.6352736949920654,
16
- "eval_runtime": 20.1609,
17
- "eval_samples_per_second": 153.763,
18
- "eval_steps_per_second": 1.637,
19
  "step": 159
20
  },
21
  {
22
  "epoch": 1.2578616352201257,
23
- "grad_norm": 4.528295516967773,
24
- "learning_rate": 1.7218728162124388e-05,
25
- "loss": 3.9909,
26
  "step": 200
27
  },
28
  {
29
  "epoch": 2.0,
30
- "eval_accuracy": 0.917741935483871,
31
- "eval_loss": 1.4726468324661255,
32
- "eval_runtime": 20.7041,
33
- "eval_samples_per_second": 149.729,
34
- "eval_steps_per_second": 1.594,
35
  "step": 318
36
  },
37
  {
38
  "epoch": 2.5157232704402515,
39
- "grad_norm": 4.496422290802002,
40
- "learning_rate": 1.4423480083857445e-05,
41
- "loss": 1.6,
42
  "step": 400
43
  },
44
  {
45
  "epoch": 3.0,
46
- "eval_accuracy": 0.9419354838709677,
47
- "eval_loss": 1.0404900312423706,
48
- "eval_runtime": 20.139,
49
- "eval_samples_per_second": 153.93,
50
- "eval_steps_per_second": 1.639,
51
  "step": 477
52
  },
53
  {
54
  "epoch": 3.7735849056603774,
55
- "grad_norm": 3.2070181369781494,
56
- "learning_rate": 1.1628232005590496e-05,
57
- "loss": 0.9245,
58
  "step": 600
59
  },
60
  {
61
  "epoch": 4.0,
62
- "eval_accuracy": 0.9496774193548387,
63
- "eval_loss": 0.8541080355644226,
64
- "eval_runtime": 20.1419,
65
- "eval_samples_per_second": 153.908,
66
- "eval_steps_per_second": 1.638,
67
  "step": 636
68
  },
69
  {
70
  "epoch": 5.0,
71
- "eval_accuracy": 0.9551612903225807,
72
- "eval_loss": 0.7533753514289856,
73
- "eval_runtime": 20.1364,
74
- "eval_samples_per_second": 153.95,
75
- "eval_steps_per_second": 1.639,
76
  "step": 795
77
  },
78
  {
79
  "epoch": 5.031446540880503,
80
- "grad_norm": 2.5381293296813965,
81
- "learning_rate": 8.832983927323551e-06,
82
- "loss": 0.6759,
83
  "step": 800
84
  },
85
  {
86
  "epoch": 6.0,
87
- "eval_accuracy": 0.9548387096774194,
88
- "eval_loss": 0.697773277759552,
89
- "eval_runtime": 20.2811,
90
- "eval_samples_per_second": 152.851,
91
- "eval_steps_per_second": 1.627,
92
  "step": 954
93
  },
94
  {
95
  "epoch": 6.289308176100629,
96
- "grad_norm": 2.103200674057007,
97
- "learning_rate": 6.037735849056604e-06,
98
- "loss": 0.5553,
99
  "step": 1000
100
  }
101
  ],
102
  "logging_steps": 200,
103
- "max_steps": 1431,
104
  "num_input_tokens_seen": 0,
105
- "num_train_epochs": 9,
106
  "save_steps": 500,
107
  "stateful_callbacks": {
108
  "TrainerControl": {
@@ -116,12 +116,12 @@
116
  "attributes": {}
117
  }
118
  },
119
- "total_flos": 1537764459796332.0,
120
  "train_batch_size": 96,
121
  "trial_name": null,
122
  "trial_params": {
123
- "alpha": 0.2851183254926115,
124
- "num_train_epochs": 9,
125
  "temperature": 13
126
  }
127
  }
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "eval_accuracy": 0.7464516129032258,
15
+ "eval_loss": 2.6676511764526367,
16
+ "eval_runtime": 26.5969,
17
+ "eval_samples_per_second": 116.555,
18
+ "eval_steps_per_second": 1.241,
19
  "step": 159
20
  },
21
  {
22
  "epoch": 1.2578616352201257,
23
+ "grad_norm": 4.724590301513672,
24
+ "learning_rate": 1.6871069182389938e-05,
25
+ "loss": 4.0219,
26
  "step": 200
27
  },
28
  {
29
  "epoch": 2.0,
30
+ "eval_accuracy": 0.9209677419354839,
31
+ "eval_loss": 1.4983173608779907,
32
+ "eval_runtime": 22.9577,
33
+ "eval_samples_per_second": 135.031,
34
+ "eval_steps_per_second": 1.437,
35
  "step": 318
36
  },
37
  {
38
  "epoch": 2.5157232704402515,
39
+ "grad_norm": 4.552013397216797,
40
+ "learning_rate": 1.3726415094339625e-05,
41
+ "loss": 1.6317,
42
  "step": 400
43
  },
44
  {
45
  "epoch": 3.0,
46
+ "eval_accuracy": 0.9432258064516129,
47
+ "eval_loss": 1.065486192703247,
48
+ "eval_runtime": 22.9055,
49
+ "eval_samples_per_second": 135.339,
50
+ "eval_steps_per_second": 1.441,
51
  "step": 477
52
  },
53
  {
54
  "epoch": 3.7735849056603774,
55
+ "grad_norm": 2.870781183242798,
56
+ "learning_rate": 1.0581761006289309e-05,
57
+ "loss": 0.9522,
58
  "step": 600
59
  },
60
  {
61
  "epoch": 4.0,
62
+ "eval_accuracy": 0.9483870967741935,
63
+ "eval_loss": 0.8771675229072571,
64
+ "eval_runtime": 22.9315,
65
+ "eval_samples_per_second": 135.185,
66
+ "eval_steps_per_second": 1.439,
67
  "step": 636
68
  },
69
  {
70
  "epoch": 5.0,
71
+ "eval_accuracy": 0.954516129032258,
72
+ "eval_loss": 0.781271755695343,
73
+ "eval_runtime": 22.8478,
74
+ "eval_samples_per_second": 135.681,
75
+ "eval_steps_per_second": 1.444,
76
  "step": 795
77
  },
78
  {
79
  "epoch": 5.031446540880503,
80
+ "grad_norm": 2.469351291656494,
81
+ "learning_rate": 7.437106918238994e-06,
82
+ "loss": 0.7022,
83
  "step": 800
84
  },
85
  {
86
  "epoch": 6.0,
87
+ "eval_accuracy": 0.9554838709677419,
88
+ "eval_loss": 0.7265329957008362,
89
+ "eval_runtime": 22.9211,
90
+ "eval_samples_per_second": 135.247,
91
+ "eval_steps_per_second": 1.44,
92
  "step": 954
93
  },
94
  {
95
  "epoch": 6.289308176100629,
96
+ "grad_norm": 2.1010658740997314,
97
+ "learning_rate": 4.29245283018868e-06,
98
+ "loss": 0.5869,
99
  "step": 1000
100
  }
101
  ],
102
  "logging_steps": 200,
103
+ "max_steps": 1272,
104
  "num_input_tokens_seen": 0,
105
+ "num_train_epochs": 8,
106
  "save_steps": 500,
107
  "stateful_callbacks": {
108
  "TrainerControl": {
 
116
  "attributes": {}
117
  }
118
  },
119
+ "total_flos": 1735586846963244.0,
120
  "train_batch_size": 96,
121
  "trial_name": null,
122
  "trial_params": {
123
+ "alpha": 0.9713674411299632,
124
+ "num_train_epochs": 8,
125
  "temperature": 13
126
  }
127
  }
run-0/checkpoint-1000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:24328dab82c8f44c9479f6ea68413bde6277bd3d515c5aa473bf282bd0ce474a
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1b5ead62771fb4a133ea7812d8458dd760625e18862235c9bfe1d87fb1d371e
3
  size 5368
runs/Sep28_10-21-38_bb2384aee55a/events.out.tfevents.1759054927.bb2384aee55a.76.1 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c55ff104bb86552772b59785b9901ff4ca17990bc3273b258c2a48bd9dfaf55
3
- size 27551
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a8beb8393214e32bd7130b57292c4b0881f1e809312759f4d2821485964c5b8
3
+ size 29153