Fanucci commited on
Commit
893dcb6
·
verified ·
1 Parent(s): e35003b

Training in progress, step 150, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc4ff4a862db7f46e931bc03ba272c34c8f1522376a8a2b406743dfb6aa5becd
3
  size 4995335576
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfa17f31bad6f2f516b55fc5b26c3f0332bc8434cfdc669afe2a257fd5d49d26
3
  size 4995335576
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09f1f0c8166dadba06fe6fc67f6066bed4cdb1ab73ea5aeb075d20803d8d6d3a
3
  size 1857639032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eebc5675b2520100d7f6c4e7ae79cdf86ed8a10ff60e9eea536fab023e69d535
3
  size 1857639032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b4739482302d1b180f4028bb16db49eef68df7df979486c1d696295c50ea91e
3
  size 13706103974
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6625b6e9d9d8512c25cd93e8dfa0ed45c6ad5f2b482019b1b579968a9acf0a9f
3
  size 13706103974
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f37c40ce327861a7ca13b719d3aa37510a143368b6e74358bdb14becb3899e1e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecefbb3f17bb76b6655eb0157c98b5287c17fa4b4c72a6b9068b0823ce9fd18d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:01bc98a8690d286a0c5c6c74f6f325ac33ceb1fd4ad50ba634b85c5c1612f447
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76d2b2a58cb1fef054c26ee40b50f34fb3a71e56ece66a18947891aede843123
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 1,
3
  "best_metric": 1.4945952892303467,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.006246096189881324,
6
  "eval_steps": 50,
7
- "global_step": 100,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -102,6 +102,49 @@
102
  "eval_samples_per_second": 15.398,
103
  "eval_steps_per_second": 15.398,
104
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  }
106
  ],
107
  "logging_steps": 10,
@@ -116,7 +159,7 @@
116
  "early_stopping_threshold": 0.0
117
  },
118
  "attributes": {
119
- "early_stopping_patience_counter": 2
120
  }
121
  },
122
  "TrainerControl": {
@@ -130,7 +173,7 @@
130
  "attributes": {}
131
  }
132
  },
133
- "total_flos": 4084621639680000.0,
134
  "train_batch_size": 1,
135
  "trial_name": null,
136
  "trial_params": null
 
2
  "best_global_step": 1,
3
  "best_metric": 1.4945952892303467,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.009369144284821987,
6
  "eval_steps": 50,
7
+ "global_step": 150,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
102
  "eval_samples_per_second": 15.398,
103
  "eval_steps_per_second": 15.398,
104
  "step": 100
105
+ },
106
+ {
107
+ "epoch": 0.006870705808869456,
108
+ "grad_norm": 99.0,
109
+ "learning_rate": 0.048776412907378844,
110
+ "loss": 16.2813,
111
+ "step": 110
112
+ },
113
+ {
114
+ "epoch": 0.007495315427857589,
115
+ "grad_norm": 15.0,
116
+ "learning_rate": 0.04851933072501756,
117
+ "loss": 14.2515,
118
+ "step": 120
119
+ },
120
+ {
121
+ "epoch": 0.008119925046845722,
122
+ "grad_norm": 30.625,
123
+ "learning_rate": 0.048238566570264485,
124
+ "loss": 11.9391,
125
+ "step": 130
126
+ },
127
+ {
128
+ "epoch": 0.008744534665833853,
129
+ "grad_norm": 24.125,
130
+ "learning_rate": 0.047934403148824085,
131
+ "loss": 11.4894,
132
+ "step": 140
133
+ },
134
+ {
135
+ "epoch": 0.009369144284821987,
136
+ "grad_norm": 20.75,
137
+ "learning_rate": 0.047607146727478934,
138
+ "loss": 11.6593,
139
+ "step": 150
140
+ },
141
+ {
142
+ "epoch": 0.009369144284821987,
143
+ "eval_loss": 10.226225852966309,
144
+ "eval_runtime": 55.6759,
145
+ "eval_samples_per_second": 15.141,
146
+ "eval_steps_per_second": 15.141,
147
+ "step": 150
148
  }
149
  ],
150
  "logging_steps": 10,
 
159
  "early_stopping_threshold": 0.0
160
  },
161
  "attributes": {
162
+ "early_stopping_patience_counter": 3
163
  }
164
  },
165
  "TrainerControl": {
 
173
  "attributes": {}
174
  }
175
  },
176
+ "total_flos": 6126932459520000.0,
177
  "train_batch_size": 1,
178
  "trial_name": null,
179
  "trial_params": null