mgh6 committed on
Commit
eaf22b2
·
verified ·
1 Parent(s): 3e884d0

Training in progress, step 19000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a449235f3756d3813974ce7f6502c955cc5547260baa9444e160f6b6bf3a5df
3
  size 136000488
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d734e6db78f60fbbbccb113593aad2dacb6d4ef24adf3f2d2f765c04c3543ac
3
  size 136000488
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e61e967797bb87cf1a55bf0f69be53afb460e8bfa804153c9a886a3982e31791
3
  size 268176506
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93e652abff809338fab18533d877e6e5d8666ad14769f8c73336a2290bb1d2f4
3
  size 268176506
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f595db6798fd40d1f9625404652b3498dc8e2af6c56836d52a30a167ba08e35f
3
  size 14942
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec459b20953804686a31ef9dfbbe457e9e9185e9c16c0990f91f7b84833fdbbf
3
  size 14942
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5ca222bf68a3564a804857106a49feb87b18dba9a0be2082fc90028354b2ec2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1809e210db6349fe0ab123cca2b958d44ebb1729354689aaf0244de4c395185
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.335782247380465,
5
  "eval_steps": 500,
6
- "global_step": 18000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -133,6 +133,13 @@
133
  "learning_rate": 0.0008554565165020477,
134
  "loss": 1.0793,
135
  "step": 18000
 
 
 
 
 
 
 
136
  }
137
  ],
138
  "logging_steps": 1000,
@@ -152,7 +159,7 @@
152
  "attributes": {}
153
  }
154
  },
155
- "total_flos": 1.8006571361081754e+17,
156
  "train_batch_size": 64,
157
  "trial_name": null,
158
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.576659038901602,
5
  "eval_steps": 500,
6
+ "global_step": 19000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
133
  "learning_rate": 0.0008554565165020477,
134
  "loss": 1.0793,
135
  "step": 18000
136
+ },
137
+ {
138
+ "epoch": 4.576659038901602,
139
+ "grad_norm": 0.5131984353065491,
140
+ "learning_rate": 0.0008474263229743838,
141
+ "loss": 1.0737,
142
+ "step": 19000
143
  }
144
  ],
145
  "logging_steps": 1000,
 
159
  "attributes": {}
160
  }
161
  },
162
+ "total_flos": 1.900695900913664e+17,
163
  "train_batch_size": 64,
164
  "trial_name": null,
165
  "trial_params": null