mgh6 committed on
Commit
6140848
·
verified ·
1 Parent(s): 7b3b681

Training in progress, step 800, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1bfe3de0e87ac55e372474748650d29469ea43618f69f4b3b1cda7a1c9e5275
3
  size 136000488
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47630d4c3006b50921d7e36c744d50be38e8879aea194e330ca9b15f1daaafd3
3
  size 136000488
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57e118cafcc459602f3924757e157e3852e7caa20692d2fb027a0bb1e66ba14c
3
  size 268176506
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:697d38dc27773b03d627eaea47fe209ad4c1e9a4073dc7b3d8e4eec46920a338
3
  size 268176506
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2af69833d69f080b3cc67beaefccdeb3962ea34b1a77f84a64e357eb155a372
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d27635aa3bf8c9513bde838cce12dd861db1a571c559e6e3d3706dff45fc7a8d
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f3f413b5f7ea3d1f3927898a7f680914683b4dac70ee76ea36b52d5795badb6
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c95b96648dba4a14572860ec64f84946a5310ebc9249f9d3352c1018256aeef8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.181251049041748,
3
- "best_model_checkpoint": "mgh6/TCS_MLM_50/checkpoint-700",
4
- "epoch": 0.20319303338171263,
5
  "eval_steps": 100,
6
- "global_step": 700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -112,6 +112,21 @@
112
  "eval_samples_per_second": 213.197,
113
  "eval_steps_per_second": 3.332,
114
  "step": 700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  }
116
  ],
117
  "logging_steps": 100,
@@ -140,7 +155,7 @@
140
  "attributes": {}
141
  }
142
  },
143
- "total_flos": 7002713254526976.0,
144
  "train_batch_size": 64,
145
  "trial_name": null,
146
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.1703433990478516,
3
+ "best_model_checkpoint": "mgh6/TCS_MLM_50/checkpoint-800",
4
+ "epoch": 0.23222060957910015,
5
  "eval_steps": 100,
6
+ "global_step": 800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
112
  "eval_samples_per_second": 213.197,
113
  "eval_steps_per_second": 3.332,
114
  "step": 700
115
+ },
116
+ {
117
+ "epoch": 0.23222060957910015,
118
+ "grad_norm": 0.535000205039978,
119
+ "learning_rate": 0.00097677793904209,
120
+ "loss": 2.456,
121
+ "step": 800
122
+ },
123
+ {
124
+ "epoch": 0.23222060957910015,
125
+ "eval_loss": 1.1703433990478516,
126
+ "eval_runtime": 213.4281,
127
+ "eval_samples_per_second": 213.182,
128
+ "eval_steps_per_second": 3.331,
129
+ "step": 800
130
  }
131
  ],
132
  "logging_steps": 100,
 
155
  "attributes": {}
156
  }
157
  },
158
+ "total_flos": 8003100862316544.0,
159
  "train_batch_size": 64,
160
  "trial_name": null,
161
  "trial_params": null