mgh6 commited on
Commit
ea5f65f
·
verified ·
1 Parent(s): be04b58

Training in progress, step 1500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8927683e0409ef21ad2b9bc8cc13c6800cfb5590a23142ced607e08869d074fb
3
  size 8137792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9230a8f1667356b7ad63b361e8fff561dc479c69811d39b580b9a55743136501
3
  size 8137792
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3219639986ad86e4811f92ac562e0c772d743df77fd611625fcebdb38339be0
3
  size 16386426
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1589c4d786a9ec20e39e5d8cd54b1caca272380901a11607662ecea8e3582353
3
  size 16386426
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f2115101af4d4b5e45b0940e0c622ad77b0a5d2f224127198cef7127193e2f8
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b57e66d927cb63d49164c233351ff4f91afc1c694e0198261b327f8e4910ca2
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca19c73ed66fbba4990e15bea507703b1f5228216fc2abeb22d8d074d1228662
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ad037ff70efcc9b0b2a0dc7ac7e0d0f1a8eebe556624b17336fdf4fa1dfdb3a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.7995625683486955,
5
  "eval_steps": 500,
6
- "global_step": 1400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -121,6 +121,21 @@
121
  "learning_rate": 9.066666666666667e-05,
122
  "loss": 1.3047,
123
  "step": 1400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  }
125
  ],
126
  "logging_steps": 100,
@@ -140,7 +155,7 @@
140
  "attributes": {}
141
  }
142
  },
143
- "total_flos": 2.185031282893783e+18,
144
  "train_batch_size": 8,
145
  "trial_name": null,
146
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.9995313232307454,
5
  "eval_steps": 500,
6
+ "global_step": 1500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
121
  "learning_rate": 9.066666666666667e-05,
122
  "loss": 1.3047,
123
  "step": 1400
124
+ },
125
+ {
126
+ "epoch": 2.9995313232307454,
127
+ "grad_norm": 0.13826213777065277,
128
+ "learning_rate": 9e-05,
129
+ "loss": 1.3061,
130
+ "step": 1500
131
+ },
132
+ {
133
+ "epoch": 2.9995313232307454,
134
+ "eval_loss": 1.1681571006774902,
135
+ "eval_runtime": 781.2456,
136
+ "eval_samples_per_second": 19.868,
137
+ "eval_steps_per_second": 2.484,
138
+ "step": 1500
139
  }
140
  ],
141
  "logging_steps": 100,
 
155
  "attributes": {}
156
  }
157
  },
158
+ "total_flos": 2.3411062540819497e+18,
159
  "train_batch_size": 8,
160
  "trial_name": null,
161
  "trial_params": null