mgh6 committed on
Commit
4ed424c
·
verified ·
1 Parent(s): 3db9386

Training in progress, step 1400, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87d5249d20554fe354743154b54f927db5b78df91daf1cc35c2a79e0e2163c8e
3
  size 8137792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8927683e0409ef21ad2b9bc8cc13c6800cfb5590a23142ced607e08869d074fb
3
  size 8137792
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f981fe47449bc5d2fad9a41e39e69ecbbf3cf7f6b3c3fa5edd3e08822f8f2397
3
  size 16386426
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3219639986ad86e4811f92ac562e0c772d743df77fd611625fcebdb38339be0
3
  size 16386426
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed5a8a436709ced1493a3f831d259343bd3c0183bb82ea6d0dca2bdb52be88b7
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f2115101af4d4b5e45b0940e0c622ad77b0a5d2f224127198cef7127193e2f8
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f5ecbfe60b45227d9cf3903e66c2e3072c6f275fcc43ed807b20ab44b920fa9
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca19c73ed66fbba4990e15bea507703b1f5228216fc2abeb22d8d074d1228662
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.5995938134666456,
5
  "eval_steps": 500,
6
- "global_step": 1300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -114,6 +114,13 @@
114
  "learning_rate": 9.133333333333334e-05,
115
  "loss": 1.3161,
116
  "step": 1300
 
 
 
 
 
 
 
117
  }
118
  ],
119
  "logging_steps": 100,
@@ -133,7 +140,7 @@
133
  "attributes": {}
134
  }
135
  },
136
- "total_flos": 2.0289563117056164e+18,
137
  "train_batch_size": 8,
138
  "trial_name": null,
139
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.7995625683486955,
5
  "eval_steps": 500,
6
+ "global_step": 1400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
114
  "learning_rate": 9.133333333333334e-05,
115
  "loss": 1.3161,
116
  "step": 1300
117
+ },
118
+ {
119
+ "epoch": 2.7995625683486955,
120
+ "grad_norm": 0.13487331569194794,
121
+ "learning_rate": 9.066666666666667e-05,
122
+ "loss": 1.3047,
123
+ "step": 1400
124
  }
125
  ],
126
  "logging_steps": 100,
 
140
  "attributes": {}
141
  }
142
  },
143
+ "total_flos": 2.185031282893783e+18,
144
  "train_batch_size": 8,
145
  "trial_name": null,
146
  "trial_params": null