mgh6 committed on
Commit
c9ff212
·
verified ·
1 Parent(s): 9edc0ea

Training in progress, step 12600, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5e9efffe9b7c56a24141331717c7106d1e6b75b664c6b6e41c838b1c2ae800f
3
  size 8137792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20209c99542f7da2bfd2b2fd316d79a098febcc7e6d97933676ca8519243fef8
3
  size 8137792
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:271ded05124935b87806da10eeb3248f1f5e3b9d1821371983eb0947ab652255
3
  size 16386426
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fafcafd823004fea685f073703b99d3b30b6ea6cd9eb562add16d551bc944cf
3
  size 16386426
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a9d3e1e0e2094934ac230fdd75ebe4375103c7d57b000d50375350f3dfdec58
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63e7093a03aff421324576bee20e161dd36ae794f84f7d6f2048e9b75dfc0e39
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c24dc2fc932ae6f658653cc45eeb992571bdff268fdfc9138ada0f732a92dc5
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7d0069998b5557819c7479913a77ca9f4d0fe4b4bfe083c19dbb1255201d352
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 24.99609436025621,
5
  "eval_steps": 500,
6
- "global_step": 12500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1082,6 +1082,13 @@
1082
  "eval_samples_per_second": 19.832,
1083
  "eval_steps_per_second": 2.48,
1084
  "step": 12500
 
 
 
 
 
 
 
1085
  }
1086
  ],
1087
  "logging_steps": 100,
@@ -1101,7 +1108,7 @@
1101
  "attributes": {}
1102
  }
1103
  },
1104
- "total_flos": 1.950915163363423e+19,
1105
  "train_batch_size": 8,
1106
  "trial_name": null,
1107
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 25.19606311513826,
5
  "eval_steps": 500,
6
+ "global_step": 12600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1082
  "eval_samples_per_second": 19.832,
1083
  "eval_steps_per_second": 2.48,
1084
  "step": 12500
1085
+ },
1086
+ {
1087
+ "epoch": 25.19606311513826,
1088
+ "grad_norm": 0.2269056737422943,
1089
+ "learning_rate": 1.6000000000000003e-05,
1090
+ "loss": 1.2214,
1091
+ "step": 12600
1092
  }
1093
  ],
1094
  "logging_steps": 100,
 
1108
  "attributes": {}
1109
  }
1110
  },
1111
+ "total_flos": 1.966521744795212e+19,
1112
  "train_batch_size": 8,
1113
  "trial_name": null,
1114
  "trial_params": null