mgh6 committed on
Commit
2e34c7e
·
verified ·
1 Parent(s): 9bea334

Training in progress, step 12700, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20209c99542f7da2bfd2b2fd316d79a098febcc7e6d97933676ca8519243fef8
3
  size 8137792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8de8c58363428e5a212a3a4cb4c6e603534a59706d0acc7f509add46757e4be
3
  size 8137792
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9fafcafd823004fea685f073703b99d3b30b6ea6cd9eb562add16d551bc944cf
3
  size 16386426
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:515bb5c714672e38dd8b8f2e243f50d8e6fb7e87d6fe8dbfeb450dc14c73ca07
3
  size 16386426
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63e7093a03aff421324576bee20e161dd36ae794f84f7d6f2048e9b75dfc0e39
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f50ceb3ca0a6b0975d6fb26400fddb6871bf4da32dbc560fb730d74806ffd679
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c7d0069998b5557819c7479913a77ca9f4d0fe4b4bfe083c19dbb1255201d352
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22a27c7ac8f887563d6b56d8cc814c0cb0fec885d54e460f34cdda123e9bcbe4
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 25.19606311513826,
5
  "eval_steps": 500,
6
- "global_step": 12600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1089,6 +1089,13 @@
1089
  "learning_rate": 1.6000000000000003e-05,
1090
  "loss": 1.2214,
1091
  "step": 12600
 
 
 
 
 
 
 
1092
  }
1093
  ],
1094
  "logging_steps": 100,
@@ -1108,7 +1115,7 @@
1108
  "attributes": {}
1109
  }
1110
  },
1111
- "total_flos": 1.966521744795212e+19,
1112
  "train_batch_size": 8,
1113
  "trial_name": null,
1114
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 25.39603187002031,
5
  "eval_steps": 500,
6
+ "global_step": 12700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1089
  "learning_rate": 1.6000000000000003e-05,
1090
  "loss": 1.2214,
1091
  "step": 12600
1092
+ },
1093
+ {
1094
+ "epoch": 25.39603187002031,
1095
+ "grad_norm": 0.23873771727085114,
1096
+ "learning_rate": 1.5333333333333334e-05,
1097
+ "loss": 1.2228,
1098
+ "step": 12700
1099
  }
1100
  ],
1101
  "logging_steps": 100,
 
1115
  "attributes": {}
1116
  }
1117
  },
1118
+ "total_flos": 1.982129241914029e+19,
1119
  "train_batch_size": 8,
1120
  "trial_name": null,
1121
  "trial_params": null