bhuvanmdev commited on
Commit
63f70b6
·
verified ·
1 Parent(s): 6ff985e

Training in progress, step 1560, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:53f24760c3a76f03c11ceb3b090eb745146db3c3cceff8d0f5c2208a02ff460a
3
  size 1316913776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b1b606eb9fd42d003e24e4c311d5a76dc8b2194c3359be99a0f7877260ebc45
3
  size 1316913776
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6b20a7be78cf9001416e8e87742c91e3bbdbd5a574b71733c30e9526d3e8fef
3
  size 8908124
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9009dfe23d5ba22f1b4f8a4580763d728a499c43d2116dd63b636be2a270eae
3
  size 8908124
last-checkpoint/rng_state.pth CHANGED
Binary files a/last-checkpoint/rng_state.pth and b/last-checkpoint/rng_state.pth differ
 
last-checkpoint/scheduler.pt CHANGED
Binary files a/last-checkpoint/scheduler.pt and b/last-checkpoint/scheduler.pt differ
 
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.3284671532846715,
5
  "eval_steps": 500,
6
- "global_step": 1530,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1078,6 +1078,27 @@
1078
  "learning_rate": 0.00033576642335766423,
1079
  "loss": 0.7974,
1080
  "step": 1530
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1081
  }
1082
  ],
1083
  "logging_steps": 10,
@@ -1085,7 +1106,7 @@
1085
  "num_input_tokens_seen": 0,
1086
  "num_train_epochs": 1,
1087
  "save_steps": 30,
1088
- "total_flos": 1.156634896584983e+17,
1089
  "train_batch_size": 3,
1090
  "trial_name": null,
1091
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.334907685702018,
5
  "eval_steps": 500,
6
+ "global_step": 1560,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1078
  "learning_rate": 0.00033576642335766423,
1079
  "loss": 0.7974,
1080
  "step": 1530
1081
+ },
1082
+ {
1083
+ "epoch": 0.33061399742378705,
1084
+ "grad_norm": 1.8875946998596191,
1085
+ "learning_rate": 0.0003346930012881065,
1086
+ "loss": 0.7835,
1087
+ "step": 1540
1088
+ },
1089
+ {
1090
+ "epoch": 0.33276084156290253,
1091
+ "grad_norm": 1.5482693910598755,
1092
+ "learning_rate": 0.0003336195792185487,
1093
+ "loss": 0.7866,
1094
+ "step": 1550
1095
+ },
1096
+ {
1097
+ "epoch": 0.334907685702018,
1098
+ "grad_norm": 1.1274839639663696,
1099
+ "learning_rate": 0.000332546157148991,
1100
+ "loss": 0.7964,
1101
+ "step": 1560
1102
  }
1103
  ],
1104
  "logging_steps": 10,
 
1106
  "num_input_tokens_seen": 0,
1107
  "num_train_epochs": 1,
1108
  "save_steps": 30,
1109
+ "total_flos": 1.1783823328185754e+17,
1110
  "train_batch_size": 3,
1111
  "trial_name": null,
1112
  "trial_params": null