moos124 commited on
Commit
cc14ba4
·
verified ·
1 Parent(s): fb883e1

Training in progress, step 1050, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:075b9f5e3c3ecf9ca9b4beed132f80cd7ca20fa3ec7ca20013aa2dc046c1c437
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6ad9d4121458f767533edfdc993a25e94121f6db3ce90446eb882661c48cc1c
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:506be5bfa3b1dd01b0f55b9694c617d44dc343820055c8cd9ec92e99e172c00c
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2ed1ce51e87f0a8d2a2d62bf93516ea5a884b530e7bc871f3368b39235952d4
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff25b0e531e529b7251efa89c5c2c64db372b3318238f40a081a3e14b70997e6
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19468c0e1756e3077097eb2dc7d2dd2e6a8672a7daab66fbe812d41a50dfa0d7
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:994a303da51f808e3d008f9bf196f10e75829171e37a8d639210b7ab263db8ca
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da7380d5cf110b7d9db786bae99f249fd7994ec4cd093646cfbe6add9b059d77
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.22186666666666666,
6
  "eval_steps": 500,
7
- "global_step": 1040,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1058,6 +1058,16 @@
1058
  "mean_token_accuracy": 0.7810183942317963,
1059
  "num_tokens": 4841210.0,
1060
  "step": 1040
 
 
 
 
 
 
 
 
 
 
1061
  }
1062
  ],
1063
  "logging_steps": 10,
@@ -1077,7 +1087,7 @@
1077
  "attributes": {}
1078
  }
1079
  },
1080
- "total_flos": 2.2882513374114816e+16,
1081
  "train_batch_size": 4,
1082
  "trial_name": null,
1083
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.224,
6
  "eval_steps": 500,
7
+ "global_step": 1050,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1058
  "mean_token_accuracy": 0.7810183942317963,
1059
  "num_tokens": 4841210.0,
1060
  "step": 1040
1061
+ },
1062
+ {
1063
+ "entropy": 1.0130531772971154,
1064
+ "epoch": 0.224,
1065
+ "grad_norm": 0.254517138004303,
1066
+ "learning_rate": 9.832898475119446e-05,
1067
+ "loss": 1.0863225936889649,
1068
+ "mean_token_accuracy": 0.7473610386252403,
1069
+ "num_tokens": 4889983.0,
1070
+ "step": 1050
1071
  }
1072
  ],
1073
  "logging_steps": 10,
 
1087
  "attributes": {}
1088
  }
1089
  },
1090
+ "total_flos": 2.3115371048730624e+16,
1091
  "train_batch_size": 4,
1092
  "trial_name": null,
1093
  "trial_params": null