moos124 committed on
Commit
324889b
·
verified ·
1 Parent(s): 1b37edb

Training in progress, step 1030, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:668123144411634ca201e1e2c398f8b0dc431ed91b3b738160a2310edeb38f0f
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:467a2819de33d13cd3ec6c1d329884b789a6ebd587b70a4aeef7547662b8c67a
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:252b7e9daf2fbcdac4289e793c05c866db218cf96123bf8e819aa6230d273648
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2826672e1a760302c409493097e81d6c446eb19f5305b4def04adf0dc1424d3b
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fcec43d17a26608b8bfc06c04154fb7c3a2bd34c443f3de04f2fe54081c5f1d9
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a48398753797982f57132e4a985d57b2f06dc0a69fa03baa04266aa47eed7a3f
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b101d0a265445c70ecce8df0b28001fba354a395d578e7ea1671d59fbada1b1e
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:110b2198a6eb8399c829026bd40642ad3b90a8a396ba65aeafa58208cd8b7210
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.2176,
6
  "eval_steps": 500,
7
- "global_step": 1020,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1038,6 +1038,16 @@
1038
  "mean_token_accuracy": 0.7686716303229332,
1039
  "num_tokens": 4754969.0,
1040
  "step": 1020
 
 
 
 
 
 
 
 
 
 
1041
  }
1042
  ],
1043
  "logging_steps": 10,
@@ -1057,7 +1067,7 @@
1057
  "attributes": {}
1058
  }
1059
  },
1060
- "total_flos": 2.248124189775667e+16,
1061
  "train_batch_size": 4,
1062
  "trial_name": null,
1063
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.21973333333333334,
6
  "eval_steps": 500,
7
+ "global_step": 1030,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1038
  "mean_token_accuracy": 0.7686716303229332,
1039
  "num_tokens": 4754969.0,
1040
  "step": 1020
1041
+ },
1042
+ {
1043
+ "entropy": 0.9788610845804214,
1044
+ "epoch": 0.21973333333333334,
1045
+ "grad_norm": 0.22762160003185272,
1046
+ "learning_rate": 9.841656531558163e-05,
1047
+ "loss": 1.0857264518737793,
1048
+ "mean_token_accuracy": 0.755905470252037,
1049
+ "num_tokens": 4799172.0,
1050
+ "step": 1030
1051
  }
1052
  ],
1053
  "logging_steps": 10,
 
1067
  "attributes": {}
1068
  }
1069
  },
1070
+ "total_flos": 2.2688577960293376e+16,
1071
  "train_batch_size": 4,
1072
  "trial_name": null,
1073
  "trial_params": null