moos124 commited on
Commit
692871c
·
verified ·
1 Parent(s): d8c9221

Training in progress, step 4050, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c3ea49652fbbb2b175f2aaefd89dad902275a26e5f04e2afff4ad74fad31eec
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1b182de9e97846b1084d19e9af0b52d76beb68e8c444d42f03c0ff5b87e0bfc
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a5b3bf37303de960a4e639eed821190068e427e905b014fa22c80e16e0be996
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3fdcee93c13a877b95a7f46c018562b216c23d8b7a35ece496bb2e1d5d63662
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55e44189f4dc98b82edd710824ce7cbb3ae7bb51d01013c13cf3ae301059692a
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:214f18f538be57b8ee49d1c33812e0b73aa3fb9a5107366eeb8320cecc774eed
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2de73afb2b93f35c21bead0273f09ebb37a775710b76a8e54a2494f9892e0a49
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b531e924a0d7daf3da89b8cde80cdae7a3ec74668202fc82b384dec42ff7f133
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.8618666666666667,
6
  "eval_steps": 500,
7
- "global_step": 4040,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -4058,6 +4058,16 @@
4058
  "mean_token_accuracy": 0.7643599301576615,
4059
  "num_tokens": 18843087.0,
4060
  "step": 4040
 
 
 
 
 
 
 
 
 
 
4061
  }
4062
  ],
4063
  "logging_steps": 10,
@@ -4077,7 +4087,7 @@
4077
  "attributes": {}
4078
  }
4079
  },
4080
- "total_flos": 8.921480530406707e+16,
4081
  "train_batch_size": 4,
4082
  "trial_name": null,
4083
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.864,
6
  "eval_steps": 500,
7
+ "global_step": 4050,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
4058
  "mean_token_accuracy": 0.7643599301576615,
4059
  "num_tokens": 18843087.0,
4060
  "step": 4040
4061
+ },
4062
+ {
4063
+ "entropy": 0.9580571033060551,
4064
+ "epoch": 0.864,
4065
+ "grad_norm": 0.2631727159023285,
4066
+ "learning_rate": 6.348621561803495e-05,
4067
+ "loss": 1.0001374244689942,
4068
+ "mean_token_accuracy": 0.7608293548226357,
4069
+ "num_tokens": 18891900.0,
4070
+ "step": 4050
4071
  }
4072
  ],
4073
  "logging_steps": 10,
 
4087
  "attributes": {}
4088
  }
4089
  },
4090
+ "total_flos": 8.946135200719565e+16,
4091
  "train_batch_size": 4,
4092
  "trial_name": null,
4093
  "trial_params": null