moos124 commited on
Commit
1989383
·
verified ·
1 Parent(s): d2a980e

Training in progress, step 4060, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1b182de9e97846b1084d19e9af0b52d76beb68e8c444d42f03c0ff5b87e0bfc
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f007c2221c1bc301388b79145c614ed983d2542e94080ed22a4b624f9ff1ece
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e3fdcee93c13a877b95a7f46c018562b216c23d8b7a35ece496bb2e1d5d63662
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5da227aa50919d031077db1df0297f7d33c7033e8961040c77fdece9e0c94738
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:214f18f538be57b8ee49d1c33812e0b73aa3fb9a5107366eeb8320cecc774eed
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b85c092cde13764b6b5711f763d019caf021e33539e4cb4643c1c9318c4c4cc
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b531e924a0d7daf3da89b8cde80cdae7a3ec74668202fc82b384dec42ff7f133
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14d979ac4950e19d80df1fe5bad94fbbd83051f665822fc85373fc81da166d74
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.864,
6
  "eval_steps": 500,
7
- "global_step": 4050,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -4068,6 +4068,16 @@
4068
  "mean_token_accuracy": 0.7608293548226357,
4069
  "num_tokens": 18891900.0,
4070
  "step": 4050
 
 
 
 
 
 
 
 
 
 
4071
  }
4072
  ],
4073
  "logging_steps": 10,
@@ -4087,7 +4097,7 @@
4087
  "attributes": {}
4088
  }
4089
  },
4090
- "total_flos": 8.946135200719565e+16,
4091
  "train_batch_size": 4,
4092
  "trial_name": null,
4093
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.8661333333333333,
6
  "eval_steps": 500,
7
+ "global_step": 4060,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
4068
  "mean_token_accuracy": 0.7608293548226357,
4069
  "num_tokens": 18891900.0,
4070
  "step": 4050
4071
+ },
4072
+ {
4073
+ "entropy": 0.9185742639005184,
4074
+ "epoch": 0.8661333333333333,
4075
+ "grad_norm": 0.2510085701942444,
4076
+ "learning_rate": 6.331947817038367e-05,
4077
+ "loss": 0.9962324142456055,
4078
+ "mean_token_accuracy": 0.7723157353699207,
4079
+ "num_tokens": 18938986.0,
4080
+ "step": 4060
4081
  }
4082
  ],
4083
  "logging_steps": 10,
 
4097
  "attributes": {}
4098
  }
4099
  },
4100
+ "total_flos": 8.96706706808402e+16,
4101
  "train_batch_size": 4,
4102
  "trial_name": null,
4103
  "trial_params": null