moos124 commited on
Commit
bd8999a
·
verified ·
1 Parent(s): 8f179b9

Training in progress, step 30, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b64336964bc4db0ec301681ba18f9e670e5c49901da3e7649561585811773a28
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4062594e114e2315a1edba8da3ac90a92afe8b4861edad173c5a9442f5a641e1
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9377de11befdd479505b15c4b72424f103a13d434b5966a26370f1bbd95f4edd
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23cf2d68300724a5d92a2990d4500ca4891a54b6547642c663c91353db6f2dc5
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4827ed25954d4dc04f5898f4147ad6ed2ac6a723dc4502ee69d75766dc127a9c
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a1a46db61c6f50336216b23d625cdc1383567b8f93555746aabef346816d0b6
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:560de70bab917d96290b18fce132deecf69387371fbf756275f24d8fe0b28ff7
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61720638c669c5801226066b9c136f6fb6497d15f71777831770cc7cd9c8d6d9
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.004266666666666667,
6
  "eval_steps": 500,
7
- "global_step": 20,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -38,6 +38,16 @@
38
  "mean_token_accuracy": 0.6748957321047783,
39
  "num_tokens": 90428.0,
40
  "step": 20
 
 
 
 
 
 
 
 
 
 
41
  }
42
  ],
43
  "logging_steps": 10,
@@ -57,7 +67,7 @@
57
  "attributes": {}
58
  }
59
  },
60
- "total_flos": 443951686388736.0,
61
  "train_batch_size": 4,
62
  "trial_name": null,
63
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.0064,
6
  "eval_steps": 500,
7
+ "global_step": 30,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
38
  "mean_token_accuracy": 0.6748957321047783,
39
  "num_tokens": 90428.0,
40
  "step": 20
41
+ },
42
+ {
43
+ "entropy": 1.3042965233325958,
44
+ "epoch": 0.0064,
45
+ "grad_norm": 0.39715778827667236,
46
+ "learning_rate": 9.666666666666667e-06,
47
+ "loss": 2.214934539794922,
48
+ "mean_token_accuracy": 0.6405263364315033,
49
+ "num_tokens": 140516.0,
50
+ "step": 30
51
  }
52
  ],
53
  "logging_steps": 10,
 
67
  "attributes": {}
68
  }
69
  },
70
+ "total_flos": 700501563743232.0,
71
  "train_batch_size": 4,
72
  "trial_name": null,
73
  "trial_params": null