moos124 commited on
Commit
c19f5b5
·
verified ·
1 Parent(s): ed61adc

Training in progress, step 20, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0963d2e30d0792e57a4f3044b487b489bb0ac90cddae9da06468f601c3c4f12c
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b64336964bc4db0ec301681ba18f9e670e5c49901da3e7649561585811773a28
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:52ae1d61b6cd3d65e87bcc7022c2bfab370bb0d25d565449ec9da8473f37807b
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9377de11befdd479505b15c4b72424f103a13d434b5966a26370f1bbd95f4edd
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f24589a086ed1943d2a8b45e4df895094a9bcd9d2a73b543b123571d8d68d706
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4827ed25954d4dc04f5898f4147ad6ed2ac6a723dc4502ee69d75766dc127a9c
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1eed7e84bb3655788b02424658c40a717997801da648c848ee0e5ebbda010bba
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:560de70bab917d96290b18fce132deecf69387371fbf756275f24d8fe0b28ff7
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.0021333333333333334,
6
  "eval_steps": 500,
7
- "global_step": 10,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -28,6 +28,16 @@
28
  "mean_token_accuracy": 0.65218452612559,
29
  "num_tokens": 39906.0,
30
  "step": 10
 
 
 
 
 
 
 
 
 
 
31
  }
32
  ],
33
  "logging_steps": 10,
@@ -47,7 +57,7 @@
47
  "attributes": {}
48
  }
49
  },
50
- "total_flos": 195503478970368.0,
51
  "train_batch_size": 4,
52
  "trial_name": null,
53
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.004266666666666667,
6
  "eval_steps": 500,
7
+ "global_step": 20,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
28
  "mean_token_accuracy": 0.65218452612559,
29
  "num_tokens": 39906.0,
30
  "step": 10
31
+ },
32
+ {
33
+ "entropy": 1.1284118384122848,
34
+ "epoch": 0.004266666666666667,
35
+ "grad_norm": 0.49733078479766846,
36
+ "learning_rate": 6.333333333333334e-06,
37
+ "loss": 1.9721708297729492,
38
+ "mean_token_accuracy": 0.6748957321047783,
39
+ "num_tokens": 90428.0,
40
+ "step": 20
41
  }
42
  ],
43
  "logging_steps": 10,
 
57
  "attributes": {}
58
  }
59
  },
60
+ "total_flos": 443951686388736.0,
61
  "train_batch_size": 4,
62
  "trial_name": null,
63
  "trial_params": null