moos124 committed on
Commit
c060b5c
·
verified ·
1 Parent(s): 7e6591a

Training in progress, step 40, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4062594e114e2315a1edba8da3ac90a92afe8b4861edad173c5a9442f5a641e1
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:711ba2d185c44fc8e20ab4f888d62244e3c1309e7490154583be93f2b928c3b2
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:23cf2d68300724a5d92a2990d4500ca4891a54b6547642c663c91353db6f2dc5
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4a6f5f5c5a09e17c46b8b0ba0b83e713bf257aa5fb227f4925ed8ce8369a498
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a1a46db61c6f50336216b23d625cdc1383567b8f93555746aabef346816d0b6
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b0aa5778692ef33a083df0d0ba2997f41efb39e2997b2a7fb5e6458610d0f68
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:61720638c669c5801226066b9c136f6fb6497d15f71777831770cc7cd9c8d6d9
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:adb30875ad280e3402b454dabfc01daec7a7f2c00677260bacf1b4c6b86f5931
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.0064,
6
  "eval_steps": 500,
7
- "global_step": 30,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -48,6 +48,16 @@
48
  "mean_token_accuracy": 0.6405263364315033,
49
  "num_tokens": 140516.0,
50
  "step": 30
 
 
 
 
 
 
 
 
 
 
51
  }
52
  ],
53
  "logging_steps": 10,
@@ -67,7 +77,7 @@
67
  "attributes": {}
68
  }
69
  },
70
- "total_flos": 700501563743232.0,
71
  "train_batch_size": 4,
72
  "trial_name": null,
73
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.008533333333333334,
6
  "eval_steps": 500,
7
+ "global_step": 40,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
48
  "mean_token_accuracy": 0.6405263364315033,
49
  "num_tokens": 140516.0,
50
  "step": 30
51
+ },
52
+ {
53
+ "entropy": 1.206481871008873,
54
+ "epoch": 0.008533333333333334,
55
+ "grad_norm": 0.5763714909553528,
56
+ "learning_rate": 1.3000000000000001e-05,
57
+ "loss": 2.0720544815063477,
58
+ "mean_token_accuracy": 0.6752387754619121,
59
+ "num_tokens": 182338.0,
60
+ "step": 40
61
  }
62
  ],
63
  "logging_steps": 10,
 
77
  "attributes": {}
78
  }
79
  },
80
+ "total_flos": 899808051293184.0,
81
  "train_batch_size": 4,
82
  "trial_name": null,
83
  "trial_params": null