moos124 committed on
Commit
10a66e3
·
verified ·
1 Parent(s): 7b7924c

Training in progress, step 1870, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6425eeda19ba056ee2b342cda29d5c0bd62bb338add5362226aed1306b74a862
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7f7c08832e92f3eeaa3fffe9fcce3c0dad0fb6473cfb08d2095dbee016d00a7
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:622066bcb88b8bee80fb586cbff8d63c12b45207f4bd8a9555fd879998bc2c7a
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43d094aa248f0598f9d95ed9c986ffa93a3fabcbfc45787cae041ac00ed84917
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6900bdcd2a41c2c94e4c353bba1be97fd1f96ffaf529e7eb7f2b15dc3f32bb65
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc34e013baf527021face29460cea50f3e631686955bd2ca7ee9759fff2228e1
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3389e098396e0e90ec77ded90427ea77132ff66ec75ee7e8cc8afe1416926945
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d713ff246b23b74f0424998c9c9a109a8f309e2172ab9e5adad25868e58ead9f
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.3968,
6
  "eval_steps": 500,
7
- "global_step": 1860,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1878,6 +1878,16 @@
1878
  "mean_token_accuracy": 0.7888635769486427,
1879
  "num_tokens": 8623046.0,
1880
  "step": 1860
 
 
 
 
 
 
 
 
 
 
1881
  }
1882
  ],
1883
  "logging_steps": 10,
@@ -1897,7 +1907,7 @@
1897
  "attributes": {}
1898
  }
1899
  },
1900
- "total_flos": 4.095164350061568e+16,
1901
  "train_batch_size": 4,
1902
  "trial_name": null,
1903
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.3989333333333333,
6
  "eval_steps": 500,
7
+ "global_step": 1870,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1878
  "mean_token_accuracy": 0.7888635769486427,
1879
  "num_tokens": 8623046.0,
1880
  "step": 1860
1881
+ },
1882
+ {
1883
+ "entropy": 0.9134793929755688,
1884
+ "epoch": 0.3989333333333333,
1885
+ "grad_norm": 0.25643301010131836,
1886
+ "learning_rate": 9.280558467355907e-05,
1887
+ "loss": 1.0032004356384276,
1888
+ "mean_token_accuracy": 0.7722208425402641,
1889
+ "num_tokens": 8665201.0,
1890
+ "step": 1870
1891
  }
1892
  ],
1893
  "logging_steps": 10,
 
1907
  "attributes": {}
1908
  }
1909
  },
1910
+ "total_flos": 4.115056247781274e+16,
1911
  "train_batch_size": 4,
1912
  "trial_name": null,
1913
  "trial_params": null