step=19220 loss=4.8626
Browse files
checkpoints/ckpt_20260307_081747/model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:633daa6f183708a07e83c955057d6e44d811904ad65f69d3aad0fbfaa14fb362
|
| 3 |
+
size 185473163
|
checkpoints/ckpt_20260307_081747/training_state.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"global_step": 19219,
|
| 3 |
+
"best_loss": 4.862577632069588,
|
| 4 |
+
"last_save_ts": "2026-03-07T08:16:09.933355",
|
| 5 |
+
"tokens_seen": 472326144,
|
| 6 |
+
"completed_datasets": [
|
| 7 |
+
"data/train/000_wikipedia.parquet",
|
| 8 |
+
"data/train/001_wikitext.parquet",
|
| 9 |
+
"data/train/002_stories.parquet",
|
| 10 |
+
"data/train/003_openwebtext.parquet",
|
| 11 |
+
"data/train/004_webtext.parquet",
|
| 12 |
+
"data/train/005_slimorca.parquet"
|
| 13 |
+
],
|
| 14 |
+
"current_dataset": "data/train/006_openhermes.parquet",
|
| 15 |
+
"current_dataset_tokens": 3735552
|
| 16 |
+
}
|