step=37421 loss=4.8147
Browse files
checkpoints/ckpt_20260307_111638/model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:14ba65b4e741831b4a936ebc2fc31d254df35f65ec4a7dd9f90bfcae715e9928
|
| 3 |
+
size 185473163
|
checkpoints/ckpt_20260307_111638/training_state.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"global_step": 37421,
|
| 3 |
+
"best_loss": 4.814651757478714,
|
| 4 |
+
"last_save_ts": "2026-03-07T11:14:10.219432",
|
| 5 |
+
"tokens_seen": 919658496,
|
| 6 |
+
"completed_datasets": [
|
| 7 |
+
"data/train/000_wikipedia.parquet",
|
| 8 |
+
"data/train/001_wikitext.parquet",
|
| 9 |
+
"data/train/002_stories.parquet",
|
| 10 |
+
"data/train/003_openwebtext.parquet",
|
| 11 |
+
"data/train/004_webtext.parquet",
|
| 12 |
+
"data/train/005_slimorca.parquet",
|
| 13 |
+
"data/train/006_openhermes.parquet",
|
| 14 |
+
"data/train/007_ultrachat.parquet",
|
| 15 |
+
"data/train/008_dolly.parquet"
|
| 16 |
+
],
|
| 17 |
+
"current_dataset": "data/train/010_alpaca.parquet",
|
| 18 |
+
"current_dataset_tokens": 4251648
|
| 19 |
+
}
|