step=9364 loss=5.5331
Browse files- latest/model.pt +1 -1
- latest/training_state.json +7 -6
latest/model.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 211933939
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fd2040432eecbc846a41f58470d6cb548e9fea918cec0e7530a7bd92961278ec
|
| 3 |
size 211933939
|
latest/training_state.json
CHANGED
|
@@ -1,13 +1,14 @@
|
|
| 1 |
{
|
| 2 |
-
"global_step":
|
| 3 |
"best_loss": 5.533148889740308,
|
| 4 |
-
"last_save_ts": "2026-03-13T05:
|
| 5 |
-
"tokens_seen":
|
| 6 |
"completed_datasets": [
|
| 7 |
"data/train/000_wikipedia.parquet",
|
| 8 |
"data/train/001_fineweb.parquet",
|
| 9 |
-
"data/train/002_stories.parquet"
|
|
|
|
| 10 |
],
|
| 11 |
-
"current_dataset": "data/train/
|
| 12 |
-
"current_dataset_tokens":
|
| 13 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"global_step": 9364,
|
| 3 |
"best_loss": 5.533148889740308,
|
| 4 |
+
"last_save_ts": "2026-03-13T05:26:08.856940",
|
| 5 |
+
"tokens_seen": 230129664,
|
| 6 |
"completed_datasets": [
|
| 7 |
"data/train/000_wikipedia.parquet",
|
| 8 |
"data/train/001_fineweb.parquet",
|
| 9 |
+
"data/train/002_stories.parquet",
|
| 10 |
+
"data/train/003_oasst2.parquet"
|
| 11 |
],
|
| 12 |
+
"current_dataset": "data/train/004_alpaca.parquet",
|
| 13 |
+
"current_dataset_tokens": 4251648
|
| 14 |
}
|