Tighten expected loss ranges for FineWeb-Edu dataset
FineWeb-Edu is higher-quality filtered data, so the model can achieve
lower loss at the same token count compared to generic web corpora.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
llm_lab/training/debugger.py
CHANGED
|
@@ -29,10 +29,10 @@ from llm_lab.config import TrainConfig
|
|
| 29 |
# Constants
|
| 30 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 31 |
|
| 32 |
-
# Normal convergence ranges for a 1B model trained on ~10B tokens
|
| 33 |
-
_EXPECTED_TRAIN_LOSS = (2.
|
| 34 |
-
_EXPECTED_VAL_LOSS = (
|
| 35 |
-
_EXPECTED_VAL_PPL = (
|
| 36 |
|
| 37 |
# Status labels
|
| 38 |
STATUS_NORMAL = "NORMAL"
|
|
|
|
| 29 |
# Constants
|
| 30 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 31 |
|
| 32 |
+
# Normal convergence ranges for a 1B model trained on ~10B tokens (FineWeb-Edu)
|
| 33 |
+
_EXPECTED_TRAIN_LOSS = (2.5, 3.3)
|
| 34 |
+
_EXPECTED_VAL_LOSS = (2.7, 3.6)
|
| 35 |
+
_EXPECTED_VAL_PPL = (15, 37)
|
| 36 |
|
| 37 |
# Status labels
|
| 38 |
STATUS_NORMAL = "NORMAL"
|