step 5764 | val_ppl=11.03
Browse files- README.md +3 -3
- model.safetensors +1 -1
- resume/ckpt.pt +1 -1
- resume/latest_step.txt +1 -1
- training_meta.json +5 -5
README.md
CHANGED
|
@@ -25,9 +25,9 @@ Small language model (9.9M parameters) trained from scratch.
|
|
| 25 |
| Total parameters | 9.853M |
|
| 26 |
|
| 27 |
## Training
|
| 28 |
-
- Tokens seen:
|
| 29 |
-
- Val loss: 2.
|
| 30 |
-
- Val PPL: 11.
|
| 31 |
|
| 32 |
## Usage
|
| 33 |
```python
|
|
|
|
| 25 |
| Total parameters | 9.853M |
|
| 26 |
|
| 27 |
## Training
|
| 28 |
+
- Tokens seen: 6,038,089,728
|
| 29 |
+
- Val loss: 2.4005
|
| 30 |
+
- Val PPL: 11.03
|
| 31 |
|
| 32 |
## Usage
|
| 33 |
```python
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 39421616
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b6bcf5bca66562ba232b8c2e6ff10bdd78b91677d646084352235e03991d3643
|
| 3 |
size 39421616
|
resume/ckpt.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 119812327
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5248e2263d2add874e58ab96db63390e1b10e2bee97925ea31ad990c8cc7062e
|
| 3 |
size 119812327
|
resume/latest_step.txt
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
5764
|
training_meta.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
-
"step":
|
| 3 |
-
"val_loss": 2.
|
| 4 |
-
"val_ppl": 11.
|
| 5 |
"params_M": 9.853,
|
| 6 |
-
"pushed_at": "2026-06-
|
| 7 |
-
"tokens_seen":
|
| 8 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"step": 5764,
|
| 3 |
+
"val_loss": 2.400488165664673,
|
| 4 |
+
"val_ppl": 11.028558830522986,
|
| 5 |
"params_M": 9.853,
|
| 6 |
+
"pushed_at": "2026-06-18T12:39:56.385405",
|
| 7 |
+
"tokens_seen": 6038089728
|
| 8 |
}
|