Stage 2 checkpoint: [Step 100/20000] loss=7.5625
Browse files- README.md +4 -4
- stage2_checkpoint.pt +3 -0
- stage2_metadata.json +14 -0
README.md
CHANGED
|
@@ -57,10 +57,10 @@ Key research findings applied:
|
|
| 57 |
|
| 58 |
## Current Checkpoint
|
| 59 |
|
| 60 |
-
- **Stage**:
|
| 61 |
-
- **Step**:
|
| 62 |
-
- **Loss**:
|
| 63 |
-
- **Updated**: 2026-03-12T22:
|
| 64 |
|
| 65 |
## Languages
|
| 66 |
|
|
|
|
| 57 |
|
| 58 |
## Current Checkpoint
|
| 59 |
|
| 60 |
+
- **Stage**: 2 (kl-distillation)
|
| 61 |
+
- **Step**: 15
|
| 62 |
+
- **Loss**: 7.5977
|
| 63 |
+
- **Updated**: 2026-03-12T22:49:07.475098+00:00
|
| 64 |
|
| 65 |
## Languages
|
| 66 |
|
stage2_checkpoint.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:038e23318f8fc32cb75c0e3a3b376bb947427203c019e26c45b11914cb0e7106
|
| 3 |
+
size 1443376118
|
stage2_metadata.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"step": 15,
|
| 3 |
+
"checkpoint_file": "best.pt",
|
| 4 |
+
"timestamp": "2026-03-12T22:49:07.475098+00:00",
|
| 5 |
+
"final_loss": 7.59765625,
|
| 6 |
+
"best_loss": 7.59765625,
|
| 7 |
+
"training_config": {
|
| 8 |
+
"temperature": 2.0,
|
| 9 |
+
"alpha": 0.7,
|
| 10 |
+
"seed": 42
|
| 11 |
+
},
|
| 12 |
+
"stage": 2,
|
| 13 |
+
"stage_name": "kl-distillation"
|
| 14 |
+
}
|