Upload folder using huggingface_hub
Browse files
- README.md +22 -3
- checkpoints/000650/meta_000650.json +16 -0
- checkpoints/000650/model_000650.pt +3 -0
- report/chat-evaluation-sft.md +21 -0
- report/chat-sft.md +23 -0
- token_bytes.pt +3 -0
- tokenizer.pkl +3 -0
README.md
CHANGED
|
@@ -1,3 +1,22 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Dingo d20 (All Intermediate Checkpoints)
|
| 2 |
+
|
| 3 |
+
Repo: jayasuryajsk/Dingo
|
| 4 |
+
Architecture: 20 layers, 10 heads, 10 KV heads, d_model=1280, seq_len=2048, vocab=65536
|
| 5 |
+
|
| 6 |
+
Each checkpoint is stored in:
|
| 7 |
+
checkpoints/<step>/{model_<step>.pt, meta_<step>.json}
|
| 8 |
+
|
| 9 |
+
Example eval (step 000650; values from report/chat-evaluation-sft.md — the metrics recorded in meta_000650.json differ):
|
| 10 |
+
- MMLU: 32.62 %
|
| 11 |
+
- ARC-Easy: 44.82 %
|
| 12 |
+
- ARC-Challenge: 31.14 %
|
| 13 |
+
- GSM8K: 5.08 %
|
| 14 |
+
- HumanEval: 6.71 %
|
| 15 |
+
|
| 16 |
+
Load example (custom Dingo):
|
| 17 |
+
import torch, json
|
| 18 |
+
step="000650"
|
| 19 |
+
base="checkpoints"
|
| 20 |
+
ckpt = torch.load(f"{base}/{step}/model_{step}.pt", map_location="cpu")
|
| 21 |
+
with open(f"{base}/{step}/meta_{step}.json") as f:
|
| 22 |
+
    meta = json.load(f)
|
checkpoints/000650/meta_000650.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"step": 650,
|
| 3 |
+
"val_loss": 1.067014455795288,
|
| 4 |
+
"mmlu_acc": 0.3359375,
|
| 5 |
+
"arc_easy_acc": 0.4365234375,
|
| 6 |
+
"gsm8k_acc": 0.046875,
|
| 7 |
+
"humaneval_acc": 0.046875,
|
| 8 |
+
"model_config": {
|
| 9 |
+
"sequence_len": 2048,
|
| 10 |
+
"vocab_size": 65536,
|
| 11 |
+
"n_layer": 20,
|
| 12 |
+
"n_head": 10,
|
| 13 |
+
"n_kv_head": 10,
|
| 14 |
+
"n_embd": 1280
|
| 15 |
+
}
|
| 16 |
+
}
|
checkpoints/000650/model_000650.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:74e0093dc972469615c2bca3aed909d37783a3829ef21ce85e78de0ad9c14e38
|
| 3 |
+
size 2076230219
|
report/chat-evaluation-sft.md
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## Chat evaluation sft
|
| 2 |
+
timestamp: 2025-10-15 14:45:38
|
| 3 |
+
|
| 4 |
+
- source: sft
|
| 5 |
+
- task_name: None
|
| 6 |
+
- dtype: bfloat16
|
| 7 |
+
- temperature: 0.0000
|
| 8 |
+
- max_new_tokens: 512
|
| 9 |
+
- num_samples: 1
|
| 10 |
+
- top_k: 50
|
| 11 |
+
- batch_size: 8
|
| 12 |
+
- model_tag: None
|
| 13 |
+
- step: None
|
| 14 |
+
- max_problems: None
|
| 15 |
+
- ARC-Easy: 0.4482
|
| 16 |
+
- ARC-Challenge: 0.3114
|
| 17 |
+
- MMLU: 0.3262
|
| 18 |
+
- GSM8K: 0.0508
|
| 19 |
+
- HumanEval: 0.0671
|
| 20 |
+
- ChatCORE metric: 0.1131
|
| 21 |
+
|
report/chat-sft.md
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## Chat SFT
|
| 2 |
+
timestamp: 2025-10-15 14:39:09
|
| 3 |
+
|
| 4 |
+
- run: dummy
|
| 5 |
+
- source: mid
|
| 6 |
+
- dtype: bfloat16
|
| 7 |
+
- device_batch_size: 4
|
| 8 |
+
- num_epochs: 1
|
| 9 |
+
- max_iterations: -1
|
| 10 |
+
- target_examples_per_step: 32
|
| 11 |
+
- unembedding_lr: 0.0040
|
| 12 |
+
- embedding_lr: 0.2000
|
| 13 |
+
- matrix_lr: 0.0200
|
| 14 |
+
- weight_decay: 0.0000
|
| 15 |
+
- init_lr_frac: 0.0200
|
| 16 |
+
- eval_every: 100
|
| 17 |
+
- eval_steps: 100
|
| 18 |
+
- eval_metrics_every: 200
|
| 19 |
+
- Training rows: 20,843
|
| 20 |
+
- Number of iterations: 651
|
| 21 |
+
- Training loss: 1.2182
|
| 22 |
+
- Validation loss: 1.0670
|
| 23 |
+
|
token_bytes.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ae39c27aae519d14071efc95f9a558ba0b7ede47e7d83ad4f198422b44c5f70e
|
| 3 |
+
size 263721
|
tokenizer.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4c060565a46fe83b49d99005acba796f2a630daa7970eb49f7513b89f9fb40e0
|
| 3 |
+
size 846208
|