jayasuryajsk committed on
Commit
9f5367b
·
verified ·
1 Parent(s): 8920fb2

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -1,3 +1,22 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Dingo d20 (All Intermediate Checkpoints)
2
+
3
+ Repo: jayasuryajsk/Dingo
4
+ Architecture: 20 layers, 10 heads, 10 KV heads, d_model=1280, seq_len=2048, vocab=65536
5
+
6
+ Each checkpoint is stored in:
7
+ checkpoints/<step>/{model_<step>.pt, meta_<step>.json}
8
+
9
+ Example eval (step 000650):
10
+ - MMLU: 32.62 %
11
+ - ARC-Easy: 44.82 %
12
+ - ARC-Challenge: 31.14 %
13
+ - GSM8K: 5.08 %
14
+ - HumanEval: 6.71 %
15
+
16
+ Load example (custom Dingo):
17
+ import torch, json
18
+ step="000650"
19
+ base="checkpoints"
20
+ ckpt = torch.load(f"{base}/{step}/model_{step}.pt", map_location="cpu")
21
+ with open(f"{base}/{step}/meta_{step}.json") as f:
22
+ meta = json.load(f)
checkpoints/000650/meta_000650.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "step": 650,
3
+ "val_loss": 1.067014455795288,
4
+ "mmlu_acc": 0.3359375,
5
+ "arc_easy_acc": 0.4365234375,
6
+ "gsm8k_acc": 0.046875,
7
+ "humaneval_acc": 0.046875,
8
+ "model_config": {
9
+ "sequence_len": 2048,
10
+ "vocab_size": 65536,
11
+ "n_layer": 20,
12
+ "n_head": 10,
13
+ "n_kv_head": 10,
14
+ "n_embd": 1280
15
+ }
16
+ }
checkpoints/000650/model_000650.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74e0093dc972469615c2bca3aed909d37783a3829ef21ce85e78de0ad9c14e38
3
+ size 2076230219
report/chat-evaluation-sft.md ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Chat evaluation sft
2
+ timestamp: 2025-10-15 14:45:38
3
+
4
+ - source: sft
5
+ - task_name: None
6
+ - dtype: bfloat16
7
+ - temperature: 0.0000
8
+ - max_new_tokens: 512
9
+ - num_samples: 1
10
+ - top_k: 50
11
+ - batch_size: 8
12
+ - model_tag: None
13
+ - step: None
14
+ - max_problems: None
15
+ - ARC-Easy: 0.4482
16
+ - ARC-Challenge: 0.3114
17
+ - MMLU: 0.3262
18
+ - GSM8K: 0.0508
19
+ - HumanEval: 0.0671
20
+ - ChatCORE metric: 0.1131
21
+
report/chat-sft.md ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Chat SFT
2
+ timestamp: 2025-10-15 14:39:09
3
+
4
+ - run: dummy
5
+ - source: mid
6
+ - dtype: bfloat16
7
+ - device_batch_size: 4
8
+ - num_epochs: 1
9
+ - max_iterations: -1
10
+ - target_examples_per_step: 32
11
+ - unembedding_lr: 0.0040
12
+ - embedding_lr: 0.2000
13
+ - matrix_lr: 0.0200
14
+ - weight_decay: 0.0000
15
+ - init_lr_frac: 0.0200
16
+ - eval_every: 100
17
+ - eval_steps: 100
18
+ - eval_metrics_every: 200
19
+ - Training rows: 20,843
20
+ - Number of iterations: 651
21
+ - Training loss: 1.2182
22
+ - Validation loss: 1.0670
23
+
token_bytes.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae39c27aae519d14071efc95f9a558ba0b7ede47e7d83ad4f198422b44c5f70e
3
+ size 263721
tokenizer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c060565a46fe83b49d99005acba796f2a630daa7970eb49f7513b89f9fb40e0
3
+ size 846208