duoduoyeah committed on
Commit
7c84874
·
verified ·
1 Parent(s): dc9c903

Add files using upload-large-folder tool

Browse files
pdlm_d8_bs1_pr1_r25_non_ca_samenoisy_seq512/base_checkpoints/d8/meta_005680.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "step": 5680,
3
+ "val_bpb": 0,
4
+ "model_config": {
5
+ "sequence_len": 512,
6
+ "pure_vocab_size": 4096,
7
+ "all_vocab_size": 4917,
8
+ "n_layer": 8,
9
+ "n_head": 4,
10
+ "n_kv_head": 4,
11
+ "n_embd": 512,
12
+ "prefix_pure_tokens": 1,
13
+ "mask_token_id": 4096,
14
+ "is_causal": false,
15
+ "model_name": "pdlm_d8_bs1_pr1_r25_non_ca_samenoisy_seq512"
16
+ },
17
+ "user_config": {
18
+ "run": "pdlm_d8_bs1_pr1_r25_non_ca_samenoisy_seq512",
19
+ "device_type": "",
20
+ "depth": 8,
21
+ "max_seq_len": 512,
22
+ "block_size": 1,
23
+ "prefix_pure_tokens": 1,
24
+ "is_causal": false,
25
+ "noise_total_steps": 0,
26
+ "debug": false,
27
+ "num_iterations": -1,
28
+ "target_flops": -1.0,
29
+ "target_param_data_ratio": 25,
30
+ "device_batch_size": 128,
31
+ "total_batch_size": 131072,
32
+ "embedding_lr": 0.2,
33
+ "unembedding_lr": 0.004,
34
+ "weight_decay": 0.0,
35
+ "matrix_lr": 0.02,
36
+ "grad_clip": 1.0,
37
+ "warmup_ratio": 0.0,
38
+ "warmdown_ratio": 0.2,
39
+ "final_lr_frac": 0.0,
40
+ "resume_from_step": -1,
41
+ "eval_every": -1,
42
+ "eval_tokens": 10485760,
43
+ "core_metric_every": -1,
44
+ "core_metric_max_per_task": 500,
45
+ "sample_every": 2000,
46
+ "save_every": 8000,
47
+ "model_tag": ""
48
+ },
49
+ "device_batch_size": 128,
50
+ "max_seq_len": 512,
51
+ "dataloader_state_dict": {
52
+ "pq_idx": 12,
53
+ "rg_idx": 23
54
+ },
55
+ "loop_state": {
56
+ "min_val_bpb": Infinity,
57
+ "smooth_train_loss": 1.0025086864123016,
58
+ "total_training_time": 3203.4706559181213
59
+ }
60
+ }
pdlm_d8_bs1_pr1_r25_non_ca_samenoisy_seq512/base_checkpoints/d8/model_005680.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27a610a8a5b64de7f3914d9cb8269e07cc4bc334076ee85275f95f40e6ae7489
3
+ size 114107621
pdlm_d8_bs1_pr1_r25_non_ca_samenoisy_seq512/base_checkpoints/d8/optim_005680_rank0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:197b183e6c39d1775865f51814abce09f0a6e606b36107593aa15f40e4865758
3
+ size 127528469
pdlm_d8_bs1_pr1_r25_non_ca_samenoisy_seq512/report/base-model-training.md ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Base model training
2
+ timestamp: 2026-01-07 19:28:33
3
+
4
+ - run: pdlm_d8_bs1_pr1_r25_non_ca_samenoisy_seq512
5
+ - device_type:
6
+ - depth: 8
7
+ - max_seq_len: 512
8
+ - block_size: 1
9
+ - prefix_pure_tokens: 1
10
+ - is_causal: False
11
+ - noise_total_steps: 0
12
+ - debug: False
13
+ - num_iterations: -1
14
+ - target_flops: -1.0000
15
+ - target_param_data_ratio: 25
16
+ - device_batch_size: 128
17
+ - total_batch_size: 131,072
18
+ - embedding_lr: 0.2000
19
+ - unembedding_lr: 0.0040
20
+ - weight_decay: 0.0000
21
+ - matrix_lr: 0.0200
22
+ - grad_clip: 1.0000
23
+ - warmup_ratio: 0.0000
24
+ - warmdown_ratio: 0.2000
25
+ - final_lr_frac: 0.0000
26
+ - resume_from_step: -1
27
+ - eval_every: -1
28
+ - eval_tokens: 10,485,760
29
+ - core_metric_every: -1
30
+ - core_metric_max_per_task: 500
31
+ - sample_every: 2000
32
+ - save_every: 8000
33
+ - model_tag:
34
+ - Number of parameters: 29,780,480
35
+ - Number of FLOPs per token: 1.887437e+08
36
+ - Calculated number of iterations: 5680
37
+ - Number of training tokens: 744,488,960
38
+ - Tokens : Params ratio: 24.9992
39
+ - DDP world size: 1
40
+ - warmup_ratio: 0.0000
41
+ - warmdown_ratio: 0.2000
42
+ - final_lr_frac: 0.0000
43
+ - Minimum validation bpb: inf
44
+ - Final validation bpb: 0
45
+ - CORE metric estimate: None
46
+ - MFU %: 4.41%
47
+ - Total training flops: 1.405176e+17
48
+ - Total training time: 53.39m
49
+ - Peak memory usage: 21208.98MiB
50
+
pdlm_d8_bs1_pr1_r25_non_ca_samenoisy_seq512/report/header.md ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # nanochat training report
2
+
3
+ Generated: 2026-01-07 18:33:54
4
+
5
+ ## Environment
6
+
7
+ ### Git Information
8
+ - Branch: tokenizer
9
+ - Commit: 9c7aff0 (clean)
10
+ - Message: eval dump the response and prefix
11
+
12
+ ### Hardware
13
+ - Platform: Linux
14
+ - CPUs: 6 cores (12 logical)
15
+ - Memory: 167.1 GB
16
+ - GPUs: 1x NVIDIA A100-SXM4-80GB
17
+ - GPU Memory: 79.3 GB total
18
+ - CUDA Version: 12.6
19
+ - Hourly Rate: $1.79/hour
20
+
21
+ ### Software
22
+ - Python: 3.12.12
23
+ - PyTorch: 2.9.0+cu126
24
+
25
+
26
+ ### Bloat
27
+ - Characters: 569,458
28
+ - Lines: 13,985
29
+ - Files: 79
30
+ - Tokens (approx): 142,364
31
+ - Dependencies (uv.lock lines): 2,749
32
+
33
+ Run started: 2026-01-07 18:33:54
34
+
35
+ ---
36
+
pdlm_d8_bs1_pr1_r25_non_ca_samenoisy_seq512/tokenizer/token_bytes.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c8b99c0d5a1b87b87118e840f69510440302023cd514b241614fb562373d7ce
3
+ size 17961
pdlm_d8_bs1_pr1_r25_non_ca_samenoisy_seq512/tokenizer/token_maps.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e018730860c8e77e8cbba4b57ac9c7ba6798b5926dce925743959b932a099964
3
+ size 1850237
pdlm_d8_bs1_pr1_r25_non_ca_samenoisy_seq512/tokenizer/tokenizer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f874c4250ec76e2e8c4f97e91c55cfdf74d9f8eedaae14cd22db36bb718ee19
3
+ size 61662