Commit ·
33306a2
1
Parent(s): f667ba4
Training run: 100K steps (squash-merge from run/run_20260325_002657_small_zesty-osprey)
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- checkpoints/step_00005000/.complete +9 -0
- checkpoints/step_00005000/config.json +39 -0
- checkpoints/step_00005000/model.safetensors +3 -0
- checkpoints/step_00005000/optimizer.safetensors +3 -0
- checkpoints/step_00005000/training_state.json +118 -0
- checkpoints/step_00010000/.complete +9 -0
- checkpoints/step_00010000/config.json +39 -0
- checkpoints/step_00010000/model.safetensors +3 -0
- checkpoints/step_00010000/optimizer.safetensors +3 -0
- checkpoints/step_00010000/training_state.json +118 -0
- checkpoints/step_00015000/.complete +9 -0
- checkpoints/step_00015000/config.json +39 -0
- checkpoints/step_00015000/model.safetensors +3 -0
- checkpoints/step_00015000/optimizer.safetensors +3 -0
- checkpoints/step_00015000/training_state.json +118 -0
- checkpoints/step_00020000/.complete +9 -0
- checkpoints/step_00020000/config.json +39 -0
- checkpoints/step_00020000/model.safetensors +3 -0
- checkpoints/step_00020000/optimizer.safetensors +3 -0
- checkpoints/step_00020000/training_state.json +118 -0
- checkpoints/step_00025000/.complete +9 -0
- checkpoints/step_00025000/config.json +39 -0
- checkpoints/step_00025000/model.safetensors +3 -0
- checkpoints/step_00025000/optimizer.safetensors +3 -0
- checkpoints/step_00025000/training_state.json +118 -0
- checkpoints/step_00030000/.complete +9 -0
- checkpoints/step_00030000/config.json +39 -0
- checkpoints/step_00030000/model.safetensors +3 -0
- checkpoints/step_00030000/optimizer.safetensors +3 -0
- checkpoints/step_00030000/training_state.json +118 -0
- checkpoints/step_00035000/.complete +9 -0
- checkpoints/step_00035000/config.json +39 -0
- checkpoints/step_00035000/model.safetensors +3 -0
- checkpoints/step_00035000/optimizer.safetensors +3 -0
- checkpoints/step_00035000/training_state.json +118 -0
- checkpoints/step_00040000/.complete +9 -0
- checkpoints/step_00040000/config.json +39 -0
- checkpoints/step_00040000/model.safetensors +3 -0
- checkpoints/step_00040000/optimizer.safetensors +3 -0
- checkpoints/step_00040000/training_state.json +118 -0
- checkpoints/step_00045000/.complete +9 -0
- checkpoints/step_00045000/config.json +39 -0
- checkpoints/step_00045000/model.safetensors +3 -0
- checkpoints/step_00045000/optimizer.safetensors +3 -0
- checkpoints/step_00045000/training_state.json +118 -0
- checkpoints/step_00050000/.complete +9 -0
- checkpoints/step_00050000/config.json +39 -0
- checkpoints/step_00050000/model.safetensors +3 -0
- checkpoints/step_00050000/optimizer.safetensors +3 -0
- checkpoints/step_00050000/training_state.json +118 -0
checkpoints/step_00005000/.complete
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"files": {
|
| 4 |
+
"config.json": "7d7a8111595963bc28a9b70cda8402bec27f0d6ca34b35fef100bfe5dba4ebeb",
|
| 5 |
+
"model.safetensors": "e1dfd0e8ce5ae4a99c615fcfd69db7eea08f558afa4add0a393e58a95bbe2b6c",
|
| 6 |
+
"optimizer.safetensors": "3b128d34d8547f6c52dfc4557bddca2ba1e2201c9385e0533423ebaff00aedc3",
|
| 7 |
+
"training_state.json": "09b2dd8e437b70954caccdea7708d0585f85d7e4b9481a5b6ca4d91fa358e530"
|
| 8 |
+
}
|
| 9 |
+
}
|
checkpoints/step_00005000/config.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"checkpoint_type": "pretrain",
|
| 4 |
+
"model_config": {
|
| 5 |
+
"vocab_size": 4278,
|
| 6 |
+
"max_seq_len": 256,
|
| 7 |
+
"n_outcomes": 5,
|
| 8 |
+
"d_model": 256,
|
| 9 |
+
"n_layers": 8,
|
| 10 |
+
"n_heads": 4,
|
| 11 |
+
"d_ff": 1024,
|
| 12 |
+
"dropout": 0.0,
|
| 13 |
+
"rope_base": 10000.0
|
| 14 |
+
},
|
| 15 |
+
"training_config": {
|
| 16 |
+
"lr": 0.0003,
|
| 17 |
+
"weight_decay": 0.01,
|
| 18 |
+
"max_grad_norm": 1.0,
|
| 19 |
+
"warmup_steps": 1000,
|
| 20 |
+
"total_steps": 100000,
|
| 21 |
+
"batch_size": 256,
|
| 22 |
+
"max_ply": 256,
|
| 23 |
+
"discard_ply_limit": false,
|
| 24 |
+
"num_workers": 4,
|
| 25 |
+
"use_amp": true,
|
| 26 |
+
"accumulation_steps": 1,
|
| 27 |
+
"log_interval": 10,
|
| 28 |
+
"eval_interval": 500,
|
| 29 |
+
"checkpoint_interval": 5000,
|
| 30 |
+
"base_seed": 42,
|
| 31 |
+
"val_seed": 9223372036854775807,
|
| 32 |
+
"val_games": 512,
|
| 33 |
+
"checkpoint_dir": "checkpoints",
|
| 34 |
+
"log_dir": "logs",
|
| 35 |
+
"use_wandb": false,
|
| 36 |
+
"wandb_project": "pawn",
|
| 37 |
+
"device": "cuda"
|
| 38 |
+
}
|
| 39 |
+
}
|
checkpoints/step_00005000/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e1dfd0e8ce5ae4a99c615fcfd69db7eea08f558afa4add0a393e58a95bbe2b6c
|
| 3 |
+
size 38102280
|
checkpoints/step_00005000/optimizer.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3b128d34d8547f6c52dfc4557bddca2ba1e2201c9385e0533423ebaff00aedc3
|
| 3 |
+
size 76210148
|
checkpoints/step_00005000/training_state.json
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"global_step": 5000,
|
| 4 |
+
"scheduler_state_dict": {
|
| 5 |
+
"step": 5000
|
| 6 |
+
},
|
| 7 |
+
"scaler_state_dict": {
|
| 8 |
+
"scale": 262144.0,
|
| 9 |
+
"growth_factor": 2.0,
|
| 10 |
+
"backoff_factor": 0.5,
|
| 11 |
+
"growth_interval": 2000,
|
| 12 |
+
"_growth_tracker": 1000
|
| 13 |
+
},
|
| 14 |
+
"optimizer_meta": {
|
| 15 |
+
"param_groups": [
|
| 16 |
+
{
|
| 17 |
+
"lr": 0.00029891389973215736,
|
| 18 |
+
"betas": [
|
| 19 |
+
0.9,
|
| 20 |
+
0.999
|
| 21 |
+
],
|
| 22 |
+
"eps": 1e-08,
|
| 23 |
+
"weight_decay": 0.01,
|
| 24 |
+
"amsgrad": false,
|
| 25 |
+
"maximize": false,
|
| 26 |
+
"foreach": null,
|
| 27 |
+
"capturable": false,
|
| 28 |
+
"differentiable": false,
|
| 29 |
+
"fused": null,
|
| 30 |
+
"decoupled_weight_decay": true,
|
| 31 |
+
"params": [
|
| 32 |
+
0,
|
| 33 |
+
1,
|
| 34 |
+
2,
|
| 35 |
+
3,
|
| 36 |
+
4,
|
| 37 |
+
5,
|
| 38 |
+
6,
|
| 39 |
+
7,
|
| 40 |
+
8,
|
| 41 |
+
9,
|
| 42 |
+
10,
|
| 43 |
+
11,
|
| 44 |
+
12,
|
| 45 |
+
13,
|
| 46 |
+
14,
|
| 47 |
+
15,
|
| 48 |
+
16,
|
| 49 |
+
17,
|
| 50 |
+
18,
|
| 51 |
+
19,
|
| 52 |
+
20,
|
| 53 |
+
21,
|
| 54 |
+
22,
|
| 55 |
+
23,
|
| 56 |
+
24,
|
| 57 |
+
25,
|
| 58 |
+
26,
|
| 59 |
+
27,
|
| 60 |
+
28,
|
| 61 |
+
29,
|
| 62 |
+
30,
|
| 63 |
+
31,
|
| 64 |
+
32,
|
| 65 |
+
33,
|
| 66 |
+
34,
|
| 67 |
+
35,
|
| 68 |
+
36,
|
| 69 |
+
37,
|
| 70 |
+
38,
|
| 71 |
+
39,
|
| 72 |
+
40,
|
| 73 |
+
41,
|
| 74 |
+
42,
|
| 75 |
+
43,
|
| 76 |
+
44,
|
| 77 |
+
45,
|
| 78 |
+
46,
|
| 79 |
+
47,
|
| 80 |
+
48,
|
| 81 |
+
49,
|
| 82 |
+
50,
|
| 83 |
+
51,
|
| 84 |
+
52,
|
| 85 |
+
53,
|
| 86 |
+
54,
|
| 87 |
+
55,
|
| 88 |
+
56,
|
| 89 |
+
57,
|
| 90 |
+
58,
|
| 91 |
+
59,
|
| 92 |
+
60,
|
| 93 |
+
61,
|
| 94 |
+
62,
|
| 95 |
+
63,
|
| 96 |
+
64,
|
| 97 |
+
65,
|
| 98 |
+
66,
|
| 99 |
+
67,
|
| 100 |
+
68,
|
| 101 |
+
69,
|
| 102 |
+
70,
|
| 103 |
+
71,
|
| 104 |
+
72,
|
| 105 |
+
73,
|
| 106 |
+
74,
|
| 107 |
+
75,
|
| 108 |
+
76,
|
| 109 |
+
77,
|
| 110 |
+
78
|
| 111 |
+
]
|
| 112 |
+
}
|
| 113 |
+
],
|
| 114 |
+
"scalars": null
|
| 115 |
+
},
|
| 116 |
+
"torch_rng_state": "h08rNwf38rOvAAAAAQAAAMIBAAAAAAAA5bVTKAAAAAC1CIbqAAAAAA3rKqAAAAAAGqogVAAAAABigsYrAAAAAAK5m78AAAAAcE43pAAAAAB+YHT7AAAAABSmyhcAAAAAiwIFRwAAAABru0i1AAAAAEAG8YYAAAAAyvLLfgAAAADgU+nAAAAAAFqEKAgAAAAA72kt/gAAAAC/V34GAAAAAJRKvuQAAAAARwrwvAAAAAB8+1buAAAAAHw2FqQAAAAATEURKwAAAADppdGbAAAAALFK1WsAAAAA9FjHJwAAAAAKRlfWAAAAAAxZlR8AAAAA7PS73AAAAABeXBUoAAAAAIA1IjIAAAAAC2JcuwAAAACKVAI/AAAAAJ4PbRkAAAAA/OdaFAAAAAB2VuHVAAAAAOjy5bMAAAAAKivF/gAAAACA5OnEAAAAAC+aNjMAAAAABNa0HQAAAABgCGjwAAAAACVYnKcAAAAAYnQAHgAAAACed89WAAAAAMk1u3oAAAAAUKriUQAAAAC9t94zAAAAAPbZbRcAAAAAGOth4wAAAAAs80jFAAAAAKRt/bQAAAAAa6dpYQAAAABipCt+AAAAAN56VTYAAAAAXpizpwAAAABgJXGrAAAAAFiUPsEAAAAAvOynQAAAAACWnN22AAAAAEPkFa4AAAAAsqkleAAAAAAVdN9zAAAAAHvqifQAAAAAA0n6LwAAAACAuTx4AAAAAD6Z93cAAAAApS7GTwAAAADjfskvAAAAAA+ewj4AAAAAl9DQLAAAAAD9kSH/AAAAAEA+GYgAAAAAvsbprwAAAAB3NN+0AAAAANc6YrwAAAAAnfkv2gAAAACMQfjeAAAAAGA0pCgAAAAAvTnvHAAAAAC4jLlTAAAAALc9p4wAAAAAZjDhogAAAADAlSqiAAAAAAzt0eMAAAAAguPkrgAAAACEQa6HAAAAAFfNjdEAAAAAGoYWVAAAAABF3yVKAAAAADgonYgAAAAAb6PyRAAAAACG7abbAAAAABqiUFAAAAAA0SzSDAAAAACeoR0/AAAAAOgi1CcAAAAA9Dw29QAAAABFfJluAAAAAP8tr7AAAAAAaLAamQAAAABjRZ8PAAAAALEV3aUAAAAAAIRuuwAAAACHGl7bAAAAAIcEIUUAAAAASi25VAAAAADR+U0fAAAAAOWbOB4AAAAAmvLGFAAAAAAJNxX1AAAAAHtE2ScAAAAAk2NOTgAAAAD+65BYAAAAAOsZFekAAAAAWIKxJAAAAABiKM6BAAAAAE1YYQEAAAAA2sop3wAAAABCIl6iAAAAAP/T7yYAAAAAWZNJcQAAAABFOZsSAAAAAFzETvMAAAAAcFP0/AAAAADurpxIAAAAAIaDQdMAAAAAsS/3TAAAAAA0YVttAAAAAGstmGoAAAAAQqLbLAAAAABXzRZ4AAAAANqZ/18AAAAAdVwB9AAAAADsfpg2AAAAAMy2zKoAAAAACSy3KAAAAABDzRafAAAAAAzi404AAAAA3pVkeQAAAAAQ5caLAAAAAHXDzEcAAAAAS59+2QAAAABV2J9+AAAAAMejV/YAAAAAKwKUxwAAAADKYpLRAAAAAPvntMAAAAAA5v2IfQAAAAC9+IekAAAAABN+sCwAAAAAgOOzyQAAAABjjSj0AAAAAG2AsasAAAAA4CMoZgAAAAAHpm84AAAAANC8aZAAAAAAfiO4WQAAAAAzUg0HAAAAAGyLSTsAAAAAXBS5MQAAAADs/KtnAAAAAJO9YCIAAAAAnoesBAAAAAD0ogF9AAAAABKv9CQAAAAA1gbspQAAAABmhttUAAAAAOJ/TG4AAAAAhS0PpAAAAADx5KyDAAAAADmNIvEAAAAA5KJXxAAAAAAGOjP/AAAAADzZdKcAAAAAA8FPFgAAAAALDaYcAAAAAB0U9pgAAAAAr2EncwAAAABok7yTAAAAACRvTNIAAAAAcQKJkQAAAAAvcQE8AAAAALcp3hEAAAAA9ChDqgAAAACs3b0CAAAAAHROJI0AAAAA1R3GBgAAAAA6qGxzAAAAAOeWbGcAAAAAU9x6SgAAAADH/vP6AAAAAKOI8XIAAAAA8KA2VQAAAAAfGgFFAAAAANtgjI0AAAAAsGlzhQAAAABVwSsRAAAAAGXdNw0AAAAAbQSpewAAAAAx9MXlAAAAAMmuVXQAAAAAkTwfDwAAAAB9+wFIAAAAALLWklQAAAAASRZ3CgAAAABCqD25AAAAADrP138AAAAAA9Li1AAAAABj0TYLAAAAAHNsR/oAAAAA2maP7gAAAAB/GyXyAAAAAEsu8MgAAAAAspXNQgAAAAC1Xgb/AAAAALkz90EAAAAA48lYngAAAABm+qG1AAAAAMcXfW0AAAAAE90WugAAAADnSDIFAAAAAJmpDgYAAAAAnAkyCAAAAAD/iCk2AAAAAElp5lsAAAAAeHAxnAAAAADbQgcKAAAAAJ8CEbIAAAAAVY4u5wAAAAAOrWjNAAAAAMMgCpAAAAAAt+qJqQAAAACUbpfvAAAAAFTRiEYAAAAAi7yHngAAAAAyQ1DYAAAAAGJgz/QAAAAA7solGQAAAABxT00rAAAAAArI5bYAAAAARGqyPAAAAABFr7c7AAAAAFEE51wAAAAAum0rtgAAAAAMHHNDAAAAAM3EjogAAAAA4o430QAAAADk7fE+AAAAAAViqkEAAAAAGes0MwAAAABdjpDvAAAAAEbUJRQAAAAAdrnGOgAAAABrQWIuAAAAACvnK2sAAAAAEyRlJgAAAAC/12WyAAAAAEBI67MAAAAAtGGEZwAAAABGu6sdAAAAALuyVuYAAAAAgx0QzgAAAABtMTt3AAAAAMjVDVsAAAAARt941wAAAAAgkpeRAAAAAFLnED4AAAAAjv2/6wAAAADia1FwAAAAAJnk6jwAAAAAJ3jF/AAAAAAhFYtcAAAAAD0VY2UAAAAAd3N11gAAAACzKnTWAAAAAGIujsQAAAAAooiH2gAAAAB/yEYxAAAAADjjTNQAAAAASNHEhwAAAABtncUMAAAAADSX9KIAAAAAbiZNSAAAAACwhbuVAAAAALLTcSYAAAAAmSzpFAAAAABq7Ct3AAAAAPWp3D4AAAAAAe4gpwAAAACsTU9qAAAAAP2tI4QAAAAARNCL7QAAAAA06+aVAAAAAJoNQN0AAAAAJWtPhwAAAAAXEn70AAAAAJssGqkAAAAAAKFQUQAAAACT9kT0AAAAAFu3JOUAAAAANO5gIAAAAAB+VuXIAAAAAPHeI4QAAAAADT7B/wAAAADfokXMAAAAAJfg/f0AAAAArlfEXAAAAAAkYKSUAAAAAEQpA7IAAAAALJqctgAAAACjeaRfAAAAABqE3tEAAAAA0rJopAAAAAD4yLv2AAAAAFOxR8MAAAAAeEtDDwAAAACwX8hdAAAAAKjoZU0AAAAAV26whwAAAAC5K7eKAAAAACxfVTIAAAAA7fBnGAAAAACd50/yAAAAAFP5Jh8AAAAAiSQPKAAAAADlfY5NAAAAAP/8zuIAAAAAoS9UEgAAAAD7pTW4AAAAAHB5cVIAAAAARbh83AAAAADaocV3AAAAAG7HtekAAAAAmNEfcwAAAAD3YdvOAAAAAJPCtA4AAAAAoxmOwwAAAABrPuElAAAAAMak4r8AAAAAAUC/cgAAAAAhZCuIAAAAAN2mf+kAAAAAx83h5gAAAAAmbYMwAAAAAJiCnrQAAAAAQU55rAAAAADw+3SUAAAAAM/2XnEAAAAAcU07SwAAAADA5e+DAAAAAHttOgkAAAAAiGbszAAAAABluI1oAAAAAH2uALwAAAAAGUF/5wAAAABmMR5KAAAAALSCYAUAAAAAiKGWXwAAAAAnvn1PAAAAAAqH+bsAAAAABwJn9QAAAAAzsIpwAAAAABrZ0GcAAAAAhuYe5wAAAABO+FTkAAAAAM2/KvIAAAAAsRPS3gAAAACdx440AAAAAP4tumYAAAAA+bmCzQAAAADJHxanAAAAAPXucQIAAAAAonSCwAAAAACgjOtnAAAAABoWVc0AAAAAkdoAbwAAAACz7aNzAAAAAKW4ruYAAAAA8cr63gAAAABE6SNJAAAAAD2BPGMAAAAAdnPyEgAAAAD8U+XcAAAAAAcdXwAAAAAAEvif/wAAAAD8rlYaAAAAACG0El4AAAAA3437sgAAAAALlh8WAAAAAG9kxgcAAAAAdz6Z+QAAAACUVLt6AAAAAMOofScAAAAAiWSpbAAAAACQguXTAAAAAFroExIAAAAApWtqzgAAAABNjjRLAAAAAGf7vuUAAAAAHZvizwAAAAAHed+7AAAAABQIe3UAAAAA4XtGOAAAAAASc2AdAAAAANHNDk8AAAAAJzMFKAAAAAAXXfpMAAAAABu0ce0AAAAA4/NkJgAAAAA/+1OAAAAAADu2hoIAAAAAZmCDfwAAAADXQL4fAAAAAAS4ug0AAAAAG+OmAgAAAADu60ldAAAAAMsuxCUAAAAAj8UWaQAAAACr0RxfAAAAAAMdSFQAAAAA36GiRwAAAABk4uomAAAAAFcoM5EAAAAAs+yK7gAAAABcwIvCAAAAAIIBImQAAAAAfG9I6AAAAAAn4QM2AAAAALGuHggAAAAAcaGswgAAAACpMriNAAAAAI7aXk0AAAAAZTO/DgAAAABLMd42AAAAAEU6FlIAAAAAPk5rDAAAAADcO1eQAAAAABikK+4AAAAA6IPhsAAAAAALNbnjAAAAAH5zJeMAAAAAkpFEPwAAAABcpLkXAAAAAEQpk1gAAAAAg1qEDQAAAABVMEyiAAAAANdjUEYAAAAAlsVqdgAAAAAvFPWeAAAAAKqzNcIAAAAA98nh2wAAAAD29MteAAAAAAO9kOUAAAAAt+873wAAAAAAY/ZgAAAAAFdmymYAAAAAh6+t/wAAAACAUVNnAAAAAGFVacwAAAAAhn/nDQAAAADUmhjPAAAAAHmQWBgAAAAAql9jiwAAAACFC0uIAAAAAMPDQeUAAAAAR0NZ1QAAAABBn5deAAAAAB5TwDYAAAAAJuVeBQAAAABkVY1bAAAAAFMthYgAAAAA3ZKyXQAAAABC6R5uAAAAAPHG1l8AAAAA9cob5QAAAABoNpXQAAAAAADVHtoAAAAAoXyWdAAAAABzlNNLAAAAAAd7JTwAAAAAFQy61gAAAADNezk1AAAAAD3N/EMAAAAAqi9OHQAAAADu9V/uAAAAALEpjVkAAAAAy2UzWwAAAAC2chu5AAAAAIVrQEIAAAAAItdI3gAAAADERnBmAAAAAA34/pwAAAAARc9ZfAAAAAAlM5R9AAAAAMUAqCsAAAAAf48ghQAAAADY/1U9AAAAAGIcDcwAAAAAocqYuAAAAAA0etJoAAAAALJFCoYAAAAA3+LS9gAAAAAG6CGqAAAAAIZELuoAAAAAesiBRAAAAADIDXwHAAAAANY8KSQAAAAAzq0RkAAAAADNNOjsAAAAABZVTTsAAAAAMOJYbAAAAABNxQyWAAAAAOtIPlMAAAAAoxn9VQAAAAAyfQpBAAAAAFCxFm4AAAAAeul3ewAAAADixnHqAAAAACqMW+EAAAAAByiTLAAAAACfpwVeAAAAADppPwAAAAAANCiqtgAAAABv7Qs5AAAAAP5TNKcAAAAAOMhxeQAAAAAww45yAAAAALOIDngAAAAApdPt+gAAAABO/05GAAAAAHDeD98AAAAAH7Ye8QAAAADnCh6QAAAAANAEPSwAAAAAlwTimwAAAAB2scxWAAAAAMq2rHkAAAAAB/b8EwAAAAAQSW3SAAAAAMuKLnsAAAAAmRG5UAAAAAC2sGuCAAAAAIpjqUEAAAAAgvq4QgAAAABBhm3kAAAAAGa9p+kAAAAAJYzu4QAAAADz1dQfAAAAANC6C5YAAAAAbvuQowAAAABJEwlLAAAAAHwIY4oAAAAASP5riAAAAABzZpcJAAAAAKrZpgYAAAAAWaJRswAAAADVqREtAAAAAKjAN84AAAAACawW3wAAAADrqt14AAAAADA/MwwAAAAA6VoIkQAAAADKRR/qAAAAAGMhqpUAAAAA2gUL3QAAAADMhsOJAAAAAHy9Ru8AAAAApXZ9wwAAAAA6EmlLAAAAAM6zmT0AAAAAkPNYqQAAAAD6QheVAAAAAJ82DQgAAAAAqH9qbAAAAACFQMpFAAAAAMH5ZNsAAAAAq5UO+QAAAABRdDH1AAAAAMJR3gsAAAAAlcaqzQAAAADzWWEdAAAAAIQKqtoAAAAAwrvNwAAAAAD17N09AAAAABGwSVwAAAAA+rnH+AAAAAARTa6vAAAAAJLCGA4AAAAAIGRTswAAAAAbOwKbAAAAAFyofAUAAAAALDaxtAAAAADSpDyBAAAAABSlmB8AAAAAEX19IwAAAACEwHF8AAAAADXsqPwAAAAAe5IE4QAAAACZqU9pAAAAAGzMKdEAAAAAZuqdjQAAAABnaWP4AAAAAPW3T6wAAAAApqmHCgAAAACkUeS0AAAAANd0gnIAAAAAJ1219gAAAADYblUyAAAAANaeA3MAAAAA2JW20QAAAABE13o1AAAAAHE31nUAAAAAmABkGAAAAABaQFzGAAAAAKeAVTQAAAAAOU+S1wAAAABpLdz8AAAAAHOfZiIAAAAAKWLzEgAAAABSkJu/AAAAAKedPQQAAAAACtxWlAAAAABVHrLEAAAAAIgBSwMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==",
|
| 117 |
+
"cuda_rng_state": "BWVGVHP/DAAAAAAAAAAAAA=="
|
| 118 |
+
}
|
checkpoints/step_00010000/.complete
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"files": {
|
| 4 |
+
"config.json": "7d7a8111595963bc28a9b70cda8402bec27f0d6ca34b35fef100bfe5dba4ebeb",
|
| 5 |
+
"model.safetensors": "602c4f73967b5dfbaf35a94ac322f2dfd7f93e1197aa5d661309ea65ffc98917",
|
| 6 |
+
"optimizer.safetensors": "807bd7a1891bfc7ea69de3a1c650d74408c1f31419af68a2af5962b6331c4e63",
|
| 7 |
+
"training_state.json": "1567974d5e6511f984d2f0a1c6ad7febc430b7124543fd3316e6918f41693a6c"
|
| 8 |
+
}
|
| 9 |
+
}
|
checkpoints/step_00010000/config.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"checkpoint_type": "pretrain",
|
| 4 |
+
"model_config": {
|
| 5 |
+
"vocab_size": 4278,
|
| 6 |
+
"max_seq_len": 256,
|
| 7 |
+
"n_outcomes": 5,
|
| 8 |
+
"d_model": 256,
|
| 9 |
+
"n_layers": 8,
|
| 10 |
+
"n_heads": 4,
|
| 11 |
+
"d_ff": 1024,
|
| 12 |
+
"dropout": 0.0,
|
| 13 |
+
"rope_base": 10000.0
|
| 14 |
+
},
|
| 15 |
+
"training_config": {
|
| 16 |
+
"lr": 0.0003,
|
| 17 |
+
"weight_decay": 0.01,
|
| 18 |
+
"max_grad_norm": 1.0,
|
| 19 |
+
"warmup_steps": 1000,
|
| 20 |
+
"total_steps": 100000,
|
| 21 |
+
"batch_size": 256,
|
| 22 |
+
"max_ply": 256,
|
| 23 |
+
"discard_ply_limit": false,
|
| 24 |
+
"num_workers": 4,
|
| 25 |
+
"use_amp": true,
|
| 26 |
+
"accumulation_steps": 1,
|
| 27 |
+
"log_interval": 10,
|
| 28 |
+
"eval_interval": 500,
|
| 29 |
+
"checkpoint_interval": 5000,
|
| 30 |
+
"base_seed": 42,
|
| 31 |
+
"val_seed": 9223372036854775807,
|
| 32 |
+
"val_games": 512,
|
| 33 |
+
"checkpoint_dir": "checkpoints",
|
| 34 |
+
"log_dir": "logs",
|
| 35 |
+
"use_wandb": false,
|
| 36 |
+
"wandb_project": "pawn",
|
| 37 |
+
"device": "cuda"
|
| 38 |
+
}
|
| 39 |
+
}
|
checkpoints/step_00010000/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:602c4f73967b5dfbaf35a94ac322f2dfd7f93e1197aa5d661309ea65ffc98917
|
| 3 |
+
size 38102280
|
checkpoints/step_00010000/optimizer.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:807bd7a1891bfc7ea69de3a1c650d74408c1f31419af68a2af5962b6331c4e63
|
| 3 |
+
size 76210148
|
checkpoints/step_00010000/training_state.json
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"global_step": 10000,
|
| 4 |
+
"scheduler_state_dict": {
|
| 5 |
+
"step": 10000
|
| 6 |
+
},
|
| 7 |
+
"scaler_state_dict": {
|
| 8 |
+
"scale": 524288.0,
|
| 9 |
+
"growth_factor": 2.0,
|
| 10 |
+
"backoff_factor": 0.5,
|
| 11 |
+
"growth_interval": 2000,
|
| 12 |
+
"_growth_tracker": 1704
|
| 13 |
+
},
|
| 14 |
+
"optimizer_meta": {
|
| 15 |
+
"param_groups": [
|
| 16 |
+
{
|
| 17 |
+
"lr": 0.00029453155143795715,
|
| 18 |
+
"betas": [
|
| 19 |
+
0.9,
|
| 20 |
+
0.999
|
| 21 |
+
],
|
| 22 |
+
"eps": 1e-08,
|
| 23 |
+
"weight_decay": 0.01,
|
| 24 |
+
"amsgrad": false,
|
| 25 |
+
"maximize": false,
|
| 26 |
+
"foreach": null,
|
| 27 |
+
"capturable": false,
|
| 28 |
+
"differentiable": false,
|
| 29 |
+
"fused": null,
|
| 30 |
+
"decoupled_weight_decay": true,
|
| 31 |
+
"params": [
|
| 32 |
+
0,
|
| 33 |
+
1,
|
| 34 |
+
2,
|
| 35 |
+
3,
|
| 36 |
+
4,
|
| 37 |
+
5,
|
| 38 |
+
6,
|
| 39 |
+
7,
|
| 40 |
+
8,
|
| 41 |
+
9,
|
| 42 |
+
10,
|
| 43 |
+
11,
|
| 44 |
+
12,
|
| 45 |
+
13,
|
| 46 |
+
14,
|
| 47 |
+
15,
|
| 48 |
+
16,
|
| 49 |
+
17,
|
| 50 |
+
18,
|
| 51 |
+
19,
|
| 52 |
+
20,
|
| 53 |
+
21,
|
| 54 |
+
22,
|
| 55 |
+
23,
|
| 56 |
+
24,
|
| 57 |
+
25,
|
| 58 |
+
26,
|
| 59 |
+
27,
|
| 60 |
+
28,
|
| 61 |
+
29,
|
| 62 |
+
30,
|
| 63 |
+
31,
|
| 64 |
+
32,
|
| 65 |
+
33,
|
| 66 |
+
34,
|
| 67 |
+
35,
|
| 68 |
+
36,
|
| 69 |
+
37,
|
| 70 |
+
38,
|
| 71 |
+
39,
|
| 72 |
+
40,
|
| 73 |
+
41,
|
| 74 |
+
42,
|
| 75 |
+
43,
|
| 76 |
+
44,
|
| 77 |
+
45,
|
| 78 |
+
46,
|
| 79 |
+
47,
|
| 80 |
+
48,
|
| 81 |
+
49,
|
| 82 |
+
50,
|
| 83 |
+
51,
|
| 84 |
+
52,
|
| 85 |
+
53,
|
| 86 |
+
54,
|
| 87 |
+
55,
|
| 88 |
+
56,
|
| 89 |
+
57,
|
| 90 |
+
58,
|
| 91 |
+
59,
|
| 92 |
+
60,
|
| 93 |
+
61,
|
| 94 |
+
62,
|
| 95 |
+
63,
|
| 96 |
+
64,
|
| 97 |
+
65,
|
| 98 |
+
66,
|
| 99 |
+
67,
|
| 100 |
+
68,
|
| 101 |
+
69,
|
| 102 |
+
70,
|
| 103 |
+
71,
|
| 104 |
+
72,
|
| 105 |
+
73,
|
| 106 |
+
74,
|
| 107 |
+
75,
|
| 108 |
+
76,
|
| 109 |
+
77,
|
| 110 |
+
78
|
| 111 |
+
]
|
| 112 |
+
}
|
| 113 |
+
],
|
| 114 |
+
"scalars": null
|
| 115 |
+
},
|
| 116 |
+
"torch_rng_state": "h08rNwf38rOvAAAAAQAAAMIBAAAAAAAA5bVTKAAAAAC1CIbqAAAAAA3rKqAAAAAAGqogVAAAAABigsYrAAAAAAK5m78AAAAAcE43pAAAAAB+YHT7AAAAABSmyhcAAAAAiwIFRwAAAABru0i1AAAAAEAG8YYAAAAAyvLLfgAAAADgU+nAAAAAAFqEKAgAAAAA72kt/gAAAAC/V34GAAAAAJRKvuQAAAAARwrwvAAAAAB8+1buAAAAAHw2FqQAAAAATEURKwAAAADppdGbAAAAALFK1WsAAAAA9FjHJwAAAAAKRlfWAAAAAAxZlR8AAAAA7PS73AAAAABeXBUoAAAAAIA1IjIAAAAAC2JcuwAAAACKVAI/AAAAAJ4PbRkAAAAA/OdaFAAAAAB2VuHVAAAAAOjy5bMAAAAAKivF/gAAAACA5OnEAAAAAC+aNjMAAAAABNa0HQAAAABgCGjwAAAAACVYnKcAAAAAYnQAHgAAAACed89WAAAAAMk1u3oAAAAAUKriUQAAAAC9t94zAAAAAPbZbRcAAAAAGOth4wAAAAAs80jFAAAAAKRt/bQAAAAAa6dpYQAAAABipCt+AAAAAN56VTYAAAAAXpizpwAAAABgJXGrAAAAAFiUPsEAAAAAvOynQAAAAACWnN22AAAAAEPkFa4AAAAAsqkleAAAAAAVdN9zAAAAAHvqifQAAAAAA0n6LwAAAACAuTx4AAAAAD6Z93cAAAAApS7GTwAAAADjfskvAAAAAA+ewj4AAAAAl9DQLAAAAAD9kSH/AAAAAEA+GYgAAAAAvsbprwAAAAB3NN+0AAAAANc6YrwAAAAAnfkv2gAAAACMQfjeAAAAAGA0pCgAAAAAvTnvHAAAAAC4jLlTAAAAALc9p4wAAAAAZjDhogAAAADAlSqiAAAAAAzt0eMAAAAAguPkrgAAAACEQa6HAAAAAFfNjdEAAAAAGoYWVAAAAABF3yVKAAAAADgonYgAAAAAb6PyRAAAAACG7abbAAAAABqiUFAAAAAA0SzSDAAAAACeoR0/AAAAAOgi1CcAAAAA9Dw29QAAAABFfJluAAAAAP8tr7AAAAAAaLAamQAAAABjRZ8PAAAAALEV3aUAAAAAAIRuuwAAAACHGl7bAAAAAIcEIUUAAAAASi25VAAAAADR+U0fAAAAAOWbOB4AAAAAmvLGFAAAAAAJNxX1AAAAAHtE2ScAAAAAk2NOTgAAAAD+65BYAAAAAOsZFekAAAAAWIKxJAAAAABiKM6BAAAAAE1YYQEAAAAA2sop3wAAAABCIl6iAAAAAP/T7yYAAAAAWZNJcQAAAABFOZsSAAAAAFzETvMAAAAAcFP0/AAAAADurpxIAAAAAIaDQdMAAAAAsS/3TAAAAAA0YVttAAAAAGstmGoAAAAAQqLbLAAAAABXzRZ4AAAAANqZ/18AAAAAdVwB9AAAAADsfpg2AAAAAMy2zKoAAAAACSy3KAAAAABDzRafAAAAAAzi404AAAAA3pVkeQAAAAAQ5caLAAAAAHXDzEcAAAAAS59+2QAAAABV2J9+AAAAAMejV/YAAAAAKwKUxwAAAADKYpLRAAAAAPvntMAAAAAA5v2IfQAAAAC9+IekAAAAABN+sCwAAAAAgOOzyQAAAABjjSj0AAAAAG2AsasAAAAA4CMoZgAAAAAHpm84AAAAANC8aZAAAAAAfiO4WQAAAAAzUg0HAAAAAGyLSTsAAAAAXBS5MQAAAADs/KtnAAAAAJO9YCIAAAAAnoesBAAAAAD0ogF9AAAAABKv9CQAAAAA1gbspQAAAABmhttUAAAAAOJ/TG4AAAAAhS0PpAAAAADx5KyDAAAAADmNIvEAAAAA5KJXxAAAAAAGOjP/AAAAADzZdKcAAAAAA8FPFgAAAAALDaYcAAAAAB0U9pgAAAAAr2EncwAAAABok7yTAAAAACRvTNIAAAAAcQKJkQAAAAAvcQE8AAAAALcp3hEAAAAA9ChDqgAAAACs3b0CAAAAAHROJI0AAAAA1R3GBgAAAAA6qGxzAAAAAOeWbGcAAAAAU9x6SgAAAADH/vP6AAAAAKOI8XIAAAAA8KA2VQAAAAAfGgFFAAAAANtgjI0AAAAAsGlzhQAAAABVwSsRAAAAAGXdNw0AAAAAbQSpewAAAAAx9MXlAAAAAMmuVXQAAAAAkTwfDwAAAAB9+wFIAAAAALLWklQAAAAASRZ3CgAAAABCqD25AAAAADrP138AAAAAA9Li1AAAAABj0TYLAAAAAHNsR/oAAAAA2maP7gAAAAB/GyXyAAAAAEsu8MgAAAAAspXNQgAAAAC1Xgb/AAAAALkz90EAAAAA48lYngAAAABm+qG1AAAAAMcXfW0AAAAAE90WugAAAADnSDIFAAAAAJmpDgYAAAAAnAkyCAAAAAD/iCk2AAAAAElp5lsAAAAAeHAxnAAAAADbQgcKAAAAAJ8CEbIAAAAAVY4u5wAAAAAOrWjNAAAAAMMgCpAAAAAAt+qJqQAAAACUbpfvAAAAAFTRiEYAAAAAi7yHngAAAAAyQ1DYAAAAAGJgz/QAAAAA7solGQAAAABxT00rAAAAAArI5bYAAAAARGqyPAAAAABFr7c7AAAAAFEE51wAAAAAum0rtgAAAAAMHHNDAAAAAM3EjogAAAAA4o430QAAAADk7fE+AAAAAAViqkEAAAAAGes0MwAAAABdjpDvAAAAAEbUJRQAAAAAdrnGOgAAAABrQWIuAAAAACvnK2sAAAAAEyRlJgAAAAC/12WyAAAAAEBI67MAAAAAtGGEZwAAAABGu6sdAAAAALuyVuYAAAAAgx0QzgAAAABtMTt3AAAAAMjVDVsAAAAARt941wAAAAAgkpeRAAAAAFLnED4AAAAAjv2/6wAAAADia1FwAAAAAJnk6jwAAAAAJ3jF/AAAAAAhFYtcAAAAAD0VY2UAAAAAd3N11gAAAACzKnTWAAAAAGIujsQAAAAAooiH2gAAAAB/yEYxAAAAADjjTNQAAAAASNHEhwAAAABtncUMAAAAADSX9KIAAAAAbiZNSAAAAACwhbuVAAAAALLTcSYAAAAAmSzpFAAAAABq7Ct3AAAAAPWp3D4AAAAAAe4gpwAAAACsTU9qAAAAAP2tI4QAAAAARNCL7QAAAAA06+aVAAAAAJoNQN0AAAAAJWtPhwAAAAAXEn70AAAAAJssGqkAAAAAAKFQUQAAAACT9kT0AAAAAFu3JOUAAAAANO5gIAAAAAB+VuXIAAAAAPHeI4QAAAAADT7B/wAAAADfokXMAAAAAJfg/f0AAAAArlfEXAAAAAAkYKSUAAAAAEQpA7IAAAAALJqctgAAAACjeaRfAAAAABqE3tEAAAAA0rJopAAAAAD4yLv2AAAAAFOxR8MAAAAAeEtDDwAAAACwX8hdAAAAAKjoZU0AAAAAV26whwAAAAC5K7eKAAAAACxfVTIAAAAA7fBnGAAAAACd50/yAAAAAFP5Jh8AAAAAiSQPKAAAAADlfY5NAAAAAP/8zuIAAAAAoS9UEgAAAAD7pTW4AAAAAHB5cVIAAAAARbh83AAAAADaocV3AAAAAG7HtekAAAAAmNEfcwAAAAD3YdvOAAAAAJPCtA4AAAAAoxmOwwAAAABrPuElAAAAAMak4r8AAAAAAUC/cgAAAAAhZCuIAAAAAN2mf+kAAAAAx83h5gAAAAAmbYMwAAAAAJiCnrQAAAAAQU55rAAAAADw+3SUAAAAAM/2XnEAAAAAcU07SwAAAADA5e+DAAAAAHttOgkAAAAAiGbszAAAAABluI1oAAAAAH2uALwAAAAAGUF/5wAAAABmMR5KAAAAALSCYAUAAAAAiKGWXwAAAAAnvn1PAAAAAAqH+bsAAAAABwJn9QAAAAAzsIpwAAAAABrZ0GcAAAAAhuYe5wAAAABO+FTkAAAAAM2/KvIAAAAAsRPS3gAAAACdx440AAAAAP4tumYAAAAA+bmCzQAAAADJHxanAAAAAPXucQIAAAAAonSCwAAAAACgjOtnAAAAABoWVc0AAAAAkdoAbwAAAACz7aNzAAAAAKW4ruYAAAAA8cr63gAAAABE6SNJAAAAAD2BPGMAAAAAdnPyEgAAAAD8U+XcAAAAAAcdXwAAAAAAEvif/wAAAAD8rlYaAAAAACG0El4AAAAA3437sgAAAAALlh8WAAAAAG9kxgcAAAAAdz6Z+QAAAACUVLt6AAAAAMOofScAAAAAiWSpbAAAAACQguXTAAAAAFroExIAAAAApWtqzgAAAABNjjRLAAAAAGf7vuUAAAAAHZvizwAAAAAHed+7AAAAABQIe3UAAAAA4XtGOAAAAAASc2AdAAAAANHNDk8AAAAAJzMFKAAAAAAXXfpMAAAAABu0ce0AAAAA4/NkJgAAAAA/+1OAAAAAADu2hoIAAAAAZmCDfwAAAADXQL4fAAAAAAS4ug0AAAAAG+OmAgAAAADu60ldAAAAAMsuxCUAAAAAj8UWaQAAAACr0RxfAAAAAAMdSFQAAAAA36GiRwAAAABk4uomAAAAAFcoM5EAAAAAs+yK7gAAAABcwIvCAAAAAIIBImQAAAAAfG9I6AAAAAAn4QM2AAAAALGuHggAAAAAcaGswgAAAACpMriNAAAAAI7aXk0AAAAAZTO/DgAAAABLMd42AAAAAEU6FlIAAAAAPk5rDAAAAADcO1eQAAAAABikK+4AAAAA6IPhsAAAAAALNbnjAAAAAH5zJeMAAAAAkpFEPwAAAABcpLkXAAAAAEQpk1gAAAAAg1qEDQAAAABVMEyiAAAAANdjUEYAAAAAlsVqdgAAAAAvFPWeAAAAAKqzNcIAAAAA98nh2wAAAAD29MteAAAAAAO9kOUAAAAAt+873wAAAAAAY/ZgAAAAAFdmymYAAAAAh6+t/wAAAACAUVNnAAAAAGFVacwAAAAAhn/nDQAAAADUmhjPAAAAAHmQWBgAAAAAql9jiwAAAACFC0uIAAAAAMPDQeUAAAAAR0NZ1QAAAABBn5deAAAAAB5TwDYAAAAAJuVeBQAAAABkVY1bAAAAAFMthYgAAAAA3ZKyXQAAAABC6R5uAAAAAPHG1l8AAAAA9cob5QAAAABoNpXQAAAAAADVHtoAAAAAoXyWdAAAAABzlNNLAAAAAAd7JTwAAAAAFQy61gAAAADNezk1AAAAAD3N/EMAAAAAqi9OHQAAAADu9V/uAAAAALEpjVkAAAAAy2UzWwAAAAC2chu5AAAAAIVrQEIAAAAAItdI3gAAAADERnBmAAAAAA34/pwAAAAARc9ZfAAAAAAlM5R9AAAAAMUAqCsAAAAAf48ghQAAAADY/1U9AAAAAGIcDcwAAAAAocqYuAAAAAA0etJoAAAAALJFCoYAAAAA3+LS9gAAAAAG6CGqAAAAAIZELuoAAAAAesiBRAAAAADIDXwHAAAAANY8KSQAAAAAzq0RkAAAAADNNOjsAAAAABZVTTsAAAAAMOJYbAAAAABNxQyWAAAAAOtIPlMAAAAAoxn9VQAAAAAyfQpBAAAAAFCxFm4AAAAAeul3ewAAAADixnHqAAAAACqMW+EAAAAAByiTLAAAAACfpwVeAAAAADppPwAAAAAANCiqtgAAAABv7Qs5AAAAAP5TNKcAAAAAOMhxeQAAAAAww45yAAAAALOIDngAAAAApdPt+gAAAABO/05GAAAAAHDeD98AAAAAH7Ye8QAAAADnCh6QAAAAANAEPSwAAAAAlwTimwAAAAB2scxWAAAAAMq2rHkAAAAAB/b8EwAAAAAQSW3SAAAAAMuKLnsAAAAAmRG5UAAAAAC2sGuCAAAAAIpjqUEAAAAAgvq4QgAAAABBhm3kAAAAAGa9p+kAAAAAJYzu4QAAAADz1dQfAAAAANC6C5YAAAAAbvuQowAAAABJEwlLAAAAAHwIY4oAAAAASP5riAAAAABzZpcJAAAAAKrZpgYAAAAAWaJRswAAAADVqREtAAAAAKjAN84AAAAACawW3wAAAADrqt14AAAAADA/MwwAAAAA6VoIkQAAAADKRR/qAAAAAGMhqpUAAAAA2gUL3QAAAADMhsOJAAAAAHy9Ru8AAAAApXZ9wwAAAAA6EmlLAAAAAM6zmT0AAAAAkPNYqQAAAAD6QheVAAAAAJ82DQgAAAAAqH9qbAAAAACFQMpFAAAAAMH5ZNsAAAAAq5UO+QAAAABRdDH1AAAAAMJR3gsAAAAAlcaqzQAAAADzWWEdAAAAAIQKqtoAAAAAwrvNwAAAAAD17N09AAAAABGwSVwAAAAA+rnH+AAAAAARTa6vAAAAAJLCGA4AAAAAIGRTswAAAAAbOwKbAAAAAFyofAUAAAAALDaxtAAAAADSpDyBAAAAABSlmB8AAAAAEX19IwAAAACEwHF8AAAAADXsqPwAAAAAe5IE4QAAAACZqU9pAAAAAGzMKdEAAAAAZuqdjQAAAABnaWP4AAAAAPW3T6wAAAAApqmHCgAAAACkUeS0AAAAANd0gnIAAAAAJ1219gAAAADYblUyAAAAANaeA3MAAAAA2JW20QAAAABE13o1AAAAAHE31nUAAAAAmABkGAAAAABaQFzGAAAAAKeAVTQAAAAAOU+S1wAAAABpLdz8AAAAAHOfZiIAAAAAKWLzEgAAAABSkJu/AAAAAKedPQQAAAAACtxWlAAAAABVHrLEAAAAAIgBSwMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==",
|
| 117 |
+
"cuda_rng_state": "BWVGVHP/DAAAAAAAAAAAAA=="
|
| 118 |
+
}
|
checkpoints/step_00015000/.complete
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"files": {
|
| 4 |
+
"config.json": "7d7a8111595963bc28a9b70cda8402bec27f0d6ca34b35fef100bfe5dba4ebeb",
|
| 5 |
+
"model.safetensors": "e0ae52f4e649eacd7b535165e4f266e7fc7d4b21e9f296150fe9725a14760486",
|
| 6 |
+
"optimizer.safetensors": "aeb4943471ea63d5fd7d30ea5d98ff70a89af7b18a28f1d28e13b63c6b74d945",
|
| 7 |
+
"training_state.json": "083682ab7a6b330be3ce076eb3b70e90b60c32c2cdf635a67da01774860e4278"
|
| 8 |
+
}
|
| 9 |
+
}
|
checkpoints/step_00015000/config.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"checkpoint_type": "pretrain",
|
| 4 |
+
"model_config": {
|
| 5 |
+
"vocab_size": 4278,
|
| 6 |
+
"max_seq_len": 256,
|
| 7 |
+
"n_outcomes": 5,
|
| 8 |
+
"d_model": 256,
|
| 9 |
+
"n_layers": 8,
|
| 10 |
+
"n_heads": 4,
|
| 11 |
+
"d_ff": 1024,
|
| 12 |
+
"dropout": 0.0,
|
| 13 |
+
"rope_base": 10000.0
|
| 14 |
+
},
|
| 15 |
+
"training_config": {
|
| 16 |
+
"lr": 0.0003,
|
| 17 |
+
"weight_decay": 0.01,
|
| 18 |
+
"max_grad_norm": 1.0,
|
| 19 |
+
"warmup_steps": 1000,
|
| 20 |
+
"total_steps": 100000,
|
| 21 |
+
"batch_size": 256,
|
| 22 |
+
"max_ply": 256,
|
| 23 |
+
"discard_ply_limit": false,
|
| 24 |
+
"num_workers": 4,
|
| 25 |
+
"use_amp": true,
|
| 26 |
+
"accumulation_steps": 1,
|
| 27 |
+
"log_interval": 10,
|
| 28 |
+
"eval_interval": 500,
|
| 29 |
+
"checkpoint_interval": 5000,
|
| 30 |
+
"base_seed": 42,
|
| 31 |
+
"val_seed": 9223372036854775807,
|
| 32 |
+
"val_games": 512,
|
| 33 |
+
"checkpoint_dir": "checkpoints",
|
| 34 |
+
"log_dir": "logs",
|
| 35 |
+
"use_wandb": false,
|
| 36 |
+
"wandb_project": "pawn",
|
| 37 |
+
"device": "cuda"
|
| 38 |
+
}
|
| 39 |
+
}
|
checkpoints/step_00015000/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e0ae52f4e649eacd7b535165e4f266e7fc7d4b21e9f296150fe9725a14760486
|
| 3 |
+
size 38102280
|
checkpoints/step_00015000/optimizer.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aeb4943471ea63d5fd7d30ea5d98ff70a89af7b18a28f1d28e13b63c6b74d945
|
| 3 |
+
size 76210148
|
checkpoints/step_00015000/training_state.json
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"global_step": 15000,
|
| 4 |
+
"scheduler_state_dict": {
|
| 5 |
+
"step": 15000
|
| 6 |
+
},
|
| 7 |
+
"scaler_state_dict": {
|
| 8 |
+
"scale": 1048576.0,
|
| 9 |
+
"growth_factor": 2.0,
|
| 10 |
+
"backoff_factor": 0.5,
|
| 11 |
+
"growth_interval": 2000,
|
| 12 |
+
"_growth_tracker": 681
|
| 13 |
+
},
|
| 14 |
+
"optimizer_meta": {
|
| 15 |
+
"param_groups": [
|
| 16 |
+
{
|
| 17 |
+
"lr": 0.0002868950826686939,
|
| 18 |
+
"betas": [
|
| 19 |
+
0.9,
|
| 20 |
+
0.999
|
| 21 |
+
],
|
| 22 |
+
"eps": 1e-08,
|
| 23 |
+
"weight_decay": 0.01,
|
| 24 |
+
"amsgrad": false,
|
| 25 |
+
"maximize": false,
|
| 26 |
+
"foreach": null,
|
| 27 |
+
"capturable": false,
|
| 28 |
+
"differentiable": false,
|
| 29 |
+
"fused": null,
|
| 30 |
+
"decoupled_weight_decay": true,
|
| 31 |
+
"params": [
|
| 32 |
+
0,
|
| 33 |
+
1,
|
| 34 |
+
2,
|
| 35 |
+
3,
|
| 36 |
+
4,
|
| 37 |
+
5,
|
| 38 |
+
6,
|
| 39 |
+
7,
|
| 40 |
+
8,
|
| 41 |
+
9,
|
| 42 |
+
10,
|
| 43 |
+
11,
|
| 44 |
+
12,
|
| 45 |
+
13,
|
| 46 |
+
14,
|
| 47 |
+
15,
|
| 48 |
+
16,
|
| 49 |
+
17,
|
| 50 |
+
18,
|
| 51 |
+
19,
|
| 52 |
+
20,
|
| 53 |
+
21,
|
| 54 |
+
22,
|
| 55 |
+
23,
|
| 56 |
+
24,
|
| 57 |
+
25,
|
| 58 |
+
26,
|
| 59 |
+
27,
|
| 60 |
+
28,
|
| 61 |
+
29,
|
| 62 |
+
30,
|
| 63 |
+
31,
|
| 64 |
+
32,
|
| 65 |
+
33,
|
| 66 |
+
34,
|
| 67 |
+
35,
|
| 68 |
+
36,
|
| 69 |
+
37,
|
| 70 |
+
38,
|
| 71 |
+
39,
|
| 72 |
+
40,
|
| 73 |
+
41,
|
| 74 |
+
42,
|
| 75 |
+
43,
|
| 76 |
+
44,
|
| 77 |
+
45,
|
| 78 |
+
46,
|
| 79 |
+
47,
|
| 80 |
+
48,
|
| 81 |
+
49,
|
| 82 |
+
50,
|
| 83 |
+
51,
|
| 84 |
+
52,
|
| 85 |
+
53,
|
| 86 |
+
54,
|
| 87 |
+
55,
|
| 88 |
+
56,
|
| 89 |
+
57,
|
| 90 |
+
58,
|
| 91 |
+
59,
|
| 92 |
+
60,
|
| 93 |
+
61,
|
| 94 |
+
62,
|
| 95 |
+
63,
|
| 96 |
+
64,
|
| 97 |
+
65,
|
| 98 |
+
66,
|
| 99 |
+
67,
|
| 100 |
+
68,
|
| 101 |
+
69,
|
| 102 |
+
70,
|
| 103 |
+
71,
|
| 104 |
+
72,
|
| 105 |
+
73,
|
| 106 |
+
74,
|
| 107 |
+
75,
|
| 108 |
+
76,
|
| 109 |
+
77,
|
| 110 |
+
78
|
| 111 |
+
]
|
| 112 |
+
}
|
| 113 |
+
],
|
| 114 |
+
"scalars": null
|
| 115 |
+
},
|
| 116 |
+
"torch_rng_state": "h08rNwf38rOvAAAAAQAAAMIBAAAAAAAA5bVTKAAAAAC1CIbqAAAAAA3rKqAAAAAAGqogVAAAAABigsYrAAAAAAK5m78AAAAAcE43pAAAAAB+YHT7AAAAABSmyhcAAAAAiwIFRwAAAABru0i1AAAAAEAG8YYAAAAAyvLLfgAAAADgU+nAAAAAAFqEKAgAAAAA72kt/gAAAAC/V34GAAAAAJRKvuQAAAAARwrwvAAAAAB8+1buAAAAAHw2FqQAAAAATEURKwAAAADppdGbAAAAALFK1WsAAAAA9FjHJwAAAAAKRlfWAAAAAAxZlR8AAAAA7PS73AAAAABeXBUoAAAAAIA1IjIAAAAAC2JcuwAAAACKVAI/AAAAAJ4PbRkAAAAA/OdaFAAAAAB2VuHVAAAAAOjy5bMAAAAAKivF/gAAAACA5OnEAAAAAC+aNjMAAAAABNa0HQAAAABgCGjwAAAAACVYnKcAAAAAYnQAHgAAAACed89WAAAAAMk1u3oAAAAAUKriUQAAAAC9t94zAAAAAPbZbRcAAAAAGOth4wAAAAAs80jFAAAAAKRt/bQAAAAAa6dpYQAAAABipCt+AAAAAN56VTYAAAAAXpizpwAAAABgJXGrAAAAAFiUPsEAAAAAvOynQAAAAACWnN22AAAAAEPkFa4AAAAAsqkleAAAAAAVdN9zAAAAAHvqifQAAAAAA0n6LwAAAACAuTx4AAAAAD6Z93cAAAAApS7GTwAAAADjfskvAAAAAA+ewj4AAAAAl9DQLAAAAAD9kSH/AAAAAEA+GYgAAAAAvsbprwAAAAB3NN+0AAAAANc6YrwAAAAAnfkv2gAAAACMQfjeAAAAAGA0pCgAAAAAvTnvHAAAAAC4jLlTAAAAALc9p4wAAAAAZjDhogAAAADAlSqiAAAAAAzt0eMAAAAAguPkrgAAAACEQa6HAAAAAFfNjdEAAAAAGoYWVAAAAABF3yVKAAAAADgonYgAAAAAb6PyRAAAAACG7abbAAAAABqiUFAAAAAA0SzSDAAAAACeoR0/AAAAAOgi1CcAAAAA9Dw29QAAAABFfJluAAAAAP8tr7AAAAAAaLAamQAAAABjRZ8PAAAAALEV3aUAAAAAAIRuuwAAAACHGl7bAAAAAIcEIUUAAAAASi25VAAAAADR+U0fAAAAAOWbOB4AAAAAmvLGFAAAAAAJNxX1AAAAAHtE2ScAAAAAk2NOTgAAAAD+65BYAAAAAOsZFekAAAAAWIKxJAAAAABiKM6BAAAAAE1YYQEAAAAA2sop3wAAAABCIl6iAAAAAP/T7yYAAAAAWZNJcQAAAABFOZsSAAAAAFzETvMAAAAAcFP0/AAAAADurpxIAAAAAIaDQdMAAAAAsS/3TAAAAAA0YVttAAAAAGstmGoAAAAAQqLbLAAAAABXzRZ4AAAAANqZ/18AAAAAdVwB9AAAAADsfpg2AAAAAMy2zKoAAAAACSy3KAAAAABDzRafAAAAAAzi404AAAAA3pVkeQAAAAAQ5caLAAAAAHXDzEcAAAAAS59+2QAAAABV2J9+AAAAAMejV/YAAAAAKwKUxwAAAADKYpLRAAAAAPvntMAAAAAA5v2IfQAAAAC9+IekAAAAABN+sCwAAAAAgOOzyQAAAABjjSj0AAAAAG2AsasAAAAA4CMoZgAAAAAHpm84AAAAANC8aZAAAAAAfiO4WQAAAAAzUg0HAAAAAGyLSTsAAAAAXBS5MQAAAADs/KtnAAAAAJO9YCIAAAAAnoesBAAAAAD0ogF9AAAAABKv9CQAAAAA1gbspQAAAABmhttUAAAAAOJ/TG4AAAAAhS0PpAAAAADx5KyDAAAAADmNIvEAAAAA5KJXxAAAAAAGOjP/AAAAADzZdKcAAAAAA8FPFgAAAAALDaYcAAAAAB0U9pgAAAAAr2EncwAAAABok7yTAAAAACRvTNIAAAAAcQKJkQAAAAAvcQE8AAAAALcp3hEAAAAA9ChDqgAAAACs3b0CAAAAAHROJI0AAAAA1R3GBgAAAAA6qGxzAAAAAOeWbGcAAAAAU9x6SgAAAADH/vP6AAAAAKOI8XIAAAAA8KA2VQAAAAAfGgFFAAAAANtgjI0AAAAAsGlzhQAAAABVwSsRAAAAAGXdNw0AAAAAbQSpewAAAAAx9MXlAAAAAMmuVXQAAAAAkTwfDwAAAAB9+wFIAAAAALLWklQAAAAASRZ3CgAAAABCqD25AAAAADrP138AAAAAA9Li1AAAAABj0TYLAAAAAHNsR/oAAAAA2maP7gAAAAB/GyXyAAAAAEsu8MgAAAAAspXNQgAAAAC1Xgb/AAAAALkz90EAAAAA48lYngAAAABm+qG1AAAAAMcXfW0AAAAAE90WugAAAADnSDIFAAAAAJmpDgYAAAAAnAkyCAAAAAD/iCk2AAAAAElp5lsAAAAAeHAxnAAAAADbQgcKAAAAAJ8CEbIAAAAAVY4u5wAAAAAOrWjNAAAAAMMgCpAAAAAAt+qJqQAAAACUbpfvAAAAAFTRiEYAAAAAi7yHngAAAAAyQ1DYAAAAAGJgz/QAAAAA7solGQAAAABxT00rAAAAAArI5bYAAAAARGqyPAAAAABFr7c7AAAAAFEE51wAAAAAum0rtgAAAAAMHHNDAAAAAM3EjogAAAAA4o430QAAAADk7fE+AAAAAAViqkEAAAAAGes0MwAAAABdjpDvAAAAAEbUJRQAAAAAdrnGOgAAAABrQWIuAAAAACvnK2sAAAAAEyRlJgAAAAC/12WyAAAAAEBI67MAAAAAtGGEZwAAAABGu6sdAAAAALuyVuYAAAAAgx0QzgAAAABtMTt3AAAAAMjVDVsAAAAARt941wAAAAAgkpeRAAAAAFLnED4AAAAAjv2/6wAAAADia1FwAAAAAJnk6jwAAAAAJ3jF/AAAAAAhFYtcAAAAAD0VY2UAAAAAd3N11gAAAACzKnTWAAAAAGIujsQAAAAAooiH2gAAAAB/yEYxAAAAADjjTNQAAAAASNHEhwAAAABtncUMAAAAADSX9KIAAAAAbiZNSAAAAACwhbuVAAAAALLTcSYAAAAAmSzpFAAAAABq7Ct3AAAAAPWp3D4AAAAAAe4gpwAAAACsTU9qAAAAAP2tI4QAAAAARNCL7QAAAAA06+aVAAAAAJoNQN0AAAAAJWtPhwAAAAAXEn70AAAAAJssGqkAAAAAAKFQUQAAAACT9kT0AAAAAFu3JOUAAAAANO5gIAAAAAB+VuXIAAAAAPHeI4QAAAAADT7B/wAAAADfokXMAAAAAJfg/f0AAAAArlfEXAAAAAAkYKSUAAAAAEQpA7IAAAAALJqctgAAAACjeaRfAAAAABqE3tEAAAAA0rJopAAAAAD4yLv2AAAAAFOxR8MAAAAAeEtDDwAAAACwX8hdAAAAAKjoZU0AAAAAV26whwAAAAC5K7eKAAAAACxfVTIAAAAA7fBnGAAAAACd50/yAAAAAFP5Jh8AAAAAiSQPKAAAAADlfY5NAAAAAP/8zuIAAAAAoS9UEgAAAAD7pTW4AAAAAHB5cVIAAAAARbh83AAAAADaocV3AAAAAG7HtekAAAAAmNEfcwAAAAD3YdvOAAAAAJPCtA4AAAAAoxmOwwAAAABrPuElAAAAAMak4r8AAAAAAUC/cgAAAAAhZCuIAAAAAN2mf+kAAAAAx83h5gAAAAAmbYMwAAAAAJiCnrQAAAAAQU55rAAAAADw+3SUAAAAAM/2XnEAAAAAcU07SwAAAADA5e+DAAAAAHttOgkAAAAAiGbszAAAAABluI1oAAAAAH2uALwAAAAAGUF/5wAAAABmMR5KAAAAALSCYAUAAAAAiKGWXwAAAAAnvn1PAAAAAAqH+bsAAAAABwJn9QAAAAAzsIpwAAAAABrZ0GcAAAAAhuYe5wAAAABO+FTkAAAAAM2/KvIAAAAAsRPS3gAAAACdx440AAAAAP4tumYAAAAA+bmCzQAAAADJHxanAAAAAPXucQIAAAAAonSCwAAAAACgjOtnAAAAABoWVc0AAAAAkdoAbwAAAACz7aNzAAAAAKW4ruYAAAAA8cr63gAAAABE6SNJAAAAAD2BPGMAAAAAdnPyEgAAAAD8U+XcAAAAAAcdXwAAAAAAEvif/wAAAAD8rlYaAAAAACG0El4AAAAA3437sgAAAAALlh8WAAAAAG9kxgcAAAAAdz6Z+QAAAACUVLt6AAAAAMOofScAAAAAiWSpbAAAAACQguXTAAAAAFroExIAAAAApWtqzgAAAABNjjRLAAAAAGf7vuUAAAAAHZvizwAAAAAHed+7AAAAABQIe3UAAAAA4XtGOAAAAAASc2AdAAAAANHNDk8AAAAAJzMFKAAAAAAXXfpMAAAAABu0ce0AAAAA4/NkJgAAAAA/+1OAAAAAADu2hoIAAAAAZmCDfwAAAADXQL4fAAAAAAS4ug0AAAAAG+OmAgAAAADu60ldAAAAAMsuxCUAAAAAj8UWaQAAAACr0RxfAAAAAAMdSFQAAAAA36GiRwAAAABk4uomAAAAAFcoM5EAAAAAs+yK7gAAAABcwIvCAAAAAIIBImQAAAAAfG9I6AAAAAAn4QM2AAAAALGuHggAAAAAcaGswgAAAACpMriNAAAAAI7aXk0AAAAAZTO/DgAAAABLMd42AAAAAEU6FlIAAAAAPk5rDAAAAADcO1eQAAAAABikK+4AAAAA6IPhsAAAAAALNbnjAAAAAH5zJeMAAAAAkpFEPwAAAABcpLkXAAAAAEQpk1gAAAAAg1qEDQAAAABVMEyiAAAAANdjUEYAAAAAlsVqdgAAAAAvFPWeAAAAAKqzNcIAAAAA98nh2wAAAAD29MteAAAAAAO9kOUAAAAAt+873wAAAAAAY/ZgAAAAAFdmymYAAAAAh6+t/wAAAACAUVNnAAAAAGFVacwAAAAAhn/nDQAAAADUmhjPAAAAAHmQWBgAAAAAql9jiwAAAACFC0uIAAAAAMPDQeUAAAAAR0NZ1QAAAABBn5deAAAAAB5TwDYAAAAAJuVeBQAAAABkVY1bAAAAAFMthYgAAAAA3ZKyXQAAAABC6R5uAAAAAPHG1l8AAAAA9cob5QAAAABoNpXQAAAAAADVHtoAAAAAoXyWdAAAAABzlNNLAAAAAAd7JTwAAAAAFQy61gAAAADNezk1AAAAAD3N/EMAAAAAqi9OHQAAAADu9V/uAAAAALEpjVkAAAAAy2UzWwAAAAC2chu5AAAAAIVrQEIAAAAAItdI3gAAAADERnBmAAAAAA34/pwAAAAARc9ZfAAAAAAlM5R9AAAAAMUAqCsAAAAAf48ghQAAAADY/1U9AAAAAGIcDcwAAAAAocqYuAAAAAA0etJoAAAAALJFCoYAAAAA3+LS9gAAAAAG6CGqAAAAAIZELuoAAAAAesiBRAAAAADIDXwHAAAAANY8KSQAAAAAzq0RkAAAAADNNOjsAAAAABZVTTsAAAAAMOJYbAAAAABNxQyWAAAAAOtIPlMAAAAAoxn9VQAAAAAyfQpBAAAAAFCxFm4AAAAAeul3ewAAAADixnHqAAAAACqMW+EAAAAAByiTLAAAAACfpwVeAAAAADppPwAAAAAANCiqtgAAAABv7Qs5AAAAAP5TNKcAAAAAOMhxeQAAAAAww45yAAAAALOIDngAAAAApdPt+gAAAABO/05GAAAAAHDeD98AAAAAH7Ye8QAAAADnCh6QAAAAANAEPSwAAAAAlwTimwAAAAB2scxWAAAAAMq2rHkAAAAAB/b8EwAAAAAQSW3SAAAAAMuKLnsAAAAAmRG5UAAAAAC2sGuCAAAAAIpjqUEAAAAAgvq4QgAAAABBhm3kAAAAAGa9p+kAAAAAJYzu4QAAAADz1dQfAAAAANC6C5YAAAAAbvuQowAAAABJEwlLAAAAAHwIY4oAAAAASP5riAAAAABzZpcJAAAAAKrZpgYAAAAAWaJRswAAAADVqREtAAAAAKjAN84AAAAACawW3wAAAADrqt14AAAAADA/MwwAAAAA6VoIkQAAAADKRR/qAAAAAGMhqpUAAAAA2gUL3QAAAADMhsOJAAAAAHy9Ru8AAAAApXZ9wwAAAAA6EmlLAAAAAM6zmT0AAAAAkPNYqQAAAAD6QheVAAAAAJ82DQgAAAAAqH9qbAAAAACFQMpFAAAAAMH5ZNsAAAAAq5UO+QAAAABRdDH1AAAAAMJR3gsAAAAAlcaqzQAAAADzWWEdAAAAAIQKqtoAAAAAwrvNwAAAAAD17N09AAAAABGwSVwAAAAA+rnH+AAAAAARTa6vAAAAAJLCGA4AAAAAIGRTswAAAAAbOwKbAAAAAFyofAUAAAAALDaxtAAAAADSpDyBAAAAABSlmB8AAAAAEX19IwAAAACEwHF8AAAAADXsqPwAAAAAe5IE4QAAAACZqU9pAAAAAGzMKdEAAAAAZuqdjQAAAABnaWP4AAAAAPW3T6wAAAAApqmHCgAAAACkUeS0AAAAANd0gnIAAAAAJ1219gAAAADYblUyAAAAANaeA3MAAAAA2JW20QAAAABE13o1AAAAAHE31nUAAAAAmABkGAAAAABaQFzGAAAAAKeAVTQAAAAAOU+S1wAAAABpLdz8AAAAAHOfZiIAAAAAKWLzEgAAAABSkJu/AAAAAKedPQQAAAAACtxWlAAAAABVHrLEAAAAAIgBSwMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==",
|
| 117 |
+
"cuda_rng_state": "BWVGVHP/DAAAAAAAAAAAAA=="
|
| 118 |
+
}
|
checkpoints/step_00020000/.complete
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"files": {
|
| 4 |
+
"config.json": "7d7a8111595963bc28a9b70cda8402bec27f0d6ca34b35fef100bfe5dba4ebeb",
|
| 5 |
+
"model.safetensors": "6e0247e396727e6b42c2736ec0876b41bcf648a2fd83b35bcec96090d9553ad8",
|
| 6 |
+
"optimizer.safetensors": "548339b52fbba1aaeb1c93c53e43c02c1687ced1f3a66862c537ff20bbdd28af",
|
| 7 |
+
"training_state.json": "e26cf17aa74553b6bd8ac2d40c17fbba2113629474f78a1592efb4a0d80b1de1"
|
| 8 |
+
}
|
| 9 |
+
}
|
checkpoints/step_00020000/config.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"checkpoint_type": "pretrain",
|
| 4 |
+
"model_config": {
|
| 5 |
+
"vocab_size": 4278,
|
| 6 |
+
"max_seq_len": 256,
|
| 7 |
+
"n_outcomes": 5,
|
| 8 |
+
"d_model": 256,
|
| 9 |
+
"n_layers": 8,
|
| 10 |
+
"n_heads": 4,
|
| 11 |
+
"d_ff": 1024,
|
| 12 |
+
"dropout": 0.0,
|
| 13 |
+
"rope_base": 10000.0
|
| 14 |
+
},
|
| 15 |
+
"training_config": {
|
| 16 |
+
"lr": 0.0003,
|
| 17 |
+
"weight_decay": 0.01,
|
| 18 |
+
"max_grad_norm": 1.0,
|
| 19 |
+
"warmup_steps": 1000,
|
| 20 |
+
"total_steps": 100000,
|
| 21 |
+
"batch_size": 256,
|
| 22 |
+
"max_ply": 256,
|
| 23 |
+
"discard_ply_limit": false,
|
| 24 |
+
"num_workers": 4,
|
| 25 |
+
"use_amp": true,
|
| 26 |
+
"accumulation_steps": 1,
|
| 27 |
+
"log_interval": 10,
|
| 28 |
+
"eval_interval": 500,
|
| 29 |
+
"checkpoint_interval": 5000,
|
| 30 |
+
"base_seed": 42,
|
| 31 |
+
"val_seed": 9223372036854775807,
|
| 32 |
+
"val_games": 512,
|
| 33 |
+
"checkpoint_dir": "checkpoints",
|
| 34 |
+
"log_dir": "logs",
|
| 35 |
+
"use_wandb": false,
|
| 36 |
+
"wandb_project": "pawn",
|
| 37 |
+
"device": "cuda"
|
| 38 |
+
}
|
| 39 |
+
}
|
checkpoints/step_00020000/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6e0247e396727e6b42c2736ec0876b41bcf648a2fd83b35bcec96090d9553ad8
|
| 3 |
+
size 38102280
|
checkpoints/step_00020000/optimizer.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:548339b52fbba1aaeb1c93c53e43c02c1687ced1f3a66862c537ff20bbdd28af
|
| 3 |
+
size 76210148
|
checkpoints/step_00020000/training_state.json
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"global_step": 20000,
|
| 4 |
+
"scheduler_state_dict": {
|
| 5 |
+
"step": 20000
|
| 6 |
+
},
|
| 7 |
+
"scaler_state_dict": {
|
| 8 |
+
"scale": 262144.0,
|
| 9 |
+
"growth_factor": 2.0,
|
| 10 |
+
"backoff_factor": 0.5,
|
| 11 |
+
"growth_interval": 2000,
|
| 12 |
+
"_growth_tracker": 453
|
| 13 |
+
},
|
| 14 |
+
"optimizer_meta": {
|
| 15 |
+
"param_groups": [
|
| 16 |
+
{
|
| 17 |
+
"lr": 0.0002761963384930274,
|
| 18 |
+
"betas": [
|
| 19 |
+
0.9,
|
| 20 |
+
0.999
|
| 21 |
+
],
|
| 22 |
+
"eps": 1e-08,
|
| 23 |
+
"weight_decay": 0.01,
|
| 24 |
+
"amsgrad": false,
|
| 25 |
+
"maximize": false,
|
| 26 |
+
"foreach": null,
|
| 27 |
+
"capturable": false,
|
| 28 |
+
"differentiable": false,
|
| 29 |
+
"fused": null,
|
| 30 |
+
"decoupled_weight_decay": true,
|
| 31 |
+
"params": [
|
| 32 |
+
0,
|
| 33 |
+
1,
|
| 34 |
+
2,
|
| 35 |
+
3,
|
| 36 |
+
4,
|
| 37 |
+
5,
|
| 38 |
+
6,
|
| 39 |
+
7,
|
| 40 |
+
8,
|
| 41 |
+
9,
|
| 42 |
+
10,
|
| 43 |
+
11,
|
| 44 |
+
12,
|
| 45 |
+
13,
|
| 46 |
+
14,
|
| 47 |
+
15,
|
| 48 |
+
16,
|
| 49 |
+
17,
|
| 50 |
+
18,
|
| 51 |
+
19,
|
| 52 |
+
20,
|
| 53 |
+
21,
|
| 54 |
+
22,
|
| 55 |
+
23,
|
| 56 |
+
24,
|
| 57 |
+
25,
|
| 58 |
+
26,
|
| 59 |
+
27,
|
| 60 |
+
28,
|
| 61 |
+
29,
|
| 62 |
+
30,
|
| 63 |
+
31,
|
| 64 |
+
32,
|
| 65 |
+
33,
|
| 66 |
+
34,
|
| 67 |
+
35,
|
| 68 |
+
36,
|
| 69 |
+
37,
|
| 70 |
+
38,
|
| 71 |
+
39,
|
| 72 |
+
40,
|
| 73 |
+
41,
|
| 74 |
+
42,
|
| 75 |
+
43,
|
| 76 |
+
44,
|
| 77 |
+
45,
|
| 78 |
+
46,
|
| 79 |
+
47,
|
| 80 |
+
48,
|
| 81 |
+
49,
|
| 82 |
+
50,
|
| 83 |
+
51,
|
| 84 |
+
52,
|
| 85 |
+
53,
|
| 86 |
+
54,
|
| 87 |
+
55,
|
| 88 |
+
56,
|
| 89 |
+
57,
|
| 90 |
+
58,
|
| 91 |
+
59,
|
| 92 |
+
60,
|
| 93 |
+
61,
|
| 94 |
+
62,
|
| 95 |
+
63,
|
| 96 |
+
64,
|
| 97 |
+
65,
|
| 98 |
+
66,
|
| 99 |
+
67,
|
| 100 |
+
68,
|
| 101 |
+
69,
|
| 102 |
+
70,
|
| 103 |
+
71,
|
| 104 |
+
72,
|
| 105 |
+
73,
|
| 106 |
+
74,
|
| 107 |
+
75,
|
| 108 |
+
76,
|
| 109 |
+
77,
|
| 110 |
+
78
|
| 111 |
+
]
|
| 112 |
+
}
|
| 113 |
+
],
|
| 114 |
+
"scalars": null
|
| 115 |
+
},
|
| 116 |
+
"torch_rng_state": "h08rNwf38rOvAAAAAQAAAMIBAAAAAAAA5bVTKAAAAAC1CIbqAAAAAA3rKqAAAAAAGqogVAAAAABigsYrAAAAAAK5m78AAAAAcE43pAAAAAB+YHT7AAAAABSmyhcAAAAAiwIFRwAAAABru0i1AAAAAEAG8YYAAAAAyvLLfgAAAADgU+nAAAAAAFqEKAgAAAAA72kt/gAAAAC/V34GAAAAAJRKvuQAAAAARwrwvAAAAAB8+1buAAAAAHw2FqQAAAAATEURKwAAAADppdGbAAAAALFK1WsAAAAA9FjHJwAAAAAKRlfWAAAAAAxZlR8AAAAA7PS73AAAAABeXBUoAAAAAIA1IjIAAAAAC2JcuwAAAACKVAI/AAAAAJ4PbRkAAAAA/OdaFAAAAAB2VuHVAAAAAOjy5bMAAAAAKivF/gAAAACA5OnEAAAAAC+aNjMAAAAABNa0HQAAAABgCGjwAAAAACVYnKcAAAAAYnQAHgAAAACed89WAAAAAMk1u3oAAAAAUKriUQAAAAC9t94zAAAAAPbZbRcAAAAAGOth4wAAAAAs80jFAAAAAKRt/bQAAAAAa6dpYQAAAABipCt+AAAAAN56VTYAAAAAXpizpwAAAABgJXGrAAAAAFiUPsEAAAAAvOynQAAAAACWnN22AAAAAEPkFa4AAAAAsqkleAAAAAAVdN9zAAAAAHvqifQAAAAAA0n6LwAAAACAuTx4AAAAAD6Z93cAAAAApS7GTwAAAADjfskvAAAAAA+ewj4AAAAAl9DQLAAAAAD9kSH/AAAAAEA+GYgAAAAAvsbprwAAAAB3NN+0AAAAANc6YrwAAAAAnfkv2gAAAACMQfjeAAAAAGA0pCgAAAAAvTnvHAAAAAC4jLlTAAAAALc9p4wAAAAAZjDhogAAAADAlSqiAAAAAAzt0eMAAAAAguPkrgAAAACEQa6HAAAAAFfNjdEAAAAAGoYWVAAAAABF3yVKAAAAADgonYgAAAAAb6PyRAAAAACG7abbAAAAABqiUFAAAAAA0SzSDAAAAACeoR0/AAAAAOgi1CcAAAAA9Dw29QAAAABFfJluAAAAAP8tr7AAAAAAaLAamQAAAABjRZ8PAAAAALEV3aUAAAAAAIRuuwAAAACHGl7bAAAAAIcEIUUAAAAASi25VAAAAADR+U0fAAAAAOWbOB4AAAAAmvLGFAAAAAAJNxX1AAAAAHtE2ScAAAAAk2NOTgAAAAD+65BYAAAAAOsZFekAAAAAWIKxJAAAAABiKM6BAAAAAE1YYQEAAAAA2sop3wAAAABCIl6iAAAAAP/T7yYAAAAAWZNJcQAAAABFOZsSAAAAAFzETvMAAAAAcFP0/AAAAADurpxIAAAAAIaDQdMAAAAAsS/3TAAAAAA0YVttAAAAAGstmGoAAAAAQqLbLAAAAABXzRZ4AAAAANqZ/18AAAAAdVwB9AAAAADsfpg2AAAAAMy2zKoAAAAACSy3KAAAAABDzRafAAAAAAzi404AAAAA3pVkeQAAAAAQ5caLAAAAAHXDzEcAAAAAS59+2QAAAABV2J9+AAAAAMejV/YAAAAAKwKUxwAAAADKYpLRAAAAAPvntMAAAAAA5v2IfQAAAAC9+IekAAAAABN+sCwAAAAAgOOzyQAAAABjjSj0AAAAAG2AsasAAAAA4CMoZgAAAAAHpm84AAAAANC8aZAAAAAAfiO4WQAAAAAzUg0HAAAAAGyLSTsAAAAAXBS5MQAAAADs/KtnAAAAAJO9YCIAAAAAnoesBAAAAAD0ogF9AAAAABKv9CQAAAAA1gbspQAAAABmhttUAAAAAOJ/TG4AAAAAhS0PpAAAAADx5KyDAAAAADmNIvEAAAAA5KJXxAAAAAAGOjP/AAAAADzZdKcAAAAAA8FPFgAAAAALDaYcAAAAAB0U9pgAAAAAr2EncwAAAABok7yTAAAAACRvTNIAAAAAcQKJkQAAAAAvcQE8AAAAALcp3hEAAAAA9ChDqgAAAACs3b0CAAAAAHROJI0AAAAA1R3GBgAAAAA6qGxzAAAAAOeWbGcAAAAAU9x6SgAAAADH/vP6AAAAAKOI8XIAAAAA8KA2VQAAAAAfGgFFAAAAANtgjI0AAAAAsGlzhQAAAABVwSsRAAAAAGXdNw0AAAAAbQSpewAAAAAx9MXlAAAAAMmuVXQAAAAAkTwfDwAAAAB9+wFIAAAAALLWklQAAAAASRZ3CgAAAABCqD25AAAAADrP138AAAAAA9Li1AAAAABj0TYLAAAAAHNsR/oAAAAA2maP7gAAAAB/GyXyAAAAAEsu8MgAAAAAspXNQgAAAAC1Xgb/AAAAALkz90EAAAAA48lYngAAAABm+qG1AAAAAMcXfW0AAAAAE90WugAAAADnSDIFAAAAAJmpDgYAAAAAnAkyCAAAAAD/iCk2AAAAAElp5lsAAAAAeHAxnAAAAADbQgcKAAAAAJ8CEbIAAAAAVY4u5wAAAAAOrWjNAAAAAMMgCpAAAAAAt+qJqQAAAACUbpfvAAAAAFTRiEYAAAAAi7yHngAAAAAyQ1DYAAAAAGJgz/QAAAAA7solGQAAAABxT00rAAAAAArI5bYAAAAARGqyPAAAAABFr7c7AAAAAFEE51wAAAAAum0rtgAAAAAMHHNDAAAAAM3EjogAAAAA4o430QAAAADk7fE+AAAAAAViqkEAAAAAGes0MwAAAABdjpDvAAAAAEbUJRQAAAAAdrnGOgAAAABrQWIuAAAAACvnK2sAAAAAEyRlJgAAAAC/12WyAAAAAEBI67MAAAAAtGGEZwAAAABGu6sdAAAAALuyVuYAAAAAgx0QzgAAAABtMTt3AAAAAMjVDVsAAAAARt941wAAAAAgkpeRAAAAAFLnED4AAAAAjv2/6wAAAADia1FwAAAAAJnk6jwAAAAAJ3jF/AAAAAAhFYtcAAAAAD0VY2UAAAAAd3N11gAAAACzKnTWAAAAAGIujsQAAAAAooiH2gAAAAB/yEYxAAAAADjjTNQAAAAASNHEhwAAAABtncUMAAAAADSX9KIAAAAAbiZNSAAAAACwhbuVAAAAALLTcSYAAAAAmSzpFAAAAABq7Ct3AAAAAPWp3D4AAAAAAe4gpwAAAACsTU9qAAAAAP2tI4QAAAAARNCL7QAAAAA06+aVAAAAAJoNQN0AAAAAJWtPhwAAAAAXEn70AAAAAJssGqkAAAAAAKFQUQAAAACT9kT0AAAAAFu3JOUAAAAANO5gIAAAAAB+VuXIAAAAAPHeI4QAAAAADT7B/wAAAADfokXMAAAAAJfg/f0AAAAArlfEXAAAAAAkYKSUAAAAAEQpA7IAAAAALJqctgAAAACjeaRfAAAAABqE3tEAAAAA0rJopAAAAAD4yLv2AAAAAFOxR8MAAAAAeEtDDwAAAACwX8hdAAAAAKjoZU0AAAAAV26whwAAAAC5K7eKAAAAACxfVTIAAAAA7fBnGAAAAACd50/yAAAAAFP5Jh8AAAAAiSQPKAAAAADlfY5NAAAAAP/8zuIAAAAAoS9UEgAAAAD7pTW4AAAAAHB5cVIAAAAARbh83AAAAADaocV3AAAAAG7HtekAAAAAmNEfcwAAAAD3YdvOAAAAAJPCtA4AAAAAoxmOwwAAAABrPuElAAAAAMak4r8AAAAAAUC/cgAAAAAhZCuIAAAAAN2mf+kAAAAAx83h5gAAAAAmbYMwAAAAAJiCnrQAAAAAQU55rAAAAADw+3SUAAAAAM/2XnEAAAAAcU07SwAAAADA5e+DAAAAAHttOgkAAAAAiGbszAAAAABluI1oAAAAAH2uALwAAAAAGUF/5wAAAABmMR5KAAAAALSCYAUAAAAAiKGWXwAAAAAnvn1PAAAAAAqH+bsAAAAABwJn9QAAAAAzsIpwAAAAABrZ0GcAAAAAhuYe5wAAAABO+FTkAAAAAM2/KvIAAAAAsRPS3gAAAACdx440AAAAAP4tumYAAAAA+bmCzQAAAADJHxanAAAAAPXucQIAAAAAonSCwAAAAACgjOtnAAAAABoWVc0AAAAAkdoAbwAAAACz7aNzAAAAAKW4ruYAAAAA8cr63gAAAABE6SNJAAAAAD2BPGMAAAAAdnPyEgAAAAD8U+XcAAAAAAcdXwAAAAAAEvif/wAAAAD8rlYaAAAAACG0El4AAAAA3437sgAAAAALlh8WAAAAAG9kxgcAAAAAdz6Z+QAAAACUVLt6AAAAAMOofScAAAAAiWSpbAAAAACQguXTAAAAAFroExIAAAAApWtqzgAAAABNjjRLAAAAAGf7vuUAAAAAHZvizwAAAAAHed+7AAAAABQIe3UAAAAA4XtGOAAAAAASc2AdAAAAANHNDk8AAAAAJzMFKAAAAAAXXfpMAAAAABu0ce0AAAAA4/NkJgAAAAA/+1OAAAAAADu2hoIAAAAAZmCDfwAAAADXQL4fAAAAAAS4ug0AAAAAG+OmAgAAAADu60ldAAAAAMsuxCUAAAAAj8UWaQAAAACr0RxfAAAAAAMdSFQAAAAA36GiRwAAAABk4uomAAAAAFcoM5EAAAAAs+yK7gAAAABcwIvCAAAAAIIBImQAAAAAfG9I6AAAAAAn4QM2AAAAALGuHggAAAAAcaGswgAAAACpMriNAAAAAI7aXk0AAAAAZTO/DgAAAABLMd42AAAAAEU6FlIAAAAAPk5rDAAAAADcO1eQAAAAABikK+4AAAAA6IPhsAAAAAALNbnjAAAAAH5zJeMAAAAAkpFEPwAAAABcpLkXAAAAAEQpk1gAAAAAg1qEDQAAAABVMEyiAAAAANdjUEYAAAAAlsVqdgAAAAAvFPWeAAAAAKqzNcIAAAAA98nh2wAAAAD29MteAAAAAAO9kOUAAAAAt+873wAAAAAAY/ZgAAAAAFdmymYAAAAAh6+t/wAAAACAUVNnAAAAAGFVacwAAAAAhn/nDQAAAADUmhjPAAAAAHmQWBgAAAAAql9jiwAAAACFC0uIAAAAAMPDQeUAAAAAR0NZ1QAAAABBn5deAAAAAB5TwDYAAAAAJuVeBQAAAABkVY1bAAAAAFMthYgAAAAA3ZKyXQAAAABC6R5uAAAAAPHG1l8AAAAA9cob5QAAAABoNpXQAAAAAADVHtoAAAAAoXyWdAAAAABzlNNLAAAAAAd7JTwAAAAAFQy61gAAAADNezk1AAAAAD3N/EMAAAAAqi9OHQAAAADu9V/uAAAAALEpjVkAAAAAy2UzWwAAAAC2chu5AAAAAIVrQEIAAAAAItdI3gAAAADERnBmAAAAAA34/pwAAAAARc9ZfAAAAAAlM5R9AAAAAMUAqCsAAAAAf48ghQAAAADY/1U9AAAAAGIcDcwAAAAAocqYuAAAAAA0etJoAAAAALJFCoYAAAAA3+LS9gAAAAAG6CGqAAAAAIZELuoAAAAAesiBRAAAAADIDXwHAAAAANY8KSQAAAAAzq0RkAAAAADNNOjsAAAAABZVTTsAAAAAMOJYbAAAAABNxQyWAAAAAOtIPlMAAAAAoxn9VQAAAAAyfQpBAAAAAFCxFm4AAAAAeul3ewAAAADixnHqAAAAACqMW+EAAAAAByiTLAAAAACfpwVeAAAAADppPwAAAAAANCiqtgAAAABv7Qs5AAAAAP5TNKcAAAAAOMhxeQAAAAAww45yAAAAALOIDngAAAAApdPt+gAAAABO/05GAAAAAHDeD98AAAAAH7Ye8QAAAADnCh6QAAAAANAEPSwAAAAAlwTimwAAAAB2scxWAAAAAMq2rHkAAAAAB/b8EwAAAAAQSW3SAAAAAMuKLnsAAAAAmRG5UAAAAAC2sGuCAAAAAIpjqUEAAAAAgvq4QgAAAABBhm3kAAAAAGa9p+kAAAAAJYzu4QAAAADz1dQfAAAAANC6C5YAAAAAbvuQowAAAABJEwlLAAAAAHwIY4oAAAAASP5riAAAAABzZpcJAAAAAKrZpgYAAAAAWaJRswAAAADVqREtAAAAAKjAN84AAAAACawW3wAAAADrqt14AAAAADA/MwwAAAAA6VoIkQAAAADKRR/qAAAAAGMhqpUAAAAA2gUL3QAAAADMhsOJAAAAAHy9Ru8AAAAApXZ9wwAAAAA6EmlLAAAAAM6zmT0AAAAAkPNYqQAAAAD6QheVAAAAAJ82DQgAAAAAqH9qbAAAAACFQMpFAAAAAMH5ZNsAAAAAq5UO+QAAAABRdDH1AAAAAMJR3gsAAAAAlcaqzQAAAADzWWEdAAAAAIQKqtoAAAAAwrvNwAAAAAD17N09AAAAABGwSVwAAAAA+rnH+AAAAAARTa6vAAAAAJLCGA4AAAAAIGRTswAAAAAbOwKbAAAAAFyofAUAAAAALDaxtAAAAADSpDyBAAAAABSlmB8AAAAAEX19IwAAAACEwHF8AAAAADXsqPwAAAAAe5IE4QAAAACZqU9pAAAAAGzMKdEAAAAAZuqdjQAAAABnaWP4AAAAAPW3T6wAAAAApqmHCgAAAACkUeS0AAAAANd0gnIAAAAAJ1219gAAAADYblUyAAAAANaeA3MAAAAA2JW20QAAAABE13o1AAAAAHE31nUAAAAAmABkGAAAAABaQFzGAAAAAKeAVTQAAAAAOU+S1wAAAABpLdz8AAAAAHOfZiIAAAAAKWLzEgAAAABSkJu/AAAAAKedPQQAAAAACtxWlAAAAABVHrLEAAAAAIgBSwMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==",
|
| 117 |
+
"cuda_rng_state": "BWVGVHP/DAAAAAAAAAAAAA=="
|
| 118 |
+
}
|
checkpoints/step_00025000/.complete
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"files": {
|
| 4 |
+
"config.json": "7d7a8111595963bc28a9b70cda8402bec27f0d6ca34b35fef100bfe5dba4ebeb",
|
| 5 |
+
"model.safetensors": "d9e5af2e21c1741223708defe72884bc6f46cebdf21341491ee812be011c60b5",
|
| 6 |
+
"optimizer.safetensors": "5870f7bc98c3bd2ed17b513b6cd5d2ff0f0a348d4c64fc1f4279b2f9374f610d",
|
| 7 |
+
"training_state.json": "de04be01083ffeaad75d7f94e9452a9b7ce1a24e44fb727b040166244769ae49"
|
| 8 |
+
}
|
| 9 |
+
}
|
checkpoints/step_00025000/config.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"checkpoint_type": "pretrain",
|
| 4 |
+
"model_config": {
|
| 5 |
+
"vocab_size": 4278,
|
| 6 |
+
"max_seq_len": 256,
|
| 7 |
+
"n_outcomes": 5,
|
| 8 |
+
"d_model": 256,
|
| 9 |
+
"n_layers": 8,
|
| 10 |
+
"n_heads": 4,
|
| 11 |
+
"d_ff": 1024,
|
| 12 |
+
"dropout": 0.0,
|
| 13 |
+
"rope_base": 10000.0
|
| 14 |
+
},
|
| 15 |
+
"training_config": {
|
| 16 |
+
"lr": 0.0003,
|
| 17 |
+
"weight_decay": 0.01,
|
| 18 |
+
"max_grad_norm": 1.0,
|
| 19 |
+
"warmup_steps": 1000,
|
| 20 |
+
"total_steps": 100000,
|
| 21 |
+
"batch_size": 256,
|
| 22 |
+
"max_ply": 256,
|
| 23 |
+
"discard_ply_limit": false,
|
| 24 |
+
"num_workers": 4,
|
| 25 |
+
"use_amp": true,
|
| 26 |
+
"accumulation_steps": 1,
|
| 27 |
+
"log_interval": 10,
|
| 28 |
+
"eval_interval": 500,
|
| 29 |
+
"checkpoint_interval": 5000,
|
| 30 |
+
"base_seed": 42,
|
| 31 |
+
"val_seed": 9223372036854775807,
|
| 32 |
+
"val_games": 512,
|
| 33 |
+
"checkpoint_dir": "checkpoints",
|
| 34 |
+
"log_dir": "logs",
|
| 35 |
+
"use_wandb": false,
|
| 36 |
+
"wandb_project": "pawn",
|
| 37 |
+
"device": "cuda"
|
| 38 |
+
}
|
| 39 |
+
}
|
checkpoints/step_00025000/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d9e5af2e21c1741223708defe72884bc6f46cebdf21341491ee812be011c60b5
|
| 3 |
+
size 38102280
|
checkpoints/step_00025000/optimizer.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5870f7bc98c3bd2ed17b513b6cd5d2ff0f0a348d4c64fc1f4279b2f9374f610d
|
| 3 |
+
size 76210148
|
checkpoints/step_00025000/training_state.json
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"global_step": 25000,
|
| 4 |
+
"scheduler_state_dict": {
|
| 5 |
+
"step": 25000
|
| 6 |
+
},
|
| 7 |
+
"scaler_state_dict": {
|
| 8 |
+
"scale": 1048576.0,
|
| 9 |
+
"growth_factor": 2.0,
|
| 10 |
+
"backoff_factor": 0.5,
|
| 11 |
+
"growth_interval": 2000,
|
| 12 |
+
"_growth_tracker": 1453
|
| 13 |
+
},
|
| 14 |
+
"optimizer_meta": {
|
| 15 |
+
"param_groups": [
|
| 16 |
+
{
|
| 17 |
+
"lr": 0.00026270409514418447,
|
| 18 |
+
"betas": [
|
| 19 |
+
0.9,
|
| 20 |
+
0.999
|
| 21 |
+
],
|
| 22 |
+
"eps": 1e-08,
|
| 23 |
+
"weight_decay": 0.01,
|
| 24 |
+
"amsgrad": false,
|
| 25 |
+
"maximize": false,
|
| 26 |
+
"foreach": null,
|
| 27 |
+
"capturable": false,
|
| 28 |
+
"differentiable": false,
|
| 29 |
+
"fused": null,
|
| 30 |
+
"decoupled_weight_decay": true,
|
| 31 |
+
"params": [
|
| 32 |
+
0,
|
| 33 |
+
1,
|
| 34 |
+
2,
|
| 35 |
+
3,
|
| 36 |
+
4,
|
| 37 |
+
5,
|
| 38 |
+
6,
|
| 39 |
+
7,
|
| 40 |
+
8,
|
| 41 |
+
9,
|
| 42 |
+
10,
|
| 43 |
+
11,
|
| 44 |
+
12,
|
| 45 |
+
13,
|
| 46 |
+
14,
|
| 47 |
+
15,
|
| 48 |
+
16,
|
| 49 |
+
17,
|
| 50 |
+
18,
|
| 51 |
+
19,
|
| 52 |
+
20,
|
| 53 |
+
21,
|
| 54 |
+
22,
|
| 55 |
+
23,
|
| 56 |
+
24,
|
| 57 |
+
25,
|
| 58 |
+
26,
|
| 59 |
+
27,
|
| 60 |
+
28,
|
| 61 |
+
29,
|
| 62 |
+
30,
|
| 63 |
+
31,
|
| 64 |
+
32,
|
| 65 |
+
33,
|
| 66 |
+
34,
|
| 67 |
+
35,
|
| 68 |
+
36,
|
| 69 |
+
37,
|
| 70 |
+
38,
|
| 71 |
+
39,
|
| 72 |
+
40,
|
| 73 |
+
41,
|
| 74 |
+
42,
|
| 75 |
+
43,
|
| 76 |
+
44,
|
| 77 |
+
45,
|
| 78 |
+
46,
|
| 79 |
+
47,
|
| 80 |
+
48,
|
| 81 |
+
49,
|
| 82 |
+
50,
|
| 83 |
+
51,
|
| 84 |
+
52,
|
| 85 |
+
53,
|
| 86 |
+
54,
|
| 87 |
+
55,
|
| 88 |
+
56,
|
| 89 |
+
57,
|
| 90 |
+
58,
|
| 91 |
+
59,
|
| 92 |
+
60,
|
| 93 |
+
61,
|
| 94 |
+
62,
|
| 95 |
+
63,
|
| 96 |
+
64,
|
| 97 |
+
65,
|
| 98 |
+
66,
|
| 99 |
+
67,
|
| 100 |
+
68,
|
| 101 |
+
69,
|
| 102 |
+
70,
|
| 103 |
+
71,
|
| 104 |
+
72,
|
| 105 |
+
73,
|
| 106 |
+
74,
|
| 107 |
+
75,
|
| 108 |
+
76,
|
| 109 |
+
77,
|
| 110 |
+
78
|
| 111 |
+
]
|
| 112 |
+
}
|
| 113 |
+
],
|
| 114 |
+
"scalars": null
|
| 115 |
+
},
|
| 116 |
+
"torch_rng_state": "h08rNwf38rOvAAAAAQAAAMIBAAAAAAAA5bVTKAAAAAC1CIbqAAAAAA3rKqAAAAAAGqogVAAAAABigsYrAAAAAAK5m78AAAAAcE43pAAAAAB+YHT7AAAAABSmyhcAAAAAiwIFRwAAAABru0i1AAAAAEAG8YYAAAAAyvLLfgAAAADgU+nAAAAAAFqEKAgAAAAA72kt/gAAAAC/V34GAAAAAJRKvuQAAAAARwrwvAAAAAB8+1buAAAAAHw2FqQAAAAATEURKwAAAADppdGbAAAAALFK1WsAAAAA9FjHJwAAAAAKRlfWAAAAAAxZlR8AAAAA7PS73AAAAABeXBUoAAAAAIA1IjIAAAAAC2JcuwAAAACKVAI/AAAAAJ4PbRkAAAAA/OdaFAAAAAB2VuHVAAAAAOjy5bMAAAAAKivF/gAAAACA5OnEAAAAAC+aNjMAAAAABNa0HQAAAABgCGjwAAAAACVYnKcAAAAAYnQAHgAAAACed89WAAAAAMk1u3oAAAAAUKriUQAAAAC9t94zAAAAAPbZbRcAAAAAGOth4wAAAAAs80jFAAAAAKRt/bQAAAAAa6dpYQAAAABipCt+AAAAAN56VTYAAAAAXpizpwAAAABgJXGrAAAAAFiUPsEAAAAAvOynQAAAAACWnN22AAAAAEPkFa4AAAAAsqkleAAAAAAVdN9zAAAAAHvqifQAAAAAA0n6LwAAAACAuTx4AAAAAD6Z93cAAAAApS7GTwAAAADjfskvAAAAAA+ewj4AAAAAl9DQLAAAAAD9kSH/AAAAAEA+GYgAAAAAvsbprwAAAAB3NN+0AAAAANc6YrwAAAAAnfkv2gAAAACMQfjeAAAAAGA0pCgAAAAAvTnvHAAAAAC4jLlTAAAAALc9p4wAAAAAZjDhogAAAADAlSqiAAAAAAzt0eMAAAAAguPkrgAAAACEQa6HAAAAAFfNjdEAAAAAGoYWVAAAAABF3yVKAAAAADgonYgAAAAAb6PyRAAAAACG7abbAAAAABqiUFAAAAAA0SzSDAAAAACeoR0/AAAAAOgi1CcAAAAA9Dw29QAAAABFfJluAAAAAP8tr7AAAAAAaLAamQAAAABjRZ8PAAAAALEV3aUAAAAAAIRuuwAAAACHGl7bAAAAAIcEIUUAAAAASi25VAAAAADR+U0fAAAAAOWbOB4AAAAAmvLGFAAAAAAJNxX1AAAAAHtE2ScAAAAAk2NOTgAAAAD+65BYAAAAAOsZFekAAAAAWIKxJAAAAABiKM6BAAAAAE1YYQEAAAAA2sop3wAAAABCIl6iAAAAAP/T7yYAAAAAWZNJcQAAAABFOZsSAAAAAFzETvMAAAAAcFP0/AAAAADurpxIAAAAAIaDQdMAAAAAsS/3TAAAAAA0YVttAAAAAGstmGoAAAAAQqLbLAAAAABXzRZ4AAAAANqZ/18AAAAAdVwB9AAAAADsfpg2AAAAAMy2zKoAAAAACSy3KAAAAABDzRafAAAAAAzi404AAAAA3pVkeQAAAAAQ5caLAAAAAHXDzEcAAAAAS59+2QAAAABV2J9+AAAAAMejV/YAAAAAKwKUxwAAAADKYpLRAAAAAPvntMAAAAAA5v2IfQAAAAC9+IekAAAAABN+sCwAAAAAgOOzyQAAAABjjSj0AAAAAG2AsasAAAAA4CMoZgAAAAAHpm84AAAAANC8aZAAAAAAfiO4WQAAAAAzUg0HAAAAAGyLSTsAAAAAXBS5MQAAAADs/KtnAAAAAJO9YCIAAAAAnoesBAAAAAD0ogF9AAAAABKv9CQAAAAA1gbspQAAAABmhttUAAAAAOJ/TG4AAAAAhS0PpAAAAADx5KyDAAAAADmNIvEAAAAA5KJXxAAAAAAGOjP/AAAAADzZdKcAAAAAA8FPFgAAAAALDaYcAAAAAB0U9pgAAAAAr2EncwAAAABok7yTAAAAACRvTNIAAAAAcQKJkQAAAAAvcQE8AAAAALcp3hEAAAAA9ChDqgAAAACs3b0CAAAAAHROJI0AAAAA1R3GBgAAAAA6qGxzAAAAAOeWbGcAAAAAU9x6SgAAAADH/vP6AAAAAKOI8XIAAAAA8KA2VQAAAAAfGgFFAAAAANtgjI0AAAAAsGlzhQAAAABVwSsRAAAAAGXdNw0AAAAAbQSpewAAAAAx9MXlAAAAAMmuVXQAAAAAkTwfDwAAAAB9+wFIAAAAALLWklQAAAAASRZ3CgAAAABCqD25AAAAADrP138AAAAAA9Li1AAAAABj0TYLAAAAAHNsR/oAAAAA2maP7gAAAAB/GyXyAAAAAEsu8MgAAAAAspXNQgAAAAC1Xgb/AAAAALkz90EAAAAA48lYngAAAABm+qG1AAAAAMcXfW0AAAAAE90WugAAAADnSDIFAAAAAJmpDgYAAAAAnAkyCAAAAAD/iCk2AAAAAElp5lsAAAAAeHAxnAAAAADbQgcKAAAAAJ8CEbIAAAAAVY4u5wAAAAAOrWjNAAAAAMMgCpAAAAAAt+qJqQAAAACUbpfvAAAAAFTRiEYAAAAAi7yHngAAAAAyQ1DYAAAAAGJgz/QAAAAA7solGQAAAABxT00rAAAAAArI5bYAAAAARGqyPAAAAABFr7c7AAAAAFEE51wAAAAAum0rtgAAAAAMHHNDAAAAAM3EjogAAAAA4o430QAAAADk7fE+AAAAAAViqkEAAAAAGes0MwAAAABdjpDvAAAAAEbUJRQAAAAAdrnGOgAAAABrQWIuAAAAACvnK2sAAAAAEyRlJgAAAAC/12WyAAAAAEBI67MAAAAAtGGEZwAAAABGu6sdAAAAALuyVuYAAAAAgx0QzgAAAABtMTt3AAAAAMjVDVsAAAAARt941wAAAAAgkpeRAAAAAFLnED4AAAAAjv2/6wAAAADia1FwAAAAAJnk6jwAAAAAJ3jF/AAAAAAhFYtcAAAAAD0VY2UAAAAAd3N11gAAAACzKnTWAAAAAGIujsQAAAAAooiH2gAAAAB/yEYxAAAAADjjTNQAAAAASNHEhwAAAABtncUMAAAAADSX9KIAAAAAbiZNSAAAAACwhbuVAAAAALLTcSYAAAAAmSzpFAAAAABq7Ct3AAAAAPWp3D4AAAAAAe4gpwAAAACsTU9qAAAAAP2tI4QAAAAARNCL7QAAAAA06+aVAAAAAJoNQN0AAAAAJWtPhwAAAAAXEn70AAAAAJssGqkAAAAAAKFQUQAAAACT9kT0AAAAAFu3JOUAAAAANO5gIAAAAAB+VuXIAAAAAPHeI4QAAAAADT7B/wAAAADfokXMAAAAAJfg/f0AAAAArlfEXAAAAAAkYKSUAAAAAEQpA7IAAAAALJqctgAAAACjeaRfAAAAABqE3tEAAAAA0rJopAAAAAD4yLv2AAAAAFOxR8MAAAAAeEtDDwAAAACwX8hdAAAAAKjoZU0AAAAAV26whwAAAAC5K7eKAAAAACxfVTIAAAAA7fBnGAAAAACd50/yAAAAAFP5Jh8AAAAAiSQPKAAAAADlfY5NAAAAAP/8zuIAAAAAoS9UEgAAAAD7pTW4AAAAAHB5cVIAAAAARbh83AAAAADaocV3AAAAAG7HtekAAAAAmNEfcwAAAAD3YdvOAAAAAJPCtA4AAAAAoxmOwwAAAABrPuElAAAAAMak4r8AAAAAAUC/cgAAAAAhZCuIAAAAAN2mf+kAAAAAx83h5gAAAAAmbYMwAAAAAJiCnrQAAAAAQU55rAAAAADw+3SUAAAAAM/2XnEAAAAAcU07SwAAAADA5e+DAAAAAHttOgkAAAAAiGbszAAAAABluI1oAAAAAH2uALwAAAAAGUF/5wAAAABmMR5KAAAAALSCYAUAAAAAiKGWXwAAAAAnvn1PAAAAAAqH+bsAAAAABwJn9QAAAAAzsIpwAAAAABrZ0GcAAAAAhuYe5wAAAABO+FTkAAAAAM2/KvIAAAAAsRPS3gAAAACdx440AAAAAP4tumYAAAAA+bmCzQAAAADJHxanAAAAAPXucQIAAAAAonSCwAAAAACgjOtnAAAAABoWVc0AAAAAkdoAbwAAAACz7aNzAAAAAKW4ruYAAAAA8cr63gAAAABE6SNJAAAAAD2BPGMAAAAAdnPyEgAAAAD8U+XcAAAAAAcdXwAAAAAAEvif/wAAAAD8rlYaAAAAACG0El4AAAAA3437sgAAAAALlh8WAAAAAG9kxgcAAAAAdz6Z+QAAAACUVLt6AAAAAMOofScAAAAAiWSpbAAAAACQguXTAAAAAFroExIAAAAApWtqzgAAAABNjjRLAAAAAGf7vuUAAAAAHZvizwAAAAAHed+7AAAAABQIe3UAAAAA4XtGOAAAAAASc2AdAAAAANHNDk8AAAAAJzMFKAAAAAAXXfpMAAAAABu0ce0AAAAA4/NkJgAAAAA/+1OAAAAAADu2hoIAAAAAZmCDfwAAAADXQL4fAAAAAAS4ug0AAAAAG+OmAgAAAADu60ldAAAAAMsuxCUAAAAAj8UWaQAAAACr0RxfAAAAAAMdSFQAAAAA36GiRwAAAABk4uomAAAAAFcoM5EAAAAAs+yK7gAAAABcwIvCAAAAAIIBImQAAAAAfG9I6AAAAAAn4QM2AAAAALGuHggAAAAAcaGswgAAAACpMriNAAAAAI7aXk0AAAAAZTO/DgAAAABLMd42AAAAAEU6FlIAAAAAPk5rDAAAAADcO1eQAAAAABikK+4AAAAA6IPhsAAAAAALNbnjAAAAAH5zJeMAAAAAkpFEPwAAAABcpLkXAAAAAEQpk1gAAAAAg1qEDQAAAABVMEyiAAAAANdjUEYAAAAAlsVqdgAAAAAvFPWeAAAAAKqzNcIAAAAA98nh2wAAAAD29MteAAAAAAO9kOUAAAAAt+873wAAAAAAY/ZgAAAAAFdmymYAAAAAh6+t/wAAAACAUVNnAAAAAGFVacwAAAAAhn/nDQAAAADUmhjPAAAAAHmQWBgAAAAAql9jiwAAAACFC0uIAAAAAMPDQeUAAAAAR0NZ1QAAAABBn5deAAAAAB5TwDYAAAAAJuVeBQAAAABkVY1bAAAAAFMthYgAAAAA3ZKyXQAAAABC6R5uAAAAAPHG1l8AAAAA9cob5QAAAABoNpXQAAAAAADVHtoAAAAAoXyWdAAAAABzlNNLAAAAAAd7JTwAAAAAFQy61gAAAADNezk1AAAAAD3N/EMAAAAAqi9OHQAAAADu9V/uAAAAALEpjVkAAAAAy2UzWwAAAAC2chu5AAAAAIVrQEIAAAAAItdI3gAAAADERnBmAAAAAA34/pwAAAAARc9ZfAAAAAAlM5R9AAAAAMUAqCsAAAAAf48ghQAAAADY/1U9AAAAAGIcDcwAAAAAocqYuAAAAAA0etJoAAAAALJFCoYAAAAA3+LS9gAAAAAG6CGqAAAAAIZELuoAAAAAesiBRAAAAADIDXwHAAAAANY8KSQAAAAAzq0RkAAAAADNNOjsAAAAABZVTTsAAAAAMOJYbAAAAABNxQyWAAAAAOtIPlMAAAAAoxn9VQAAAAAyfQpBAAAAAFCxFm4AAAAAeul3ewAAAADixnHqAAAAACqMW+EAAAAAByiTLAAAAACfpwVeAAAAADppPwAAAAAANCiqtgAAAABv7Qs5AAAAAP5TNKcAAAAAOMhxeQAAAAAww45yAAAAALOIDngAAAAApdPt+gAAAABO/05GAAAAAHDeD98AAAAAH7Ye8QAAAADnCh6QAAAAANAEPSwAAAAAlwTimwAAAAB2scxWAAAAAMq2rHkAAAAAB/b8EwAAAAAQSW3SAAAAAMuKLnsAAAAAmRG5UAAAAAC2sGuCAAAAAIpjqUEAAAAAgvq4QgAAAABBhm3kAAAAAGa9p+kAAAAAJYzu4QAAAADz1dQfAAAAANC6C5YAAAAAbvuQowAAAABJEwlLAAAAAHwIY4oAAAAASP5riAAAAABzZpcJAAAAAKrZpgYAAAAAWaJRswAAAADVqREtAAAAAKjAN84AAAAACawW3wAAAADrqt14AAAAADA/MwwAAAAA6VoIkQAAAADKRR/qAAAAAGMhqpUAAAAA2gUL3QAAAADMhsOJAAAAAHy9Ru8AAAAApXZ9wwAAAAA6EmlLAAAAAM6zmT0AAAAAkPNYqQAAAAD6QheVAAAAAJ82DQgAAAAAqH9qbAAAAACFQMpFAAAAAMH5ZNsAAAAAq5UO+QAAAABRdDH1AAAAAMJR3gsAAAAAlcaqzQAAAADzWWEdAAAAAIQKqtoAAAAAwrvNwAAAAAD17N09AAAAABGwSVwAAAAA+rnH+AAAAAARTa6vAAAAAJLCGA4AAAAAIGRTswAAAAAbOwKbAAAAAFyofAUAAAAALDaxtAAAAADSpDyBAAAAABSlmB8AAAAAEX19IwAAAACEwHF8AAAAADXsqPwAAAAAe5IE4QAAAACZqU9pAAAAAGzMKdEAAAAAZuqdjQAAAABnaWP4AAAAAPW3T6wAAAAApqmHCgAAAACkUeS0AAAAANd0gnIAAAAAJ1219gAAAADYblUyAAAAANaeA3MAAAAA2JW20QAAAABE13o1AAAAAHE31nUAAAAAmABkGAAAAABaQFzGAAAAAKeAVTQAAAAAOU+S1wAAAABpLdz8AAAAAHOfZiIAAAAAKWLzEgAAAABSkJu/AAAAAKedPQQAAAAACtxWlAAAAABVHrLEAAAAAIgBSwMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==",
|
| 117 |
+
"cuda_rng_state": "BWVGVHP/DAAAAAAAAAAAAA=="
|
| 118 |
+
}
|
checkpoints/step_00030000/.complete
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"files": {
|
| 4 |
+
"config.json": "7d7a8111595963bc28a9b70cda8402bec27f0d6ca34b35fef100bfe5dba4ebeb",
|
| 5 |
+
"model.safetensors": "6898c498ca598be96f4da7127a55f1c842ac2e4ea3b2e9946511f43c5b844892",
|
| 6 |
+
"optimizer.safetensors": "25600d4529bc5ea43402dbc70deae02e988c52769ef155347e277967d0ef9a22",
|
| 7 |
+
"training_state.json": "2cf288615e89ba446df8bbfdb84ae48551002e8c0659a0ddb53f948b5e24e724"
|
| 8 |
+
}
|
| 9 |
+
}
|
checkpoints/step_00030000/config.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"checkpoint_type": "pretrain",
|
| 4 |
+
"model_config": {
|
| 5 |
+
"vocab_size": 4278,
|
| 6 |
+
"max_seq_len": 256,
|
| 7 |
+
"n_outcomes": 5,
|
| 8 |
+
"d_model": 256,
|
| 9 |
+
"n_layers": 8,
|
| 10 |
+
"n_heads": 4,
|
| 11 |
+
"d_ff": 1024,
|
| 12 |
+
"dropout": 0.0,
|
| 13 |
+
"rope_base": 10000.0
|
| 14 |
+
},
|
| 15 |
+
"training_config": {
|
| 16 |
+
"lr": 0.0003,
|
| 17 |
+
"weight_decay": 0.01,
|
| 18 |
+
"max_grad_norm": 1.0,
|
| 19 |
+
"warmup_steps": 1000,
|
| 20 |
+
"total_steps": 100000,
|
| 21 |
+
"batch_size": 256,
|
| 22 |
+
"max_ply": 256,
|
| 23 |
+
"discard_ply_limit": false,
|
| 24 |
+
"num_workers": 4,
|
| 25 |
+
"use_amp": true,
|
| 26 |
+
"accumulation_steps": 1,
|
| 27 |
+
"log_interval": 10,
|
| 28 |
+
"eval_interval": 500,
|
| 29 |
+
"checkpoint_interval": 5000,
|
| 30 |
+
"base_seed": 42,
|
| 31 |
+
"val_seed": 9223372036854775807,
|
| 32 |
+
"val_games": 512,
|
| 33 |
+
"checkpoint_dir": "checkpoints",
|
| 34 |
+
"log_dir": "logs",
|
| 35 |
+
"use_wandb": false,
|
| 36 |
+
"wandb_project": "pawn",
|
| 37 |
+
"device": "cuda"
|
| 38 |
+
}
|
| 39 |
+
}
|
checkpoints/step_00030000/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6898c498ca598be96f4da7127a55f1c842ac2e4ea3b2e9946511f43c5b844892
|
| 3 |
+
size 38102280
|
checkpoints/step_00030000/optimizer.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:25600d4529bc5ea43402dbc70deae02e988c52769ef155347e277967d0ef9a22
|
| 3 |
+
size 76210148
|
checkpoints/step_00030000/training_state.json
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"global_step": 30000,
|
| 4 |
+
"scheduler_state_dict": {
|
| 5 |
+
"step": 30000
|
| 6 |
+
},
|
| 7 |
+
"scaler_state_dict": {
|
| 8 |
+
"scale": 1048576.0,
|
| 9 |
+
"growth_factor": 2.0,
|
| 10 |
+
"backoff_factor": 0.5,
|
| 11 |
+
"growth_interval": 2000,
|
| 12 |
+
"_growth_tracker": 200
|
| 13 |
+
},
|
| 14 |
+
"optimizer_meta": {
|
| 15 |
+
"param_groups": [
|
| 16 |
+
{
|
| 17 |
+
"lr": 0.00024675730776358497,
|
| 18 |
+
"betas": [
|
| 19 |
+
0.9,
|
| 20 |
+
0.999
|
| 21 |
+
],
|
| 22 |
+
"eps": 1e-08,
|
| 23 |
+
"weight_decay": 0.01,
|
| 24 |
+
"amsgrad": false,
|
| 25 |
+
"maximize": false,
|
| 26 |
+
"foreach": null,
|
| 27 |
+
"capturable": false,
|
| 28 |
+
"differentiable": false,
|
| 29 |
+
"fused": null,
|
| 30 |
+
"decoupled_weight_decay": true,
|
| 31 |
+
"params": [
|
| 32 |
+
0,
|
| 33 |
+
1,
|
| 34 |
+
2,
|
| 35 |
+
3,
|
| 36 |
+
4,
|
| 37 |
+
5,
|
| 38 |
+
6,
|
| 39 |
+
7,
|
| 40 |
+
8,
|
| 41 |
+
9,
|
| 42 |
+
10,
|
| 43 |
+
11,
|
| 44 |
+
12,
|
| 45 |
+
13,
|
| 46 |
+
14,
|
| 47 |
+
15,
|
| 48 |
+
16,
|
| 49 |
+
17,
|
| 50 |
+
18,
|
| 51 |
+
19,
|
| 52 |
+
20,
|
| 53 |
+
21,
|
| 54 |
+
22,
|
| 55 |
+
23,
|
| 56 |
+
24,
|
| 57 |
+
25,
|
| 58 |
+
26,
|
| 59 |
+
27,
|
| 60 |
+
28,
|
| 61 |
+
29,
|
| 62 |
+
30,
|
| 63 |
+
31,
|
| 64 |
+
32,
|
| 65 |
+
33,
|
| 66 |
+
34,
|
| 67 |
+
35,
|
| 68 |
+
36,
|
| 69 |
+
37,
|
| 70 |
+
38,
|
| 71 |
+
39,
|
| 72 |
+
40,
|
| 73 |
+
41,
|
| 74 |
+
42,
|
| 75 |
+
43,
|
| 76 |
+
44,
|
| 77 |
+
45,
|
| 78 |
+
46,
|
| 79 |
+
47,
|
| 80 |
+
48,
|
| 81 |
+
49,
|
| 82 |
+
50,
|
| 83 |
+
51,
|
| 84 |
+
52,
|
| 85 |
+
53,
|
| 86 |
+
54,
|
| 87 |
+
55,
|
| 88 |
+
56,
|
| 89 |
+
57,
|
| 90 |
+
58,
|
| 91 |
+
59,
|
| 92 |
+
60,
|
| 93 |
+
61,
|
| 94 |
+
62,
|
| 95 |
+
63,
|
| 96 |
+
64,
|
| 97 |
+
65,
|
| 98 |
+
66,
|
| 99 |
+
67,
|
| 100 |
+
68,
|
| 101 |
+
69,
|
| 102 |
+
70,
|
| 103 |
+
71,
|
| 104 |
+
72,
|
| 105 |
+
73,
|
| 106 |
+
74,
|
| 107 |
+
75,
|
| 108 |
+
76,
|
| 109 |
+
77,
|
| 110 |
+
78
|
| 111 |
+
]
|
| 112 |
+
}
|
| 113 |
+
],
|
| 114 |
+
"scalars": null
|
| 115 |
+
},
|
| 116 |
+
"torch_rng_state": "h08rNwf38rOvAAAAAQAAAMIBAAAAAAAA5bVTKAAAAAC1CIbqAAAAAA3rKqAAAAAAGqogVAAAAABigsYrAAAAAAK5m78AAAAAcE43pAAAAAB+YHT7AAAAABSmyhcAAAAAiwIFRwAAAABru0i1AAAAAEAG8YYAAAAAyvLLfgAAAADgU+nAAAAAAFqEKAgAAAAA72kt/gAAAAC/V34GAAAAAJRKvuQAAAAARwrwvAAAAAB8+1buAAAAAHw2FqQAAAAATEURKwAAAADppdGbAAAAALFK1WsAAAAA9FjHJwAAAAAKRlfWAAAAAAxZlR8AAAAA7PS73AAAAABeXBUoAAAAAIA1IjIAAAAAC2JcuwAAAACKVAI/AAAAAJ4PbRkAAAAA/OdaFAAAAAB2VuHVAAAAAOjy5bMAAAAAKivF/gAAAACA5OnEAAAAAC+aNjMAAAAABNa0HQAAAABgCGjwAAAAACVYnKcAAAAAYnQAHgAAAACed89WAAAAAMk1u3oAAAAAUKriUQAAAAC9t94zAAAAAPbZbRcAAAAAGOth4wAAAAAs80jFAAAAAKRt/bQAAAAAa6dpYQAAAABipCt+AAAAAN56VTYAAAAAXpizpwAAAABgJXGrAAAAAFiUPsEAAAAAvOynQAAAAACWnN22AAAAAEPkFa4AAAAAsqkleAAAAAAVdN9zAAAAAHvqifQAAAAAA0n6LwAAAACAuTx4AAAAAD6Z93cAAAAApS7GTwAAAADjfskvAAAAAA+ewj4AAAAAl9DQLAAAAAD9kSH/AAAAAEA+GYgAAAAAvsbprwAAAAB3NN+0AAAAANc6YrwAAAAAnfkv2gAAAACMQfjeAAAAAGA0pCgAAAAAvTnvHAAAAAC4jLlTAAAAALc9p4wAAAAAZjDhogAAAADAlSqiAAAAAAzt0eMAAAAAguPkrgAAAACEQa6HAAAAAFfNjdEAAAAAGoYWVAAAAABF3yVKAAAAADgonYgAAAAAb6PyRAAAAACG7abbAAAAABqiUFAAAAAA0SzSDAAAAACeoR0/AAAAAOgi1CcAAAAA9Dw29QAAAABFfJluAAAAAP8tr7AAAAAAaLAamQAAAABjRZ8PAAAAALEV3aUAAAAAAIRuuwAAAACHGl7bAAAAAIcEIUUAAAAASi25VAAAAADR+U0fAAAAAOWbOB4AAAAAmvLGFAAAAAAJNxX1AAAAAHtE2ScAAAAAk2NOTgAAAAD+65BYAAAAAOsZFekAAAAAWIKxJAAAAABiKM6BAAAAAE1YYQEAAAAA2sop3wAAAABCIl6iAAAAAP/T7yYAAAAAWZNJcQAAAABFOZsSAAAAAFzETvMAAAAAcFP0/AAAAADurpxIAAAAAIaDQdMAAAAAsS/3TAAAAAA0YVttAAAAAGstmGoAAAAAQqLbLAAAAABXzRZ4AAAAANqZ/18AAAAAdVwB9AAAAADsfpg2AAAAAMy2zKoAAAAACSy3KAAAAABDzRafAAAAAAzi404AAAAA3pVkeQAAAAAQ5caLAAAAAHXDzEcAAAAAS59+2QAAAABV2J9+AAAAAMejV/YAAAAAKwKUxwAAAADKYpLRAAAAAPvntMAAAAAA5v2IfQAAAAC9+IekAAAAABN+sCwAAAAAgOOzyQAAAABjjSj0AAAAAG2AsasAAAAA4CMoZgAAAAAHpm84AAAAANC8aZAAAAAAfiO4WQAAAAAzUg0HAAAAAGyLSTsAAAAAXBS5MQAAAADs/KtnAAAAAJO9YCIAAAAAnoesBAAAAAD0ogF9AAAAABKv9CQAAAAA1gbspQAAAABmhttUAAAAAOJ/TG4AAAAAhS0PpAAAAADx5KyDAAAAADmNIvEAAAAA5KJXxAAAAAAGOjP/AAAAADzZdKcAAAAAA8FPFgAAAAALDaYcAAAAAB0U9pgAAAAAr2EncwAAAABok7yTAAAAACRvTNIAAAAAcQKJkQAAAAAvcQE8AAAAALcp3hEAAAAA9ChDqgAAAACs3b0CAAAAAHROJI0AAAAA1R3GBgAAAAA6qGxzAAAAAOeWbGcAAAAAU9x6SgAAAADH/vP6AAAAAKOI8XIAAAAA8KA2VQAAAAAfGgFFAAAAANtgjI0AAAAAsGlzhQAAAABVwSsRAAAAAGXdNw0AAAAAbQSpewAAAAAx9MXlAAAAAMmuVXQAAAAAkTwfDwAAAAB9+wFIAAAAALLWklQAAAAASRZ3CgAAAABCqD25AAAAADrP138AAAAAA9Li1AAAAABj0TYLAAAAAHNsR/oAAAAA2maP7gAAAAB/GyXyAAAAAEsu8MgAAAAAspXNQgAAAAC1Xgb/AAAAALkz90EAAAAA48lYngAAAABm+qG1AAAAAMcXfW0AAAAAE90WugAAAADnSDIFAAAAAJmpDgYAAAAAnAkyCAAAAAD/iCk2AAAAAElp5lsAAAAAeHAxnAAAAADbQgcKAAAAAJ8CEbIAAAAAVY4u5wAAAAAOrWjNAAAAAMMgCpAAAAAAt+qJqQAAAACUbpfvAAAAAFTRiEYAAAAAi7yHngAAAAAyQ1DYAAAAAGJgz/QAAAAA7solGQAAAABxT00rAAAAAArI5bYAAAAARGqyPAAAAABFr7c7AAAAAFEE51wAAAAAum0rtgAAAAAMHHNDAAAAAM3EjogAAAAA4o430QAAAADk7fE+AAAAAAViqkEAAAAAGes0MwAAAABdjpDvAAAAAEbUJRQAAAAAdrnGOgAAAABrQWIuAAAAACvnK2sAAAAAEyRlJgAAAAC/12WyAAAAAEBI67MAAAAAtGGEZwAAAABGu6sdAAAAALuyVuYAAAAAgx0QzgAAAABtMTt3AAAAAMjVDVsAAAAARt941wAAAAAgkpeRAAAAAFLnED4AAAAAjv2/6wAAAADia1FwAAAAAJnk6jwAAAAAJ3jF/AAAAAAhFYtcAAAAAD0VY2UAAAAAd3N11gAAAACzKnTWAAAAAGIujsQAAAAAooiH2gAAAAB/yEYxAAAAADjjTNQAAAAASNHEhwAAAABtncUMAAAAADSX9KIAAAAAbiZNSAAAAACwhbuVAAAAALLTcSYAAAAAmSzpFAAAAABq7Ct3AAAAAPWp3D4AAAAAAe4gpwAAAACsTU9qAAAAAP2tI4QAAAAARNCL7QAAAAA06+aVAAAAAJoNQN0AAAAAJWtPhwAAAAAXEn70AAAAAJssGqkAAAAAAKFQUQAAAACT9kT0AAAAAFu3JOUAAAAANO5gIAAAAAB+VuXIAAAAAPHeI4QAAAAADT7B/wAAAADfokXMAAAAAJfg/f0AAAAArlfEXAAAAAAkYKSUAAAAAEQpA7IAAAAALJqctgAAAACjeaRfAAAAABqE3tEAAAAA0rJopAAAAAD4yLv2AAAAAFOxR8MAAAAAeEtDDwAAAACwX8hdAAAAAKjoZU0AAAAAV26whwAAAAC5K7eKAAAAACxfVTIAAAAA7fBnGAAAAACd50/yAAAAAFP5Jh8AAAAAiSQPKAAAAADlfY5NAAAAAP/8zuIAAAAAoS9UEgAAAAD7pTW4AAAAAHB5cVIAAAAARbh83AAAAADaocV3AAAAAG7HtekAAAAAmNEfcwAAAAD3YdvOAAAAAJPCtA4AAAAAoxmOwwAAAABrPuElAAAAAMak4r8AAAAAAUC/cgAAAAAhZCuIAAAAAN2mf+kAAAAAx83h5gAAAAAmbYMwAAAAAJiCnrQAAAAAQU55rAAAAADw+3SUAAAAAM/2XnEAAAAAcU07SwAAAADA5e+DAAAAAHttOgkAAAAAiGbszAAAAABluI1oAAAAAH2uALwAAAAAGUF/5wAAAABmMR5KAAAAALSCYAUAAAAAiKGWXwAAAAAnvn1PAAAAAAqH+bsAAAAABwJn9QAAAAAzsIpwAAAAABrZ0GcAAAAAhuYe5wAAAABO+FTkAAAAAM2/KvIAAAAAsRPS3gAAAACdx440AAAAAP4tumYAAAAA+bmCzQAAAADJHxanAAAAAPXucQIAAAAAonSCwAAAAACgjOtnAAAAABoWVc0AAAAAkdoAbwAAAACz7aNzAAAAAKW4ruYAAAAA8cr63gAAAABE6SNJAAAAAD2BPGMAAAAAdnPyEgAAAAD8U+XcAAAAAAcdXwAAAAAAEvif/wAAAAD8rlYaAAAAACG0El4AAAAA3437sgAAAAALlh8WAAAAAG9kxgcAAAAAdz6Z+QAAAACUVLt6AAAAAMOofScAAAAAiWSpbAAAAACQguXTAAAAAFroExIAAAAApWtqzgAAAABNjjRLAAAAAGf7vuUAAAAAHZvizwAAAAAHed+7AAAAABQIe3UAAAAA4XtGOAAAAAASc2AdAAAAANHNDk8AAAAAJzMFKAAAAAAXXfpMAAAAABu0ce0AAAAA4/NkJgAAAAA/+1OAAAAAADu2hoIAAAAAZmCDfwAAAADXQL4fAAAAAAS4ug0AAAAAG+OmAgAAAADu60ldAAAAAMsuxCUAAAAAj8UWaQAAAACr0RxfAAAAAAMdSFQAAAAA36GiRwAAAABk4uomAAAAAFcoM5EAAAAAs+yK7gAAAABcwIvCAAAAAIIBImQAAAAAfG9I6AAAAAAn4QM2AAAAALGuHggAAAAAcaGswgAAAACpMriNAAAAAI7aXk0AAAAAZTO/DgAAAABLMd42AAAAAEU6FlIAAAAAPk5rDAAAAADcO1eQAAAAABikK+4AAAAA6IPhsAAAAAALNbnjAAAAAH5zJeMAAAAAkpFEPwAAAABcpLkXAAAAAEQpk1gAAAAAg1qEDQAAAABVMEyiAAAAANdjUEYAAAAAlsVqdgAAAAAvFPWeAAAAAKqzNcIAAAAA98nh2wAAAAD29MteAAAAAAO9kOUAAAAAt+873wAAAAAAY/ZgAAAAAFdmymYAAAAAh6+t/wAAAACAUVNnAAAAAGFVacwAAAAAhn/nDQAAAADUmhjPAAAAAHmQWBgAAAAAql9jiwAAAACFC0uIAAAAAMPDQeUAAAAAR0NZ1QAAAABBn5deAAAAAB5TwDYAAAAAJuVeBQAAAABkVY1bAAAAAFMthYgAAAAA3ZKyXQAAAABC6R5uAAAAAPHG1l8AAAAA9cob5QAAAABoNpXQAAAAAADVHtoAAAAAoXyWdAAAAABzlNNLAAAAAAd7JTwAAAAAFQy61gAAAADNezk1AAAAAD3N/EMAAAAAqi9OHQAAAADu9V/uAAAAALEpjVkAAAAAy2UzWwAAAAC2chu5AAAAAIVrQEIAAAAAItdI3gAAAADERnBmAAAAAA34/pwAAAAARc9ZfAAAAAAlM5R9AAAAAMUAqCsAAAAAf48ghQAAAADY/1U9AAAAAGIcDcwAAAAAocqYuAAAAAA0etJoAAAAALJFCoYAAAAA3+LS9gAAAAAG6CGqAAAAAIZELuoAAAAAesiBRAAAAADIDXwHAAAAANY8KSQAAAAAzq0RkAAAAADNNOjsAAAAABZVTTsAAAAAMOJYbAAAAABNxQyWAAAAAOtIPlMAAAAAoxn9VQAAAAAyfQpBAAAAAFCxFm4AAAAAeul3ewAAAADixnHqAAAAACqMW+EAAAAAByiTLAAAAACfpwVeAAAAADppPwAAAAAANCiqtgAAAABv7Qs5AAAAAP5TNKcAAAAAOMhxeQAAAAAww45yAAAAALOIDngAAAAApdPt+gAAAABO/05GAAAAAHDeD98AAAAAH7Ye8QAAAADnCh6QAAAAANAEPSwAAAAAlwTimwAAAAB2scxWAAAAAMq2rHkAAAAAB/b8EwAAAAAQSW3SAAAAAMuKLnsAAAAAmRG5UAAAAAC2sGuCAAAAAIpjqUEAAAAAgvq4QgAAAABBhm3kAAAAAGa9p+kAAAAAJYzu4QAAAADz1dQfAAAAANC6C5YAAAAAbvuQowAAAABJEwlLAAAAAHwIY4oAAAAASP5riAAAAABzZpcJAAAAAKrZpgYAAAAAWaJRswAAAADVqREtAAAAAKjAN84AAAAACawW3wAAAADrqt14AAAAADA/MwwAAAAA6VoIkQAAAADKRR/qAAAAAGMhqpUAAAAA2gUL3QAAAADMhsOJAAAAAHy9Ru8AAAAApXZ9wwAAAAA6EmlLAAAAAM6zmT0AAAAAkPNYqQAAAAD6QheVAAAAAJ82DQgAAAAAqH9qbAAAAACFQMpFAAAAAMH5ZNsAAAAAq5UO+QAAAABRdDH1AAAAAMJR3gsAAAAAlcaqzQAAAADzWWEdAAAAAIQKqtoAAAAAwrvNwAAAAAD17N09AAAAABGwSVwAAAAA+rnH+AAAAAARTa6vAAAAAJLCGA4AAAAAIGRTswAAAAAbOwKbAAAAAFyofAUAAAAALDaxtAAAAADSpDyBAAAAABSlmB8AAAAAEX19IwAAAACEwHF8AAAAADXsqPwAAAAAe5IE4QAAAACZqU9pAAAAAGzMKdEAAAAAZuqdjQAAAABnaWP4AAAAAPW3T6wAAAAApqmHCgAAAACkUeS0AAAAANd0gnIAAAAAJ1219gAAAADYblUyAAAAANaeA3MAAAAA2JW20QAAAABE13o1AAAAAHE31nUAAAAAmABkGAAAAABaQFzGAAAAAKeAVTQAAAAAOU+S1wAAAABpLdz8AAAAAHOfZiIAAAAAKWLzEgAAAABSkJu/AAAAAKedPQQAAAAACtxWlAAAAABVHrLEAAAAAIgBSwMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==",
|
| 117 |
+
"cuda_rng_state": "BWVGVHP/DAAAAAAAAAAAAA=="
|
| 118 |
+
}
|
checkpoints/step_00035000/.complete
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"files": {
|
| 4 |
+
"config.json": "7d7a8111595963bc28a9b70cda8402bec27f0d6ca34b35fef100bfe5dba4ebeb",
|
| 5 |
+
"model.safetensors": "e5a9f5e16da9924652c58cc6b6f786768169ead948c93af3c4943ed25cf5a038",
|
| 6 |
+
"optimizer.safetensors": "c66d9d08168fa28b404928a140e1c595b35edf196d144e8cb300aeb0f739a621",
|
| 7 |
+
"training_state.json": "33e0d35b228c6aceb0a01744688bf6e4daeb746946d627b3725d4b74da94cdd5"
|
| 8 |
+
}
|
| 9 |
+
}
|
checkpoints/step_00035000/config.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"checkpoint_type": "pretrain",
|
| 4 |
+
"model_config": {
|
| 5 |
+
"vocab_size": 4278,
|
| 6 |
+
"max_seq_len": 256,
|
| 7 |
+
"n_outcomes": 5,
|
| 8 |
+
"d_model": 256,
|
| 9 |
+
"n_layers": 8,
|
| 10 |
+
"n_heads": 4,
|
| 11 |
+
"d_ff": 1024,
|
| 12 |
+
"dropout": 0.0,
|
| 13 |
+
"rope_base": 10000.0
|
| 14 |
+
},
|
| 15 |
+
"training_config": {
|
| 16 |
+
"lr": 0.0003,
|
| 17 |
+
"weight_decay": 0.01,
|
| 18 |
+
"max_grad_norm": 1.0,
|
| 19 |
+
"warmup_steps": 1000,
|
| 20 |
+
"total_steps": 100000,
|
| 21 |
+
"batch_size": 256,
|
| 22 |
+
"max_ply": 256,
|
| 23 |
+
"discard_ply_limit": false,
|
| 24 |
+
"num_workers": 4,
|
| 25 |
+
"use_amp": true,
|
| 26 |
+
"accumulation_steps": 1,
|
| 27 |
+
"log_interval": 10,
|
| 28 |
+
"eval_interval": 500,
|
| 29 |
+
"checkpoint_interval": 5000,
|
| 30 |
+
"base_seed": 42,
|
| 31 |
+
"val_seed": 9223372036854775807,
|
| 32 |
+
"val_games": 512,
|
| 33 |
+
"checkpoint_dir": "checkpoints",
|
| 34 |
+
"log_dir": "logs",
|
| 35 |
+
"use_wandb": false,
|
| 36 |
+
"wandb_project": "pawn",
|
| 37 |
+
"device": "cuda"
|
| 38 |
+
}
|
| 39 |
+
}
|
checkpoints/step_00035000/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e5a9f5e16da9924652c58cc6b6f786768169ead948c93af3c4943ed25cf5a038
|
| 3 |
+
size 38102280
|
checkpoints/step_00035000/optimizer.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c66d9d08168fa28b404928a140e1c595b35edf196d144e8cb300aeb0f739a621
|
| 3 |
+
size 76210148
|
checkpoints/step_00035000/training_state.json
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"global_step": 35000,
|
| 4 |
+
"scheduler_state_dict": {
|
| 5 |
+
"step": 35000
|
| 6 |
+
},
|
| 7 |
+
"scaler_state_dict": {
|
| 8 |
+
"scale": 524288.0,
|
| 9 |
+
"growth_factor": 2.0,
|
| 10 |
+
"backoff_factor": 0.5,
|
| 11 |
+
"growth_interval": 2000,
|
| 12 |
+
"_growth_tracker": 1834
|
| 13 |
+
},
|
| 14 |
+
"optimizer_meta": {
|
| 15 |
+
"param_groups": [
|
| 16 |
+
{
|
| 17 |
+
"lr": 0.00022875659509431217,
|
| 18 |
+
"betas": [
|
| 19 |
+
0.9,
|
| 20 |
+
0.999
|
| 21 |
+
],
|
| 22 |
+
"eps": 1e-08,
|
| 23 |
+
"weight_decay": 0.01,
|
| 24 |
+
"amsgrad": false,
|
| 25 |
+
"maximize": false,
|
| 26 |
+
"foreach": null,
|
| 27 |
+
"capturable": false,
|
| 28 |
+
"differentiable": false,
|
| 29 |
+
"fused": null,
|
| 30 |
+
"decoupled_weight_decay": true,
|
| 31 |
+
"params": [
|
| 32 |
+
0,
|
| 33 |
+
1,
|
| 34 |
+
2,
|
| 35 |
+
3,
|
| 36 |
+
4,
|
| 37 |
+
5,
|
| 38 |
+
6,
|
| 39 |
+
7,
|
| 40 |
+
8,
|
| 41 |
+
9,
|
| 42 |
+
10,
|
| 43 |
+
11,
|
| 44 |
+
12,
|
| 45 |
+
13,
|
| 46 |
+
14,
|
| 47 |
+
15,
|
| 48 |
+
16,
|
| 49 |
+
17,
|
| 50 |
+
18,
|
| 51 |
+
19,
|
| 52 |
+
20,
|
| 53 |
+
21,
|
| 54 |
+
22,
|
| 55 |
+
23,
|
| 56 |
+
24,
|
| 57 |
+
25,
|
| 58 |
+
26,
|
| 59 |
+
27,
|
| 60 |
+
28,
|
| 61 |
+
29,
|
| 62 |
+
30,
|
| 63 |
+
31,
|
| 64 |
+
32,
|
| 65 |
+
33,
|
| 66 |
+
34,
|
| 67 |
+
35,
|
| 68 |
+
36,
|
| 69 |
+
37,
|
| 70 |
+
38,
|
| 71 |
+
39,
|
| 72 |
+
40,
|
| 73 |
+
41,
|
| 74 |
+
42,
|
| 75 |
+
43,
|
| 76 |
+
44,
|
| 77 |
+
45,
|
| 78 |
+
46,
|
| 79 |
+
47,
|
| 80 |
+
48,
|
| 81 |
+
49,
|
| 82 |
+
50,
|
| 83 |
+
51,
|
| 84 |
+
52,
|
| 85 |
+
53,
|
| 86 |
+
54,
|
| 87 |
+
55,
|
| 88 |
+
56,
|
| 89 |
+
57,
|
| 90 |
+
58,
|
| 91 |
+
59,
|
| 92 |
+
60,
|
| 93 |
+
61,
|
| 94 |
+
62,
|
| 95 |
+
63,
|
| 96 |
+
64,
|
| 97 |
+
65,
|
| 98 |
+
66,
|
| 99 |
+
67,
|
| 100 |
+
68,
|
| 101 |
+
69,
|
| 102 |
+
70,
|
| 103 |
+
71,
|
| 104 |
+
72,
|
| 105 |
+
73,
|
| 106 |
+
74,
|
| 107 |
+
75,
|
| 108 |
+
76,
|
| 109 |
+
77,
|
| 110 |
+
78
|
| 111 |
+
]
|
| 112 |
+
}
|
| 113 |
+
],
|
| 114 |
+
"scalars": null
|
| 115 |
+
},
|
| 116 |
+
"torch_rng_state": "h08rNwf38rOvAAAAAQAAAMIBAAAAAAAA5bVTKAAAAAC1CIbqAAAAAA3rKqAAAAAAGqogVAAAAABigsYrAAAAAAK5m78AAAAAcE43pAAAAAB+YHT7AAAAABSmyhcAAAAAiwIFRwAAAABru0i1AAAAAEAG8YYAAAAAyvLLfgAAAADgU+nAAAAAAFqEKAgAAAAA72kt/gAAAAC/V34GAAAAAJRKvuQAAAAARwrwvAAAAAB8+1buAAAAAHw2FqQAAAAATEURKwAAAADppdGbAAAAALFK1WsAAAAA9FjHJwAAAAAKRlfWAAAAAAxZlR8AAAAA7PS73AAAAABeXBUoAAAAAIA1IjIAAAAAC2JcuwAAAACKVAI/AAAAAJ4PbRkAAAAA/OdaFAAAAAB2VuHVAAAAAOjy5bMAAAAAKivF/gAAAACA5OnEAAAAAC+aNjMAAAAABNa0HQAAAABgCGjwAAAAACVYnKcAAAAAYnQAHgAAAACed89WAAAAAMk1u3oAAAAAUKriUQAAAAC9t94zAAAAAPbZbRcAAAAAGOth4wAAAAAs80jFAAAAAKRt/bQAAAAAa6dpYQAAAABipCt+AAAAAN56VTYAAAAAXpizpwAAAABgJXGrAAAAAFiUPsEAAAAAvOynQAAAAACWnN22AAAAAEPkFa4AAAAAsqkleAAAAAAVdN9zAAAAAHvqifQAAAAAA0n6LwAAAACAuTx4AAAAAD6Z93cAAAAApS7GTwAAAADjfskvAAAAAA+ewj4AAAAAl9DQLAAAAAD9kSH/AAAAAEA+GYgAAAAAvsbprwAAAAB3NN+0AAAAANc6YrwAAAAAnfkv2gAAAACMQfjeAAAAAGA0pCgAAAAAvTnvHAAAAAC4jLlTAAAAALc9p4wAAAAAZjDhogAAAADAlSqiAAAAAAzt0eMAAAAAguPkrgAAAACEQa6HAAAAAFfNjdEAAAAAGoYWVAAAAABF3yVKAAAAADgonYgAAAAAb6PyRAAAAACG7abbAAAAABqiUFAAAAAA0SzSDAAAAACeoR0/AAAAAOgi1CcAAAAA9Dw29QAAAABFfJluAAAAAP8tr7AAAAAAaLAamQAAAABjRZ8PAAAAALEV3aUAAAAAAIRuuwAAAACHGl7bAAAAAIcEIUUAAAAASi25VAAAAADR+U0fAAAAAOWbOB4AAAAAmvLGFAAAAAAJNxX1AAAAAHtE2ScAAAAAk2NOTgAAAAD+65BYAAAAAOsZFekAAAAAWIKxJAAAAABiKM6BAAAAAE1YYQEAAAAA2sop3wAAAABCIl6iAAAAAP/T7yYAAAAAWZNJcQAAAABFOZsSAAAAAFzETvMAAAAAcFP0/AAAAADurpxIAAAAAIaDQdMAAAAAsS/3TAAAAAA0YVttAAAAAGstmGoAAAAAQqLbLAAAAABXzRZ4AAAAANqZ/18AAAAAdVwB9AAAAADsfpg2AAAAAMy2zKoAAAAACSy3KAAAAABDzRafAAAAAAzi404AAAAA3pVkeQAAAAAQ5caLAAAAAHXDzEcAAAAAS59+2QAAAABV2J9+AAAAAMejV/YAAAAAKwKUxwAAAADKYpLRAAAAAPvntMAAAAAA5v2IfQAAAAC9+IekAAAAABN+sCwAAAAAgOOzyQAAAABjjSj0AAAAAG2AsasAAAAA4CMoZgAAAAAHpm84AAAAANC8aZAAAAAAfiO4WQAAAAAzUg0HAAAAAGyLSTsAAAAAXBS5MQAAAADs/KtnAAAAAJO9YCIAAAAAnoesBAAAAAD0ogF9AAAAABKv9CQAAAAA1gbspQAAAABmhttUAAAAAOJ/TG4AAAAAhS0PpAAAAADx5KyDAAAAADmNIvEAAAAA5KJXxAAAAAAGOjP/AAAAADzZdKcAAAAAA8FPFgAAAAALDaYcAAAAAB0U9pgAAAAAr2EncwAAAABok7yTAAAAACRvTNIAAAAAcQKJkQAAAAAvcQE8AAAAALcp3hEAAAAA9ChDqgAAAACs3b0CAAAAAHROJI0AAAAA1R3GBgAAAAA6qGxzAAAAAOeWbGcAAAAAU9x6SgAAAADH/vP6AAAAAKOI8XIAAAAA8KA2VQAAAAAfGgFFAAAAANtgjI0AAAAAsGlzhQAAAABVwSsRAAAAAGXdNw0AAAAAbQSpewAAAAAx9MXlAAAAAMmuVXQAAAAAkTwfDwAAAAB9+wFIAAAAALLWklQAAAAASRZ3CgAAAABCqD25AAAAADrP138AAAAAA9Li1AAAAABj0TYLAAAAAHNsR/oAAAAA2maP7gAAAAB/GyXyAAAAAEsu8MgAAAAAspXNQgAAAAC1Xgb/AAAAALkz90EAAAAA48lYngAAAABm+qG1AAAAAMcXfW0AAAAAE90WugAAAADnSDIFAAAAAJmpDgYAAAAAnAkyCAAAAAD/iCk2AAAAAElp5lsAAAAAeHAxnAAAAADbQgcKAAAAAJ8CEbIAAAAAVY4u5wAAAAAOrWjNAAAAAMMgCpAAAAAAt+qJqQAAAACUbpfvAAAAAFTRiEYAAAAAi7yHngAAAAAyQ1DYAAAAAGJgz/QAAAAA7solGQAAAABxT00rAAAAAArI5bYAAAAARGqyPAAAAABFr7c7AAAAAFEE51wAAAAAum0rtgAAAAAMHHNDAAAAAM3EjogAAAAA4o430QAAAADk7fE+AAAAAAViqkEAAAAAGes0MwAAAABdjpDvAAAAAEbUJRQAAAAAdrnGOgAAAABrQWIuAAAAACvnK2sAAAAAEyRlJgAAAAC/12WyAAAAAEBI67MAAAAAtGGEZwAAAABGu6sdAAAAALuyVuYAAAAAgx0QzgAAAABtMTt3AAAAAMjVDVsAAAAARt941wAAAAAgkpeRAAAAAFLnED4AAAAAjv2/6wAAAADia1FwAAAAAJnk6jwAAAAAJ3jF/AAAAAAhFYtcAAAAAD0VY2UAAAAAd3N11gAAAACzKnTWAAAAAGIujsQAAAAAooiH2gAAAAB/yEYxAAAAADjjTNQAAAAASNHEhwAAAABtncUMAAAAADSX9KIAAAAAbiZNSAAAAACwhbuVAAAAALLTcSYAAAAAmSzpFAAAAABq7Ct3AAAAAPWp3D4AAAAAAe4gpwAAAACsTU9qAAAAAP2tI4QAAAAARNCL7QAAAAA06+aVAAAAAJoNQN0AAAAAJWtPhwAAAAAXEn70AAAAAJssGqkAAAAAAKFQUQAAAACT9kT0AAAAAFu3JOUAAAAANO5gIAAAAAB+VuXIAAAAAPHeI4QAAAAADT7B/wAAAADfokXMAAAAAJfg/f0AAAAArlfEXAAAAAAkYKSUAAAAAEQpA7IAAAAALJqctgAAAACjeaRfAAAAABqE3tEAAAAA0rJopAAAAAD4yLv2AAAAAFOxR8MAAAAAeEtDDwAAAACwX8hdAAAAAKjoZU0AAAAAV26whwAAAAC5K7eKAAAAACxfVTIAAAAA7fBnGAAAAACd50/yAAAAAFP5Jh8AAAAAiSQPKAAAAADlfY5NAAAAAP/8zuIAAAAAoS9UEgAAAAD7pTW4AAAAAHB5cVIAAAAARbh83AAAAADaocV3AAAAAG7HtekAAAAAmNEfcwAAAAD3YdvOAAAAAJPCtA4AAAAAoxmOwwAAAABrPuElAAAAAMak4r8AAAAAAUC/cgAAAAAhZCuIAAAAAN2mf+kAAAAAx83h5gAAAAAmbYMwAAAAAJiCnrQAAAAAQU55rAAAAADw+3SUAAAAAM/2XnEAAAAAcU07SwAAAADA5e+DAAAAAHttOgkAAAAAiGbszAAAAABluI1oAAAAAH2uALwAAAAAGUF/5wAAAABmMR5KAAAAALSCYAUAAAAAiKGWXwAAAAAnvn1PAAAAAAqH+bsAAAAABwJn9QAAAAAzsIpwAAAAABrZ0GcAAAAAhuYe5wAAAABO+FTkAAAAAM2/KvIAAAAAsRPS3gAAAACdx440AAAAAP4tumYAAAAA+bmCzQAAAADJHxanAAAAAPXucQIAAAAAonSCwAAAAACgjOtnAAAAABoWVc0AAAAAkdoAbwAAAACz7aNzAAAAAKW4ruYAAAAA8cr63gAAAABE6SNJAAAAAD2BPGMAAAAAdnPyEgAAAAD8U+XcAAAAAAcdXwAAAAAAEvif/wAAAAD8rlYaAAAAACG0El4AAAAA3437sgAAAAALlh8WAAAAAG9kxgcAAAAAdz6Z+QAAAACUVLt6AAAAAMOofScAAAAAiWSpbAAAAACQguXTAAAAAFroExIAAAAApWtqzgAAAABNjjRLAAAAAGf7vuUAAAAAHZvizwAAAAAHed+7AAAAABQIe3UAAAAA4XtGOAAAAAASc2AdAAAAANHNDk8AAAAAJzMFKAAAAAAXXfpMAAAAABu0ce0AAAAA4/NkJgAAAAA/+1OAAAAAADu2hoIAAAAAZmCDfwAAAADXQL4fAAAAAAS4ug0AAAAAG+OmAgAAAADu60ldAAAAAMsuxCUAAAAAj8UWaQAAAACr0RxfAAAAAAMdSFQAAAAA36GiRwAAAABk4uomAAAAAFcoM5EAAAAAs+yK7gAAAABcwIvCAAAAAIIBImQAAAAAfG9I6AAAAAAn4QM2AAAAALGuHggAAAAAcaGswgAAAACpMriNAAAAAI7aXk0AAAAAZTO/DgAAAABLMd42AAAAAEU6FlIAAAAAPk5rDAAAAADcO1eQAAAAABikK+4AAAAA6IPhsAAAAAALNbnjAAAAAH5zJeMAAAAAkpFEPwAAAABcpLkXAAAAAEQpk1gAAAAAg1qEDQAAAABVMEyiAAAAANdjUEYAAAAAlsVqdgAAAAAvFPWeAAAAAKqzNcIAAAAA98nh2wAAAAD29MteAAAAAAO9kOUAAAAAt+873wAAAAAAY/ZgAAAAAFdmymYAAAAAh6+t/wAAAACAUVNnAAAAAGFVacwAAAAAhn/nDQAAAADUmhjPAAAAAHmQWBgAAAAAql9jiwAAAACFC0uIAAAAAMPDQeUAAAAAR0NZ1QAAAABBn5deAAAAAB5TwDYAAAAAJuVeBQAAAABkVY1bAAAAAFMthYgAAAAA3ZKyXQAAAABC6R5uAAAAAPHG1l8AAAAA9cob5QAAAABoNpXQAAAAAADVHtoAAAAAoXyWdAAAAABzlNNLAAAAAAd7JTwAAAAAFQy61gAAAADNezk1AAAAAD3N/EMAAAAAqi9OHQAAAADu9V/uAAAAALEpjVkAAAAAy2UzWwAAAAC2chu5AAAAAIVrQEIAAAAAItdI3gAAAADERnBmAAAAAA34/pwAAAAARc9ZfAAAAAAlM5R9AAAAAMUAqCsAAAAAf48ghQAAAADY/1U9AAAAAGIcDcwAAAAAocqYuAAAAAA0etJoAAAAALJFCoYAAAAA3+LS9gAAAAAG6CGqAAAAAIZELuoAAAAAesiBRAAAAADIDXwHAAAAANY8KSQAAAAAzq0RkAAAAADNNOjsAAAAABZVTTsAAAAAMOJYbAAAAABNxQyWAAAAAOtIPlMAAAAAoxn9VQAAAAAyfQpBAAAAAFCxFm4AAAAAeul3ewAAAADixnHqAAAAACqMW+EAAAAAByiTLAAAAACfpwVeAAAAADppPwAAAAAANCiqtgAAAABv7Qs5AAAAAP5TNKcAAAAAOMhxeQAAAAAww45yAAAAALOIDngAAAAApdPt+gAAAABO/05GAAAAAHDeD98AAAAAH7Ye8QAAAADnCh6QAAAAANAEPSwAAAAAlwTimwAAAAB2scxWAAAAAMq2rHkAAAAAB/b8EwAAAAAQSW3SAAAAAMuKLnsAAAAAmRG5UAAAAAC2sGuCAAAAAIpjqUEAAAAAgvq4QgAAAABBhm3kAAAAAGa9p+kAAAAAJYzu4QAAAADz1dQfAAAAANC6C5YAAAAAbvuQowAAAABJEwlLAAAAAHwIY4oAAAAASP5riAAAAABzZpcJAAAAAKrZpgYAAAAAWaJRswAAAADVqREtAAAAAKjAN84AAAAACawW3wAAAADrqt14AAAAADA/MwwAAAAA6VoIkQAAAADKRR/qAAAAAGMhqpUAAAAA2gUL3QAAAADMhsOJAAAAAHy9Ru8AAAAApXZ9wwAAAAA6EmlLAAAAAM6zmT0AAAAAkPNYqQAAAAD6QheVAAAAAJ82DQgAAAAAqH9qbAAAAACFQMpFAAAAAMH5ZNsAAAAAq5UO+QAAAABRdDH1AAAAAMJR3gsAAAAAlcaqzQAAAADzWWEdAAAAAIQKqtoAAAAAwrvNwAAAAAD17N09AAAAABGwSVwAAAAA+rnH+AAAAAARTa6vAAAAAJLCGA4AAAAAIGRTswAAAAAbOwKbAAAAAFyofAUAAAAALDaxtAAAAADSpDyBAAAAABSlmB8AAAAAEX19IwAAAACEwHF8AAAAADXsqPwAAAAAe5IE4QAAAACZqU9pAAAAAGzMKdEAAAAAZuqdjQAAAABnaWP4AAAAAPW3T6wAAAAApqmHCgAAAACkUeS0AAAAANd0gnIAAAAAJ1219gAAAADYblUyAAAAANaeA3MAAAAA2JW20QAAAABE13o1AAAAAHE31nUAAAAAmABkGAAAAABaQFzGAAAAAKeAVTQAAAAAOU+S1wAAAABpLdz8AAAAAHOfZiIAAAAAKWLzEgAAAABSkJu/AAAAAKedPQQAAAAACtxWlAAAAABVHrLEAAAAAIgBSwMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==",
|
| 117 |
+
"cuda_rng_state": "BWVGVHP/DAAAAAAAAAAAAA=="
|
| 118 |
+
}
|
checkpoints/step_00040000/.complete
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"files": {
|
| 4 |
+
"config.json": "7d7a8111595963bc28a9b70cda8402bec27f0d6ca34b35fef100bfe5dba4ebeb",
|
| 5 |
+
"model.safetensors": "815271df0e112ca022c4cedc658f10cda2c7a5c589ac010f3ddf7b8953701df2",
|
| 6 |
+
"optimizer.safetensors": "3656ae361cdb40deb44ae1d14e1574966bab5ed96a35e75b6d025814a90c8a61",
|
| 7 |
+
"training_state.json": "c322c34d64a1c7e147d32fea520e720506033111faea1ba76f11bea3bab41621"
|
| 8 |
+
}
|
| 9 |
+
}
|
checkpoints/step_00040000/config.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"checkpoint_type": "pretrain",
|
| 4 |
+
"model_config": {
|
| 5 |
+
"vocab_size": 4278,
|
| 6 |
+
"max_seq_len": 256,
|
| 7 |
+
"n_outcomes": 5,
|
| 8 |
+
"d_model": 256,
|
| 9 |
+
"n_layers": 8,
|
| 10 |
+
"n_heads": 4,
|
| 11 |
+
"d_ff": 1024,
|
| 12 |
+
"dropout": 0.0,
|
| 13 |
+
"rope_base": 10000.0
|
| 14 |
+
},
|
| 15 |
+
"training_config": {
|
| 16 |
+
"lr": 0.0003,
|
| 17 |
+
"weight_decay": 0.01,
|
| 18 |
+
"max_grad_norm": 1.0,
|
| 19 |
+
"warmup_steps": 1000,
|
| 20 |
+
"total_steps": 100000,
|
| 21 |
+
"batch_size": 256,
|
| 22 |
+
"max_ply": 256,
|
| 23 |
+
"discard_ply_limit": false,
|
| 24 |
+
"num_workers": 4,
|
| 25 |
+
"use_amp": true,
|
| 26 |
+
"accumulation_steps": 1,
|
| 27 |
+
"log_interval": 10,
|
| 28 |
+
"eval_interval": 500,
|
| 29 |
+
"checkpoint_interval": 5000,
|
| 30 |
+
"base_seed": 42,
|
| 31 |
+
"val_seed": 9223372036854775807,
|
| 32 |
+
"val_games": 512,
|
| 33 |
+
"checkpoint_dir": "checkpoints",
|
| 34 |
+
"log_dir": "logs",
|
| 35 |
+
"use_wandb": false,
|
| 36 |
+
"wandb_project": "pawn",
|
| 37 |
+
"device": "cuda"
|
| 38 |
+
}
|
| 39 |
+
}
|
checkpoints/step_00040000/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:815271df0e112ca022c4cedc658f10cda2c7a5c589ac010f3ddf7b8953701df2
|
| 3 |
+
size 38102280
|
checkpoints/step_00040000/optimizer.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3656ae361cdb40deb44ae1d14e1574966bab5ed96a35e75b6d025814a90c8a61
|
| 3 |
+
size 76210148
|
checkpoints/step_00040000/training_state.json
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"global_step": 40000,
|
| 4 |
+
"scheduler_state_dict": {
|
| 5 |
+
"step": 40000
|
| 6 |
+
},
|
| 7 |
+
"scaler_state_dict": {
|
| 8 |
+
"scale": 1048576.0,
|
| 9 |
+
"growth_factor": 2.0,
|
| 10 |
+
"backoff_factor": 0.5,
|
| 11 |
+
"growth_interval": 2000,
|
| 12 |
+
"_growth_tracker": 417
|
| 13 |
+
},
|
| 14 |
+
"optimizer_meta": {
|
| 15 |
+
"param_groups": [
|
| 16 |
+
{
|
| 17 |
+
"lr": 0.00020915417504785194,
|
| 18 |
+
"betas": [
|
| 19 |
+
0.9,
|
| 20 |
+
0.999
|
| 21 |
+
],
|
| 22 |
+
"eps": 1e-08,
|
| 23 |
+
"weight_decay": 0.01,
|
| 24 |
+
"amsgrad": false,
|
| 25 |
+
"maximize": false,
|
| 26 |
+
"foreach": null,
|
| 27 |
+
"capturable": false,
|
| 28 |
+
"differentiable": false,
|
| 29 |
+
"fused": null,
|
| 30 |
+
"decoupled_weight_decay": true,
|
| 31 |
+
"params": [
|
| 32 |
+
0,
|
| 33 |
+
1,
|
| 34 |
+
2,
|
| 35 |
+
3,
|
| 36 |
+
4,
|
| 37 |
+
5,
|
| 38 |
+
6,
|
| 39 |
+
7,
|
| 40 |
+
8,
|
| 41 |
+
9,
|
| 42 |
+
10,
|
| 43 |
+
11,
|
| 44 |
+
12,
|
| 45 |
+
13,
|
| 46 |
+
14,
|
| 47 |
+
15,
|
| 48 |
+
16,
|
| 49 |
+
17,
|
| 50 |
+
18,
|
| 51 |
+
19,
|
| 52 |
+
20,
|
| 53 |
+
21,
|
| 54 |
+
22,
|
| 55 |
+
23,
|
| 56 |
+
24,
|
| 57 |
+
25,
|
| 58 |
+
26,
|
| 59 |
+
27,
|
| 60 |
+
28,
|
| 61 |
+
29,
|
| 62 |
+
30,
|
| 63 |
+
31,
|
| 64 |
+
32,
|
| 65 |
+
33,
|
| 66 |
+
34,
|
| 67 |
+
35,
|
| 68 |
+
36,
|
| 69 |
+
37,
|
| 70 |
+
38,
|
| 71 |
+
39,
|
| 72 |
+
40,
|
| 73 |
+
41,
|
| 74 |
+
42,
|
| 75 |
+
43,
|
| 76 |
+
44,
|
| 77 |
+
45,
|
| 78 |
+
46,
|
| 79 |
+
47,
|
| 80 |
+
48,
|
| 81 |
+
49,
|
| 82 |
+
50,
|
| 83 |
+
51,
|
| 84 |
+
52,
|
| 85 |
+
53,
|
| 86 |
+
54,
|
| 87 |
+
55,
|
| 88 |
+
56,
|
| 89 |
+
57,
|
| 90 |
+
58,
|
| 91 |
+
59,
|
| 92 |
+
60,
|
| 93 |
+
61,
|
| 94 |
+
62,
|
| 95 |
+
63,
|
| 96 |
+
64,
|
| 97 |
+
65,
|
| 98 |
+
66,
|
| 99 |
+
67,
|
| 100 |
+
68,
|
| 101 |
+
69,
|
| 102 |
+
70,
|
| 103 |
+
71,
|
| 104 |
+
72,
|
| 105 |
+
73,
|
| 106 |
+
74,
|
| 107 |
+
75,
|
| 108 |
+
76,
|
| 109 |
+
77,
|
| 110 |
+
78
|
| 111 |
+
]
|
| 112 |
+
}
|
| 113 |
+
],
|
| 114 |
+
"scalars": null
|
| 115 |
+
},
|
| 116 |
+
"torch_rng_state": "h08rNwf38rOvAAAAAQAAAMIBAAAAAAAA5bVTKAAAAAC1CIbqAAAAAA3rKqAAAAAAGqogVAAAAABigsYrAAAAAAK5m78AAAAAcE43pAAAAAB+YHT7AAAAABSmyhcAAAAAiwIFRwAAAABru0i1AAAAAEAG8YYAAAAAyvLLfgAAAADgU+nAAAAAAFqEKAgAAAAA72kt/gAAAAC/V34GAAAAAJRKvuQAAAAARwrwvAAAAAB8+1buAAAAAHw2FqQAAAAATEURKwAAAADppdGbAAAAALFK1WsAAAAA9FjHJwAAAAAKRlfWAAAAAAxZlR8AAAAA7PS73AAAAABeXBUoAAAAAIA1IjIAAAAAC2JcuwAAAACKVAI/AAAAAJ4PbRkAAAAA/OdaFAAAAAB2VuHVAAAAAOjy5bMAAAAAKivF/gAAAACA5OnEAAAAAC+aNjMAAAAABNa0HQAAAABgCGjwAAAAACVYnKcAAAAAYnQAHgAAAACed89WAAAAAMk1u3oAAAAAUKriUQAAAAC9t94zAAAAAPbZbRcAAAAAGOth4wAAAAAs80jFAAAAAKRt/bQAAAAAa6dpYQAAAABipCt+AAAAAN56VTYAAAAAXpizpwAAAABgJXGrAAAAAFiUPsEAAAAAvOynQAAAAACWnN22AAAAAEPkFa4AAAAAsqkleAAAAAAVdN9zAAAAAHvqifQAAAAAA0n6LwAAAACAuTx4AAAAAD6Z93cAAAAApS7GTwAAAADjfskvAAAAAA+ewj4AAAAAl9DQLAAAAAD9kSH/AAAAAEA+GYgAAAAAvsbprwAAAAB3NN+0AAAAANc6YrwAAAAAnfkv2gAAAACMQfjeAAAAAGA0pCgAAAAAvTnvHAAAAAC4jLlTAAAAALc9p4wAAAAAZjDhogAAAADAlSqiAAAAAAzt0eMAAAAAguPkrgAAAACEQa6HAAAAAFfNjdEAAAAAGoYWVAAAAABF3yVKAAAAADgonYgAAAAAb6PyRAAAAACG7abbAAAAABqiUFAAAAAA0SzSDAAAAACeoR0/AAAAAOgi1CcAAAAA9Dw29QAAAABFfJluAAAAAP8tr7AAAAAAaLAamQAAAABjRZ8PAAAAALEV3aUAAAAAAIRuuwAAAACHGl7bAAAAAIcEIUUAAAAASi25VAAAAADR+U0fAAAAAOWbOB4AAAAAmvLGFAAAAAAJNxX1AAAAAHtE2ScAAAAAk2NOTgAAAAD+65BYAAAAAOsZFekAAAAAWIKxJAAAAABiKM6BAAAAAE1YYQEAAAAA2sop3wAAAABCIl6iAAAAAP/T7yYAAAAAWZNJcQAAAABFOZsSAAAAAFzETvMAAAAAcFP0/AAAAADurpxIAAAAAIaDQdMAAAAAsS/3TAAAAAA0YVttAAAAAGstmGoAAAAAQqLbLAAAAABXzRZ4AAAAANqZ/18AAAAAdVwB9AAAAADsfpg2AAAAAMy2zKoAAAAACSy3KAAAAABDzRafAAAAAAzi404AAAAA3pVkeQAAAAAQ5caLAAAAAHXDzEcAAAAAS59+2QAAAABV2J9+AAAAAMejV/YAAAAAKwKUxwAAAADKYpLRAAAAAPvntMAAAAAA5v2IfQAAAAC9+IekAAAAABN+sCwAAAAAgOOzyQAAAABjjSj0AAAAAG2AsasAAAAA4CMoZgAAAAAHpm84AAAAANC8aZAAAAAAfiO4WQAAAAAzUg0HAAAAAGyLSTsAAAAAXBS5MQAAAADs/KtnAAAAAJO9YCIAAAAAnoesBAAAAAD0ogF9AAAAABKv9CQAAAAA1gbspQAAAABmhttUAAAAAOJ/TG4AAAAAhS0PpAAAAADx5KyDAAAAADmNIvEAAAAA5KJXxAAAAAAGOjP/AAAAADzZdKcAAAAAA8FPFgAAAAALDaYcAAAAAB0U9pgAAAAAr2EncwAAAABok7yTAAAAACRvTNIAAAAAcQKJkQAAAAAvcQE8AAAAALcp3hEAAAAA9ChDqgAAAACs3b0CAAAAAHROJI0AAAAA1R3GBgAAAAA6qGxzAAAAAOeWbGcAAAAAU9x6SgAAAADH/vP6AAAAAKOI8XIAAAAA8KA2VQAAAAAfGgFFAAAAANtgjI0AAAAAsGlzhQAAAABVwSsRAAAAAGXdNw0AAAAAbQSpewAAAAAx9MXlAAAAAMmuVXQAAAAAkTwfDwAAAAB9+wFIAAAAALLWklQAAAAASRZ3CgAAAABCqD25AAAAADrP138AAAAAA9Li1AAAAABj0TYLAAAAAHNsR/oAAAAA2maP7gAAAAB/GyXyAAAAAEsu8MgAAAAAspXNQgAAAAC1Xgb/AAAAALkz90EAAAAA48lYngAAAABm+qG1AAAAAMcXfW0AAAAAE90WugAAAADnSDIFAAAAAJmpDgYAAAAAnAkyCAAAAAD/iCk2AAAAAElp5lsAAAAAeHAxnAAAAADbQgcKAAAAAJ8CEbIAAAAAVY4u5wAAAAAOrWjNAAAAAMMgCpAAAAAAt+qJqQAAAACUbpfvAAAAAFTRiEYAAAAAi7yHngAAAAAyQ1DYAAAAAGJgz/QAAAAA7solGQAAAABxT00rAAAAAArI5bYAAAAARGqyPAAAAABFr7c7AAAAAFEE51wAAAAAum0rtgAAAAAMHHNDAAAAAM3EjogAAAAA4o430QAAAADk7fE+AAAAAAViqkEAAAAAGes0MwAAAABdjpDvAAAAAEbUJRQAAAAAdrnGOgAAAABrQWIuAAAAACvnK2sAAAAAEyRlJgAAAAC/12WyAAAAAEBI67MAAAAAtGGEZwAAAABGu6sdAAAAALuyVuYAAAAAgx0QzgAAAABtMTt3AAAAAMjVDVsAAAAARt941wAAAAAgkpeRAAAAAFLnED4AAAAAjv2/6wAAAADia1FwAAAAAJnk6jwAAAAAJ3jF/AAAAAAhFYtcAAAAAD0VY2UAAAAAd3N11gAAAACzKnTWAAAAAGIujsQAAAAAooiH2gAAAAB/yEYxAAAAADjjTNQAAAAASNHEhwAAAABtncUMAAAAADSX9KIAAAAAbiZNSAAAAACwhbuVAAAAALLTcSYAAAAAmSzpFAAAAABq7Ct3AAAAAPWp3D4AAAAAAe4gpwAAAACsTU9qAAAAAP2tI4QAAAAARNCL7QAAAAA06+aVAAAAAJoNQN0AAAAAJWtPhwAAAAAXEn70AAAAAJssGqkAAAAAAKFQUQAAAACT9kT0AAAAAFu3JOUAAAAANO5gIAAAAAB+VuXIAAAAAPHeI4QAAAAADT7B/wAAAADfokXMAAAAAJfg/f0AAAAArlfEXAAAAAAkYKSUAAAAAEQpA7IAAAAALJqctgAAAACjeaRfAAAAABqE3tEAAAAA0rJopAAAAAD4yLv2AAAAAFOxR8MAAAAAeEtDDwAAAACwX8hdAAAAAKjoZU0AAAAAV26whwAAAAC5K7eKAAAAACxfVTIAAAAA7fBnGAAAAACd50/yAAAAAFP5Jh8AAAAAiSQPKAAAAADlfY5NAAAAAP/8zuIAAAAAoS9UEgAAAAD7pTW4AAAAAHB5cVIAAAAARbh83AAAAADaocV3AAAAAG7HtekAAAAAmNEfcwAAAAD3YdvOAAAAAJPCtA4AAAAAoxmOwwAAAABrPuElAAAAAMak4r8AAAAAAUC/cgAAAAAhZCuIAAAAAN2mf+kAAAAAx83h5gAAAAAmbYMwAAAAAJiCnrQAAAAAQU55rAAAAADw+3SUAAAAAM/2XnEAAAAAcU07SwAAAADA5e+DAAAAAHttOgkAAAAAiGbszAAAAABluI1oAAAAAH2uALwAAAAAGUF/5wAAAABmMR5KAAAAALSCYAUAAAAAiKGWXwAAAAAnvn1PAAAAAAqH+bsAAAAABwJn9QAAAAAzsIpwAAAAABrZ0GcAAAAAhuYe5wAAAABO+FTkAAAAAM2/KvIAAAAAsRPS3gAAAACdx440AAAAAP4tumYAAAAA+bmCzQAAAADJHxanAAAAAPXucQIAAAAAonSCwAAAAACgjOtnAAAAABoWVc0AAAAAkdoAbwAAAACz7aNzAAAAAKW4ruYAAAAA8cr63gAAAABE6SNJAAAAAD2BPGMAAAAAdnPyEgAAAAD8U+XcAAAAAAcdXwAAAAAAEvif/wAAAAD8rlYaAAAAACG0El4AAAAA3437sgAAAAALlh8WAAAAAG9kxgcAAAAAdz6Z+QAAAACUVLt6AAAAAMOofScAAAAAiWSpbAAAAACQguXTAAAAAFroExIAAAAApWtqzgAAAABNjjRLAAAAAGf7vuUAAAAAHZvizwAAAAAHed+7AAAAABQIe3UAAAAA4XtGOAAAAAASc2AdAAAAANHNDk8AAAAAJzMFKAAAAAAXXfpMAAAAABu0ce0AAAAA4/NkJgAAAAA/+1OAAAAAADu2hoIAAAAAZmCDfwAAAADXQL4fAAAAAAS4ug0AAAAAG+OmAgAAAADu60ldAAAAAMsuxCUAAAAAj8UWaQAAAACr0RxfAAAAAAMdSFQAAAAA36GiRwAAAABk4uomAAAAAFcoM5EAAAAAs+yK7gAAAABcwIvCAAAAAIIBImQAAAAAfG9I6AAAAAAn4QM2AAAAALGuHggAAAAAcaGswgAAAACpMriNAAAAAI7aXk0AAAAAZTO/DgAAAABLMd42AAAAAEU6FlIAAAAAPk5rDAAAAADcO1eQAAAAABikK+4AAAAA6IPhsAAAAAALNbnjAAAAAH5zJeMAAAAAkpFEPwAAAABcpLkXAAAAAEQpk1gAAAAAg1qEDQAAAABVMEyiAAAAANdjUEYAAAAAlsVqdgAAAAAvFPWeAAAAAKqzNcIAAAAA98nh2wAAAAD29MteAAAAAAO9kOUAAAAAt+873wAAAAAAY/ZgAAAAAFdmymYAAAAAh6+t/wAAAACAUVNnAAAAAGFVacwAAAAAhn/nDQAAAADUmhjPAAAAAHmQWBgAAAAAql9jiwAAAACFC0uIAAAAAMPDQeUAAAAAR0NZ1QAAAABBn5deAAAAAB5TwDYAAAAAJuVeBQAAAABkVY1bAAAAAFMthYgAAAAA3ZKyXQAAAABC6R5uAAAAAPHG1l8AAAAA9cob5QAAAABoNpXQAAAAAADVHtoAAAAAoXyWdAAAAABzlNNLAAAAAAd7JTwAAAAAFQy61gAAAADNezk1AAAAAD3N/EMAAAAAqi9OHQAAAADu9V/uAAAAALEpjVkAAAAAy2UzWwAAAAC2chu5AAAAAIVrQEIAAAAAItdI3gAAAADERnBmAAAAAA34/pwAAAAARc9ZfAAAAAAlM5R9AAAAAMUAqCsAAAAAf48ghQAAAADY/1U9AAAAAGIcDcwAAAAAocqYuAAAAAA0etJoAAAAALJFCoYAAAAA3+LS9gAAAAAG6CGqAAAAAIZELuoAAAAAesiBRAAAAADIDXwHAAAAANY8KSQAAAAAzq0RkAAAAADNNOjsAAAAABZVTTsAAAAAMOJYbAAAAABNxQyWAAAAAOtIPlMAAAAAoxn9VQAAAAAyfQpBAAAAAFCxFm4AAAAAeul3ewAAAADixnHqAAAAACqMW+EAAAAAByiTLAAAAACfpwVeAAAAADppPwAAAAAANCiqtgAAAABv7Qs5AAAAAP5TNKcAAAAAOMhxeQAAAAAww45yAAAAALOIDngAAAAApdPt+gAAAABO/05GAAAAAHDeD98AAAAAH7Ye8QAAAADnCh6QAAAAANAEPSwAAAAAlwTimwAAAAB2scxWAAAAAMq2rHkAAAAAB/b8EwAAAAAQSW3SAAAAAMuKLnsAAAAAmRG5UAAAAAC2sGuCAAAAAIpjqUEAAAAAgvq4QgAAAABBhm3kAAAAAGa9p+kAAAAAJYzu4QAAAADz1dQfAAAAANC6C5YAAAAAbvuQowAAAABJEwlLAAAAAHwIY4oAAAAASP5riAAAAABzZpcJAAAAAKrZpgYAAAAAWaJRswAAAADVqREtAAAAAKjAN84AAAAACawW3wAAAADrqt14AAAAADA/MwwAAAAA6VoIkQAAAADKRR/qAAAAAGMhqpUAAAAA2gUL3QAAAADMhsOJAAAAAHy9Ru8AAAAApXZ9wwAAAAA6EmlLAAAAAM6zmT0AAAAAkPNYqQAAAAD6QheVAAAAAJ82DQgAAAAAqH9qbAAAAACFQMpFAAAAAMH5ZNsAAAAAq5UO+QAAAABRdDH1AAAAAMJR3gsAAAAAlcaqzQAAAADzWWEdAAAAAIQKqtoAAAAAwrvNwAAAAAD17N09AAAAABGwSVwAAAAA+rnH+AAAAAARTa6vAAAAAJLCGA4AAAAAIGRTswAAAAAbOwKbAAAAAFyofAUAAAAALDaxtAAAAADSpDyBAAAAABSlmB8AAAAAEX19IwAAAACEwHF8AAAAADXsqPwAAAAAe5IE4QAAAACZqU9pAAAAAGzMKdEAAAAAZuqdjQAAAABnaWP4AAAAAPW3T6wAAAAApqmHCgAAAACkUeS0AAAAANd0gnIAAAAAJ1219gAAAADYblUyAAAAANaeA3MAAAAA2JW20QAAAABE13o1AAAAAHE31nUAAAAAmABkGAAAAABaQFzGAAAAAKeAVTQAAAAAOU+S1wAAAABpLdz8AAAAAHOfZiIAAAAAKWLzEgAAAABSkJu/AAAAAKedPQQAAAAACtxWlAAAAABVHrLEAAAAAIgBSwMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==",
|
| 117 |
+
"cuda_rng_state": "BWVGVHP/DAAAAAAAAAAAAA=="
|
| 118 |
+
}
|
checkpoints/step_00045000/.complete
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"files": {
|
| 4 |
+
"config.json": "7d7a8111595963bc28a9b70cda8402bec27f0d6ca34b35fef100bfe5dba4ebeb",
|
| 5 |
+
"model.safetensors": "4b23b6a8f024c360d6137f1dc3ce11ef84c25899780e1bac416e9e80eb41be3e",
|
| 6 |
+
"optimizer.safetensors": "147287f25cba90d17731d7913b9055f4289009b6399eceaefd7059f544cbe86a",
|
| 7 |
+
"training_state.json": "ecf796cfe4968fa790a51d57189ab027190e5ea2c3b21a61cce97c9d3ff3e993"
|
| 8 |
+
}
|
| 9 |
+
}
|
checkpoints/step_00045000/config.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"checkpoint_type": "pretrain",
|
| 4 |
+
"model_config": {
|
| 5 |
+
"vocab_size": 4278,
|
| 6 |
+
"max_seq_len": 256,
|
| 7 |
+
"n_outcomes": 5,
|
| 8 |
+
"d_model": 256,
|
| 9 |
+
"n_layers": 8,
|
| 10 |
+
"n_heads": 4,
|
| 11 |
+
"d_ff": 1024,
|
| 12 |
+
"dropout": 0.0,
|
| 13 |
+
"rope_base": 10000.0
|
| 14 |
+
},
|
| 15 |
+
"training_config": {
|
| 16 |
+
"lr": 0.0003,
|
| 17 |
+
"weight_decay": 0.01,
|
| 18 |
+
"max_grad_norm": 1.0,
|
| 19 |
+
"warmup_steps": 1000,
|
| 20 |
+
"total_steps": 100000,
|
| 21 |
+
"batch_size": 256,
|
| 22 |
+
"max_ply": 256,
|
| 23 |
+
"discard_ply_limit": false,
|
| 24 |
+
"num_workers": 4,
|
| 25 |
+
"use_amp": true,
|
| 26 |
+
"accumulation_steps": 1,
|
| 27 |
+
"log_interval": 10,
|
| 28 |
+
"eval_interval": 500,
|
| 29 |
+
"checkpoint_interval": 5000,
|
| 30 |
+
"base_seed": 42,
|
| 31 |
+
"val_seed": 9223372036854775807,
|
| 32 |
+
"val_games": 512,
|
| 33 |
+
"checkpoint_dir": "checkpoints",
|
| 34 |
+
"log_dir": "logs",
|
| 35 |
+
"use_wandb": false,
|
| 36 |
+
"wandb_project": "pawn",
|
| 37 |
+
"device": "cuda"
|
| 38 |
+
}
|
| 39 |
+
}
|
checkpoints/step_00045000/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4b23b6a8f024c360d6137f1dc3ce11ef84c25899780e1bac416e9e80eb41be3e
|
| 3 |
+
size 38102280
|
checkpoints/step_00045000/optimizer.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:147287f25cba90d17731d7913b9055f4289009b6399eceaefd7059f544cbe86a
|
| 3 |
+
size 76210148
|
checkpoints/step_00045000/training_state.json
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"global_step": 45000,
|
| 4 |
+
"scheduler_state_dict": {
|
| 5 |
+
"step": 45000
|
| 6 |
+
},
|
| 7 |
+
"scaler_state_dict": {
|
| 8 |
+
"scale": 2097152.0,
|
| 9 |
+
"growth_factor": 2.0,
|
| 10 |
+
"backoff_factor": 0.5,
|
| 11 |
+
"growth_interval": 2000,
|
| 12 |
+
"_growth_tracker": 208
|
| 13 |
+
},
|
| 14 |
+
"optimizer_meta": {
|
| 15 |
+
"param_groups": [
|
| 16 |
+
{
|
| 17 |
+
"lr": 0.0001884425039850356,
|
| 18 |
+
"betas": [
|
| 19 |
+
0.9,
|
| 20 |
+
0.999
|
| 21 |
+
],
|
| 22 |
+
"eps": 1e-08,
|
| 23 |
+
"weight_decay": 0.01,
|
| 24 |
+
"amsgrad": false,
|
| 25 |
+
"maximize": false,
|
| 26 |
+
"foreach": null,
|
| 27 |
+
"capturable": false,
|
| 28 |
+
"differentiable": false,
|
| 29 |
+
"fused": null,
|
| 30 |
+
"decoupled_weight_decay": true,
|
| 31 |
+
"params": [
|
| 32 |
+
0,
|
| 33 |
+
1,
|
| 34 |
+
2,
|
| 35 |
+
3,
|
| 36 |
+
4,
|
| 37 |
+
5,
|
| 38 |
+
6,
|
| 39 |
+
7,
|
| 40 |
+
8,
|
| 41 |
+
9,
|
| 42 |
+
10,
|
| 43 |
+
11,
|
| 44 |
+
12,
|
| 45 |
+
13,
|
| 46 |
+
14,
|
| 47 |
+
15,
|
| 48 |
+
16,
|
| 49 |
+
17,
|
| 50 |
+
18,
|
| 51 |
+
19,
|
| 52 |
+
20,
|
| 53 |
+
21,
|
| 54 |
+
22,
|
| 55 |
+
23,
|
| 56 |
+
24,
|
| 57 |
+
25,
|
| 58 |
+
26,
|
| 59 |
+
27,
|
| 60 |
+
28,
|
| 61 |
+
29,
|
| 62 |
+
30,
|
| 63 |
+
31,
|
| 64 |
+
32,
|
| 65 |
+
33,
|
| 66 |
+
34,
|
| 67 |
+
35,
|
| 68 |
+
36,
|
| 69 |
+
37,
|
| 70 |
+
38,
|
| 71 |
+
39,
|
| 72 |
+
40,
|
| 73 |
+
41,
|
| 74 |
+
42,
|
| 75 |
+
43,
|
| 76 |
+
44,
|
| 77 |
+
45,
|
| 78 |
+
46,
|
| 79 |
+
47,
|
| 80 |
+
48,
|
| 81 |
+
49,
|
| 82 |
+
50,
|
| 83 |
+
51,
|
| 84 |
+
52,
|
| 85 |
+
53,
|
| 86 |
+
54,
|
| 87 |
+
55,
|
| 88 |
+
56,
|
| 89 |
+
57,
|
| 90 |
+
58,
|
| 91 |
+
59,
|
| 92 |
+
60,
|
| 93 |
+
61,
|
| 94 |
+
62,
|
| 95 |
+
63,
|
| 96 |
+
64,
|
| 97 |
+
65,
|
| 98 |
+
66,
|
| 99 |
+
67,
|
| 100 |
+
68,
|
| 101 |
+
69,
|
| 102 |
+
70,
|
| 103 |
+
71,
|
| 104 |
+
72,
|
| 105 |
+
73,
|
| 106 |
+
74,
|
| 107 |
+
75,
|
| 108 |
+
76,
|
| 109 |
+
77,
|
| 110 |
+
78
|
| 111 |
+
]
|
| 112 |
+
}
|
| 113 |
+
],
|
| 114 |
+
"scalars": null
|
| 115 |
+
},
|
| 116 |
+
"torch_rng_state": "h08rNwf38rOvAAAAAQAAAMIBAAAAAAAA5bVTKAAAAAC1CIbqAAAAAA3rKqAAAAAAGqogVAAAAABigsYrAAAAAAK5m78AAAAAcE43pAAAAAB+YHT7AAAAABSmyhcAAAAAiwIFRwAAAABru0i1AAAAAEAG8YYAAAAAyvLLfgAAAADgU+nAAAAAAFqEKAgAAAAA72kt/gAAAAC/V34GAAAAAJRKvuQAAAAARwrwvAAAAAB8+1buAAAAAHw2FqQAAAAATEURKwAAAADppdGbAAAAALFK1WsAAAAA9FjHJwAAAAAKRlfWAAAAAAxZlR8AAAAA7PS73AAAAABeXBUoAAAAAIA1IjIAAAAAC2JcuwAAAACKVAI/AAAAAJ4PbRkAAAAA/OdaFAAAAAB2VuHVAAAAAOjy5bMAAAAAKivF/gAAAACA5OnEAAAAAC+aNjMAAAAABNa0HQAAAABgCGjwAAAAACVYnKcAAAAAYnQAHgAAAACed89WAAAAAMk1u3oAAAAAUKriUQAAAAC9t94zAAAAAPbZbRcAAAAAGOth4wAAAAAs80jFAAAAAKRt/bQAAAAAa6dpYQAAAABipCt+AAAAAN56VTYAAAAAXpizpwAAAABgJXGrAAAAAFiUPsEAAAAAvOynQAAAAACWnN22AAAAAEPkFa4AAAAAsqkleAAAAAAVdN9zAAAAAHvqifQAAAAAA0n6LwAAAACAuTx4AAAAAD6Z93cAAAAApS7GTwAAAADjfskvAAAAAA+ewj4AAAAAl9DQLAAAAAD9kSH/AAAAAEA+GYgAAAAAvsbprwAAAAB3NN+0AAAAANc6YrwAAAAAnfkv2gAAAACMQfjeAAAAAGA0pCgAAAAAvTnvHAAAAAC4jLlTAAAAALc9p4wAAAAAZjDhogAAAADAlSqiAAAAAAzt0eMAAAAAguPkrgAAAACEQa6HAAAAAFfNjdEAAAAAGoYWVAAAAABF3yVKAAAAADgonYgAAAAAb6PyRAAAAACG7abbAAAAABqiUFAAAAAA0SzSDAAAAACeoR0/AAAAAOgi1CcAAAAA9Dw29QAAAABFfJluAAAAAP8tr7AAAAAAaLAamQAAAABjRZ8PAAAAALEV3aUAAAAAAIRuuwAAAACHGl7bAAAAAIcEIUUAAAAASi25VAAAAADR+U0fAAAAAOWbOB4AAAAAmvLGFAAAAAAJNxX1AAAAAHtE2ScAAAAAk2NOTgAAAAD+65BYAAAAAOsZFekAAAAAWIKxJAAAAABiKM6BAAAAAE1YYQEAAAAA2sop3wAAAABCIl6iAAAAAP/T7yYAAAAAWZNJcQAAAABFOZsSAAAAAFzETvMAAAAAcFP0/AAAAADurpxIAAAAAIaDQdMAAAAAsS/3TAAAAAA0YVttAAAAAGstmGoAAAAAQqLbLAAAAABXzRZ4AAAAANqZ/18AAAAAdVwB9AAAAADsfpg2AAAAAMy2zKoAAAAACSy3KAAAAABDzRafAAAAAAzi404AAAAA3pVkeQAAAAAQ5caLAAAAAHXDzEcAAAAAS59+2QAAAABV2J9+AAAAAMejV/YAAAAAKwKUxwAAAADKYpLRAAAAAPvntMAAAAAA5v2IfQAAAAC9+IekAAAAABN+sCwAAAAAgOOzyQAAAABjjSj0AAAAAG2AsasAAAAA4CMoZgAAAAAHpm84AAAAANC8aZAAAAAAfiO4WQAAAAAzUg0HAAAAAGyLSTsAAAAAXBS5MQAAAADs/KtnAAAAAJO9YCIAAAAAnoesBAAAAAD0ogF9AAAAABKv9CQAAAAA1gbspQAAAABmhttUAAAAAOJ/TG4AAAAAhS0PpAAAAADx5KyDAAAAADmNIvEAAAAA5KJXxAAAAAAGOjP/AAAAADzZdKcAAAAAA8FPFgAAAAALDaYcAAAAAB0U9pgAAAAAr2EncwAAAABok7yTAAAAACRvTNIAAAAAcQKJkQAAAAAvcQE8AAAAALcp3hEAAAAA9ChDqgAAAACs3b0CAAAAAHROJI0AAAAA1R3GBgAAAAA6qGxzAAAAAOeWbGcAAAAAU9x6SgAAAADH/vP6AAAAAKOI8XIAAAAA8KA2VQAAAAAfGgFFAAAAANtgjI0AAAAAsGlzhQAAAABVwSsRAAAAAGXdNw0AAAAAbQSpewAAAAAx9MXlAAAAAMmuVXQAAAAAkTwfDwAAAAB9+wFIAAAAALLWklQAAAAASRZ3CgAAAABCqD25AAAAADrP138AAAAAA9Li1AAAAABj0TYLAAAAAHNsR/oAAAAA2maP7gAAAAB/GyXyAAAAAEsu8MgAAAAAspXNQgAAAAC1Xgb/AAAAALkz90EAAAAA48lYngAAAABm+qG1AAAAAMcXfW0AAAAAE90WugAAAADnSDIFAAAAAJmpDgYAAAAAnAkyCAAAAAD/iCk2AAAAAElp5lsAAAAAeHAxnAAAAADbQgcKAAAAAJ8CEbIAAAAAVY4u5wAAAAAOrWjNAAAAAMMgCpAAAAAAt+qJqQAAAACUbpfvAAAAAFTRiEYAAAAAi7yHngAAAAAyQ1DYAAAAAGJgz/QAAAAA7solGQAAAABxT00rAAAAAArI5bYAAAAARGqyPAAAAABFr7c7AAAAAFEE51wAAAAAum0rtgAAAAAMHHNDAAAAAM3EjogAAAAA4o430QAAAADk7fE+AAAAAAViqkEAAAAAGes0MwAAAABdjpDvAAAAAEbUJRQAAAAAdrnGOgAAAABrQWIuAAAAACvnK2sAAAAAEyRlJgAAAAC/12WyAAAAAEBI67MAAAAAtGGEZwAAAABGu6sdAAAAALuyVuYAAAAAgx0QzgAAAABtMTt3AAAAAMjVDVsAAAAARt941wAAAAAgkpeRAAAAAFLnED4AAAAAjv2/6wAAAADia1FwAAAAAJnk6jwAAAAAJ3jF/AAAAAAhFYtcAAAAAD0VY2UAAAAAd3N11gAAAACzKnTWAAAAAGIujsQAAAAAooiH2gAAAAB/yEYxAAAAADjjTNQAAAAASNHEhwAAAABtncUMAAAAADSX9KIAAAAAbiZNSAAAAACwhbuVAAAAALLTcSYAAAAAmSzpFAAAAABq7Ct3AAAAAPWp3D4AAAAAAe4gpwAAAACsTU9qAAAAAP2tI4QAAAAARNCL7QAAAAA06+aVAAAAAJoNQN0AAAAAJWtPhwAAAAAXEn70AAAAAJssGqkAAAAAAKFQUQAAAACT9kT0AAAAAFu3JOUAAAAANO5gIAAAAAB+VuXIAAAAAPHeI4QAAAAADT7B/wAAAADfokXMAAAAAJfg/f0AAAAArlfEXAAAAAAkYKSUAAAAAEQpA7IAAAAALJqctgAAAACjeaRfAAAAABqE3tEAAAAA0rJopAAAAAD4yLv2AAAAAFOxR8MAAAAAeEtDDwAAAACwX8hdAAAAAKjoZU0AAAAAV26whwAAAAC5K7eKAAAAACxfVTIAAAAA7fBnGAAAAACd50/yAAAAAFP5Jh8AAAAAiSQPKAAAAADlfY5NAAAAAP/8zuIAAAAAoS9UEgAAAAD7pTW4AAAAAHB5cVIAAAAARbh83AAAAADaocV3AAAAAG7HtekAAAAAmNEfcwAAAAD3YdvOAAAAAJPCtA4AAAAAoxmOwwAAAABrPuElAAAAAMak4r8AAAAAAUC/cgAAAAAhZCuIAAAAAN2mf+kAAAAAx83h5gAAAAAmbYMwAAAAAJiCnrQAAAAAQU55rAAAAADw+3SUAAAAAM/2XnEAAAAAcU07SwAAAADA5e+DAAAAAHttOgkAAAAAiGbszAAAAABluI1oAAAAAH2uALwAAAAAGUF/5wAAAABmMR5KAAAAALSCYAUAAAAAiKGWXwAAAAAnvn1PAAAAAAqH+bsAAAAABwJn9QAAAAAzsIpwAAAAABrZ0GcAAAAAhuYe5wAAAABO+FTkAAAAAM2/KvIAAAAAsRPS3gAAAACdx440AAAAAP4tumYAAAAA+bmCzQAAAADJHxanAAAAAPXucQIAAAAAonSCwAAAAACgjOtnAAAAABoWVc0AAAAAkdoAbwAAAACz7aNzAAAAAKW4ruYAAAAA8cr63gAAAABE6SNJAAAAAD2BPGMAAAAAdnPyEgAAAAD8U+XcAAAAAAcdXwAAAAAAEvif/wAAAAD8rlYaAAAAACG0El4AAAAA3437sgAAAAALlh8WAAAAAG9kxgcAAAAAdz6Z+QAAAACUVLt6AAAAAMOofScAAAAAiWSpbAAAAACQguXTAAAAAFroExIAAAAApWtqzgAAAABNjjRLAAAAAGf7vuUAAAAAHZvizwAAAAAHed+7AAAAABQIe3UAAAAA4XtGOAAAAAASc2AdAAAAANHNDk8AAAAAJzMFKAAAAAAXXfpMAAAAABu0ce0AAAAA4/NkJgAAAAA/+1OAAAAAADu2hoIAAAAAZmCDfwAAAADXQL4fAAAAAAS4ug0AAAAAG+OmAgAAAADu60ldAAAAAMsuxCUAAAAAj8UWaQAAAACr0RxfAAAAAAMdSFQAAAAA36GiRwAAAABk4uomAAAAAFcoM5EAAAAAs+yK7gAAAABcwIvCAAAAAIIBImQAAAAAfG9I6AAAAAAn4QM2AAAAALGuHggAAAAAcaGswgAAAACpMriNAAAAAI7aXk0AAAAAZTO/DgAAAABLMd42AAAAAEU6FlIAAAAAPk5rDAAAAADcO1eQAAAAABikK+4AAAAA6IPhsAAAAAALNbnjAAAAAH5zJeMAAAAAkpFEPwAAAABcpLkXAAAAAEQpk1gAAAAAg1qEDQAAAABVMEyiAAAAANdjUEYAAAAAlsVqdgAAAAAvFPWeAAAAAKqzNcIAAAAA98nh2wAAAAD29MteAAAAAAO9kOUAAAAAt+873wAAAAAAY/ZgAAAAAFdmymYAAAAAh6+t/wAAAACAUVNnAAAAAGFVacwAAAAAhn/nDQAAAADUmhjPAAAAAHmQWBgAAAAAql9jiwAAAACFC0uIAAAAAMPDQeUAAAAAR0NZ1QAAAABBn5deAAAAAB5TwDYAAAAAJuVeBQAAAABkVY1bAAAAAFMthYgAAAAA3ZKyXQAAAABC6R5uAAAAAPHG1l8AAAAA9cob5QAAAABoNpXQAAAAAADVHtoAAAAAoXyWdAAAAABzlNNLAAAAAAd7JTwAAAAAFQy61gAAAADNezk1AAAAAD3N/EMAAAAAqi9OHQAAAADu9V/uAAAAALEpjVkAAAAAy2UzWwAAAAC2chu5AAAAAIVrQEIAAAAAItdI3gAAAADERnBmAAAAAA34/pwAAAAARc9ZfAAAAAAlM5R9AAAAAMUAqCsAAAAAf48ghQAAAADY/1U9AAAAAGIcDcwAAAAAocqYuAAAAAA0etJoAAAAALJFCoYAAAAA3+LS9gAAAAAG6CGqAAAAAIZELuoAAAAAesiBRAAAAADIDXwHAAAAANY8KSQAAAAAzq0RkAAAAADNNOjsAAAAABZVTTsAAAAAMOJYbAAAAABNxQyWAAAAAOtIPlMAAAAAoxn9VQAAAAAyfQpBAAAAAFCxFm4AAAAAeul3ewAAAADixnHqAAAAACqMW+EAAAAAByiTLAAAAACfpwVeAAAAADppPwAAAAAANCiqtgAAAABv7Qs5AAAAAP5TNKcAAAAAOMhxeQAAAAAww45yAAAAALOIDngAAAAApdPt+gAAAABO/05GAAAAAHDeD98AAAAAH7Ye8QAAAADnCh6QAAAAANAEPSwAAAAAlwTimwAAAAB2scxWAAAAAMq2rHkAAAAAB/b8EwAAAAAQSW3SAAAAAMuKLnsAAAAAmRG5UAAAAAC2sGuCAAAAAIpjqUEAAAAAgvq4QgAAAABBhm3kAAAAAGa9p+kAAAAAJYzu4QAAAADz1dQfAAAAANC6C5YAAAAAbvuQowAAAABJEwlLAAAAAHwIY4oAAAAASP5riAAAAABzZpcJAAAAAKrZpgYAAAAAWaJRswAAAADVqREtAAAAAKjAN84AAAAACawW3wAAAADrqt14AAAAADA/MwwAAAAA6VoIkQAAAADKRR/qAAAAAGMhqpUAAAAA2gUL3QAAAADMhsOJAAAAAHy9Ru8AAAAApXZ9wwAAAAA6EmlLAAAAAM6zmT0AAAAAkPNYqQAAAAD6QheVAAAAAJ82DQgAAAAAqH9qbAAAAACFQMpFAAAAAMH5ZNsAAAAAq5UO+QAAAABRdDH1AAAAAMJR3gsAAAAAlcaqzQAAAADzWWEdAAAAAIQKqtoAAAAAwrvNwAAAAAD17N09AAAAABGwSVwAAAAA+rnH+AAAAAARTa6vAAAAAJLCGA4AAAAAIGRTswAAAAAbOwKbAAAAAFyofAUAAAAALDaxtAAAAADSpDyBAAAAABSlmB8AAAAAEX19IwAAAACEwHF8AAAAADXsqPwAAAAAe5IE4QAAAACZqU9pAAAAAGzMKdEAAAAAZuqdjQAAAABnaWP4AAAAAPW3T6wAAAAApqmHCgAAAACkUeS0AAAAANd0gnIAAAAAJ1219gAAAADYblUyAAAAANaeA3MAAAAA2JW20QAAAABE13o1AAAAAHE31nUAAAAAmABkGAAAAABaQFzGAAAAAKeAVTQAAAAAOU+S1wAAAABpLdz8AAAAAHOfZiIAAAAAKWLzEgAAAABSkJu/AAAAAKedPQQAAAAACtxWlAAAAABVHrLEAAAAAIgBSwMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==",
|
| 117 |
+
"cuda_rng_state": "BWVGVHP/DAAAAAAAAAAAAA=="
|
| 118 |
+
}
|
checkpoints/step_00050000/.complete
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"files": {
|
| 4 |
+
"config.json": "7d7a8111595963bc28a9b70cda8402bec27f0d6ca34b35fef100bfe5dba4ebeb",
|
| 5 |
+
"model.safetensors": "005ecd077859aeacef00b0a028b9179cce1a5c834da3c7c2737446ea826409e0",
|
| 6 |
+
"optimizer.safetensors": "bc253d8681a0c1e14cdc4a29179346d31009239541747a08d9292bc74e1eb8ea",
|
| 7 |
+
"training_state.json": "9abe70f8d05b681b7201c5c9269def6e8678ab4ca4f50d08471937e8ba855d7f"
|
| 8 |
+
}
|
| 9 |
+
}
|
checkpoints/step_00050000/config.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"checkpoint_type": "pretrain",
|
| 4 |
+
"model_config": {
|
| 5 |
+
"vocab_size": 4278,
|
| 6 |
+
"max_seq_len": 256,
|
| 7 |
+
"n_outcomes": 5,
|
| 8 |
+
"d_model": 256,
|
| 9 |
+
"n_layers": 8,
|
| 10 |
+
"n_heads": 4,
|
| 11 |
+
"d_ff": 1024,
|
| 12 |
+
"dropout": 0.0,
|
| 13 |
+
"rope_base": 10000.0
|
| 14 |
+
},
|
| 15 |
+
"training_config": {
|
| 16 |
+
"lr": 0.0003,
|
| 17 |
+
"weight_decay": 0.01,
|
| 18 |
+
"max_grad_norm": 1.0,
|
| 19 |
+
"warmup_steps": 1000,
|
| 20 |
+
"total_steps": 100000,
|
| 21 |
+
"batch_size": 256,
|
| 22 |
+
"max_ply": 256,
|
| 23 |
+
"discard_ply_limit": false,
|
| 24 |
+
"num_workers": 4,
|
| 25 |
+
"use_amp": true,
|
| 26 |
+
"accumulation_steps": 1,
|
| 27 |
+
"log_interval": 10,
|
| 28 |
+
"eval_interval": 500,
|
| 29 |
+
"checkpoint_interval": 5000,
|
| 30 |
+
"base_seed": 42,
|
| 31 |
+
"val_seed": 9223372036854775807,
|
| 32 |
+
"val_games": 512,
|
| 33 |
+
"checkpoint_dir": "checkpoints",
|
| 34 |
+
"log_dir": "logs",
|
| 35 |
+
"use_wandb": false,
|
| 36 |
+
"wandb_project": "pawn",
|
| 37 |
+
"device": "cuda"
|
| 38 |
+
}
|
| 39 |
+
}
|
checkpoints/step_00050000/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:005ecd077859aeacef00b0a028b9179cce1a5c834da3c7c2737446ea826409e0
|
| 3 |
+
size 38102280
|
checkpoints/step_00050000/optimizer.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bc253d8681a0c1e14cdc4a29179346d31009239541747a08d9292bc74e1eb8ea
|
| 3 |
+
size 76210148
|
checkpoints/step_00050000/training_state.json
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"format_version": 1,
|
| 3 |
+
"global_step": 50000,
|
| 4 |
+
"scheduler_state_dict": {
|
| 5 |
+
"step": 50000
|
| 6 |
+
},
|
| 7 |
+
"scaler_state_dict": {
|
| 8 |
+
"scale": 2097152.0,
|
| 9 |
+
"growth_factor": 2.0,
|
| 10 |
+
"backoff_factor": 0.5,
|
| 11 |
+
"growth_interval": 2000,
|
| 12 |
+
"_growth_tracker": 678
|
| 13 |
+
},
|
| 14 |
+
"optimizer_meta": {
|
| 15 |
+
"param_groups": [
|
| 16 |
+
{
|
| 17 |
+
"lr": 0.00016714190511769907,
|
| 18 |
+
"betas": [
|
| 19 |
+
0.9,
|
| 20 |
+
0.999
|
| 21 |
+
],
|
| 22 |
+
"eps": 1e-08,
|
| 23 |
+
"weight_decay": 0.01,
|
| 24 |
+
"amsgrad": false,
|
| 25 |
+
"maximize": false,
|
| 26 |
+
"foreach": null,
|
| 27 |
+
"capturable": false,
|
| 28 |
+
"differentiable": false,
|
| 29 |
+
"fused": null,
|
| 30 |
+
"decoupled_weight_decay": true,
|
| 31 |
+
"params": [
|
| 32 |
+
0,
|
| 33 |
+
1,
|
| 34 |
+
2,
|
| 35 |
+
3,
|
| 36 |
+
4,
|
| 37 |
+
5,
|
| 38 |
+
6,
|
| 39 |
+
7,
|
| 40 |
+
8,
|
| 41 |
+
9,
|
| 42 |
+
10,
|
| 43 |
+
11,
|
| 44 |
+
12,
|
| 45 |
+
13,
|
| 46 |
+
14,
|
| 47 |
+
15,
|
| 48 |
+
16,
|
| 49 |
+
17,
|
| 50 |
+
18,
|
| 51 |
+
19,
|
| 52 |
+
20,
|
| 53 |
+
21,
|
| 54 |
+
22,
|
| 55 |
+
23,
|
| 56 |
+
24,
|
| 57 |
+
25,
|
| 58 |
+
26,
|
| 59 |
+
27,
|
| 60 |
+
28,
|
| 61 |
+
29,
|
| 62 |
+
30,
|
| 63 |
+
31,
|
| 64 |
+
32,
|
| 65 |
+
33,
|
| 66 |
+
34,
|
| 67 |
+
35,
|
| 68 |
+
36,
|
| 69 |
+
37,
|
| 70 |
+
38,
|
| 71 |
+
39,
|
| 72 |
+
40,
|
| 73 |
+
41,
|
| 74 |
+
42,
|
| 75 |
+
43,
|
| 76 |
+
44,
|
| 77 |
+
45,
|
| 78 |
+
46,
|
| 79 |
+
47,
|
| 80 |
+
48,
|
| 81 |
+
49,
|
| 82 |
+
50,
|
| 83 |
+
51,
|
| 84 |
+
52,
|
| 85 |
+
53,
|
| 86 |
+
54,
|
| 87 |
+
55,
|
| 88 |
+
56,
|
| 89 |
+
57,
|
| 90 |
+
58,
|
| 91 |
+
59,
|
| 92 |
+
60,
|
| 93 |
+
61,
|
| 94 |
+
62,
|
| 95 |
+
63,
|
| 96 |
+
64,
|
| 97 |
+
65,
|
| 98 |
+
66,
|
| 99 |
+
67,
|
| 100 |
+
68,
|
| 101 |
+
69,
|
| 102 |
+
70,
|
| 103 |
+
71,
|
| 104 |
+
72,
|
| 105 |
+
73,
|
| 106 |
+
74,
|
| 107 |
+
75,
|
| 108 |
+
76,
|
| 109 |
+
77,
|
| 110 |
+
78
|
| 111 |
+
]
|
| 112 |
+
}
|
| 113 |
+
],
|
| 114 |
+
"scalars": null
|
| 115 |
+
},
|
| 116 |
+
"torch_rng_state": "h08rNwf38rOvAAAAAQAAAMIBAAAAAAAA5bVTKAAAAAC1CIbqAAAAAA3rKqAAAAAAGqogVAAAAABigsYrAAAAAAK5m78AAAAAcE43pAAAAAB+YHT7AAAAABSmyhcAAAAAiwIFRwAAAABru0i1AAAAAEAG8YYAAAAAyvLLfgAAAADgU+nAAAAAAFqEKAgAAAAA72kt/gAAAAC/V34GAAAAAJRKvuQAAAAARwrwvAAAAAB8+1buAAAAAHw2FqQAAAAATEURKwAAAADppdGbAAAAALFK1WsAAAAA9FjHJwAAAAAKRlfWAAAAAAxZlR8AAAAA7PS73AAAAABeXBUoAAAAAIA1IjIAAAAAC2JcuwAAAACKVAI/AAAAAJ4PbRkAAAAA/OdaFAAAAAB2VuHVAAAAAOjy5bMAAAAAKivF/gAAAACA5OnEAAAAAC+aNjMAAAAABNa0HQAAAABgCGjwAAAAACVYnKcAAAAAYnQAHgAAAACed89WAAAAAMk1u3oAAAAAUKriUQAAAAC9t94zAAAAAPbZbRcAAAAAGOth4wAAAAAs80jFAAAAAKRt/bQAAAAAa6dpYQAAAABipCt+AAAAAN56VTYAAAAAXpizpwAAAABgJXGrAAAAAFiUPsEAAAAAvOynQAAAAACWnN22AAAAAEPkFa4AAAAAsqkleAAAAAAVdN9zAAAAAHvqifQAAAAAA0n6LwAAAACAuTx4AAAAAD6Z93cAAAAApS7GTwAAAADjfskvAAAAAA+ewj4AAAAAl9DQLAAAAAD9kSH/AAAAAEA+GYgAAAAAvsbprwAAAAB3NN+0AAAAANc6YrwAAAAAnfkv2gAAAACMQfjeAAAAAGA0pCgAAAAAvTnvHAAAAAC4jLlTAAAAALc9p4wAAAAAZjDhogAAAADAlSqiAAAAAAzt0eMAAAAAguPkrgAAAACEQa6HAAAAAFfNjdEAAAAAGoYWVAAAAABF3yVKAAAAADgonYgAAAAAb6PyRAAAAACG7abbAAAAABqiUFAAAAAA0SzSDAAAAACeoR0/AAAAAOgi1CcAAAAA9Dw29QAAAABFfJluAAAAAP8tr7AAAAAAaLAamQAAAABjRZ8PAAAAALEV3aUAAAAAAIRuuwAAAACHGl7bAAAAAIcEIUUAAAAASi25VAAAAADR+U0fAAAAAOWbOB4AAAAAmvLGFAAAAAAJNxX1AAAAAHtE2ScAAAAAk2NOTgAAAAD+65BYAAAAAOsZFekAAAAAWIKxJAAAAABiKM6BAAAAAE1YYQEAAAAA2sop3wAAAABCIl6iAAAAAP/T7yYAAAAAWZNJcQAAAABFOZsSAAAAAFzETvMAAAAAcFP0/AAAAADurpxIAAAAAIaDQdMAAAAAsS/3TAAAAAA0YVttAAAAAGstmGoAAAAAQqLbLAAAAABXzRZ4AAAAANqZ/18AAAAAdVwB9AAAAADsfpg2AAAAAMy2zKoAAAAACSy3KAAAAABDzRafAAAAAAzi404AAAAA3pVkeQAAAAAQ5caLAAAAAHXDzEcAAAAAS59+2QAAAABV2J9+AAAAAMejV/YAAAAAKwKUxwAAAADKYpLRAAAAAPvntMAAAAAA5v2IfQAAAAC9+IekAAAAABN+sCwAAAAAgOOzyQAAAABjjSj0AAAAAG2AsasAAAAA4CMoZgAAAAAHpm84AAAAANC8aZAAAAAAfiO4WQAAAAAzUg0HAAAAAGyLSTsAAAAAXBS5MQAAAADs/KtnAAAAAJO9YCIAAAAAnoesBAAAAAD0ogF9AAAAABKv9CQAAAAA1gbspQAAAABmhttUAAAAAOJ/TG4AAAAAhS0PpAAAAADx5KyDAAAAADmNIvEAAAAA5KJXxAAAAAAGOjP/AAAAADzZdKcAAAAAA8FPFgAAAAALDaYcAAAAAB0U9pgAAAAAr2EncwAAAABok7yTAAAAACRvTNIAAAAAcQKJkQAAAAAvcQE8AAAAALcp3hEAAAAA9ChDqgAAAACs3b0CAAAAAHROJI0AAAAA1R3GBgAAAAA6qGxzAAAAAOeWbGcAAAAAU9x6SgAAAADH/vP6AAAAAKOI8XIAAAAA8KA2VQAAAAAfGgFFAAAAANtgjI0AAAAAsGlzhQAAAABVwSsRAAAAAGXdNw0AAAAAbQSpewAAAAAx9MXlAAAAAMmuVXQAAAAAkTwfDwAAAAB9+wFIAAAAALLWklQAAAAASRZ3CgAAAABCqD25AAAAADrP138AAAAAA9Li1AAAAABj0TYLAAAAAHNsR/oAAAAA2maP7gAAAAB/GyXyAAAAAEsu8MgAAAAAspXNQgAAAAC1Xgb/AAAAALkz90EAAAAA48lYngAAAABm+qG1AAAAAMcXfW0AAAAAE90WugAAAADnSDIFAAAAAJmpDgYAAAAAnAkyCAAAAAD/iCk2AAAAAElp5lsAAAAAeHAxnAAAAADbQgcKAAAAAJ8CEbIAAAAAVY4u5wAAAAAOrWjNAAAAAMMgCpAAAAAAt+qJqQAAAACUbpfvAAAAAFTRiEYAAAAAi7yHngAAAAAyQ1DYAAAAAGJgz/QAAAAA7solGQAAAABxT00rAAAAAArI5bYAAAAARGqyPAAAAABFr7c7AAAAAFEE51wAAAAAum0rtgAAAAAMHHNDAAAAAM3EjogAAAAA4o430QAAAADk7fE+AAAAAAViqkEAAAAAGes0MwAAAABdjpDvAAAAAEbUJRQAAAAAdrnGOgAAAABrQWIuAAAAACvnK2sAAAAAEyRlJgAAAAC/12WyAAAAAEBI67MAAAAAtGGEZwAAAABGu6sdAAAAALuyVuYAAAAAgx0QzgAAAABtMTt3AAAAAMjVDVsAAAAARt941wAAAAAgkpeRAAAAAFLnED4AAAAAjv2/6wAAAADia1FwAAAAAJnk6jwAAAAAJ3jF/AAAAAAhFYtcAAAAAD0VY2UAAAAAd3N11gAAAACzKnTWAAAAAGIujsQAAAAAooiH2gAAAAB/yEYxAAAAADjjTNQAAAAASNHEhwAAAABtncUMAAAAADSX9KIAAAAAbiZNSAAAAACwhbuVAAAAALLTcSYAAAAAmSzpFAAAAABq7Ct3AAAAAPWp3D4AAAAAAe4gpwAAAACsTU9qAAAAAP2tI4QAAAAARNCL7QAAAAA06+aVAAAAAJoNQN0AAAAAJWtPhwAAAAAXEn70AAAAAJssGqkAAAAAAKFQUQAAAACT9kT0AAAAAFu3JOUAAAAANO5gIAAAAAB+VuXIAAAAAPHeI4QAAAAADT7B/wAAAADfokXMAAAAAJfg/f0AAAAArlfEXAAAAAAkYKSUAAAAAEQpA7IAAAAALJqctgAAAACjeaRfAAAAABqE3tEAAAAA0rJopAAAAAD4yLv2AAAAAFOxR8MAAAAAeEtDDwAAAACwX8hdAAAAAKjoZU0AAAAAV26whwAAAAC5K7eKAAAAACxfVTIAAAAA7fBnGAAAAACd50/yAAAAAFP5Jh8AAAAAiSQPKAAAAADlfY5NAAAAAP/8zuIAAAAAoS9UEgAAAAD7pTW4AAAAAHB5cVIAAAAARbh83AAAAADaocV3AAAAAG7HtekAAAAAmNEfcwAAAAD3YdvOAAAAAJPCtA4AAAAAoxmOwwAAAABrPuElAAAAAMak4r8AAAAAAUC/cgAAAAAhZCuIAAAAAN2mf+kAAAAAx83h5gAAAAAmbYMwAAAAAJiCnrQAAAAAQU55rAAAAADw+3SUAAAAAM/2XnEAAAAAcU07SwAAAADA5e+DAAAAAHttOgkAAAAAiGbszAAAAABluI1oAAAAAH2uALwAAAAAGUF/5wAAAABmMR5KAAAAALSCYAUAAAAAiKGWXwAAAAAnvn1PAAAAAAqH+bsAAAAABwJn9QAAAAAzsIpwAAAAABrZ0GcAAAAAhuYe5wAAAABO+FTkAAAAAM2/KvIAAAAAsRPS3gAAAACdx440AAAAAP4tumYAAAAA+bmCzQAAAADJHxanAAAAAPXucQIAAAAAonSCwAAAAACgjOtnAAAAABoWVc0AAAAAkdoAbwAAAACz7aNzAAAAAKW4ruYAAAAA8cr63gAAAABE6SNJAAAAAD2BPGMAAAAAdnPyEgAAAAD8U+XcAAAAAAcdXwAAAAAAEvif/wAAAAD8rlYaAAAAACG0El4AAAAA3437sgAAAAALlh8WAAAAAG9kxgcAAAAAdz6Z+QAAAACUVLt6AAAAAMOofScAAAAAiWSpbAAAAACQguXTAAAAAFroExIAAAAApWtqzgAAAABNjjRLAAAAAGf7vuUAAAAAHZvizwAAAAAHed+7AAAAABQIe3UAAAAA4XtGOAAAAAASc2AdAAAAANHNDk8AAAAAJzMFKAAAAAAXXfpMAAAAABu0ce0AAAAA4/NkJgAAAAA/+1OAAAAAADu2hoIAAAAAZmCDfwAAAADXQL4fAAAAAAS4ug0AAAAAG+OmAgAAAADu60ldAAAAAMsuxCUAAAAAj8UWaQAAAACr0RxfAAAAAAMdSFQAAAAA36GiRwAAAABk4uomAAAAAFcoM5EAAAAAs+yK7gAAAABcwIvCAAAAAIIBImQAAAAAfG9I6AAAAAAn4QM2AAAAALGuHggAAAAAcaGswgAAAACpMriNAAAAAI7aXk0AAAAAZTO/DgAAAABLMd42AAAAAEU6FlIAAAAAPk5rDAAAAADcO1eQAAAAABikK+4AAAAA6IPhsAAAAAALNbnjAAAAAH5zJeMAAAAAkpFEPwAAAABcpLkXAAAAAEQpk1gAAAAAg1qEDQAAAABVMEyiAAAAANdjUEYAAAAAlsVqdgAAAAAvFPWeAAAAAKqzNcIAAAAA98nh2wAAAAD29MteAAAAAAO9kOUAAAAAt+873wAAAAAAY/ZgAAAAAFdmymYAAAAAh6+t/wAAAACAUVNnAAAAAGFVacwAAAAAhn/nDQAAAADUmhjPAAAAAHmQWBgAAAAAql9jiwAAAACFC0uIAAAAAMPDQeUAAAAAR0NZ1QAAAABBn5deAAAAAB5TwDYAAAAAJuVeBQAAAABkVY1bAAAAAFMthYgAAAAA3ZKyXQAAAABC6R5uAAAAAPHG1l8AAAAA9cob5QAAAABoNpXQAAAAAADVHtoAAAAAoXyWdAAAAABzlNNLAAAAAAd7JTwAAAAAFQy61gAAAADNezk1AAAAAD3N/EMAAAAAqi9OHQAAAADu9V/uAAAAALEpjVkAAAAAy2UzWwAAAAC2chu5AAAAAIVrQEIAAAAAItdI3gAAAADERnBmAAAAAA34/pwAAAAARc9ZfAAAAAAlM5R9AAAAAMUAqCsAAAAAf48ghQAAAADY/1U9AAAAAGIcDcwAAAAAocqYuAAAAAA0etJoAAAAALJFCoYAAAAA3+LS9gAAAAAG6CGqAAAAAIZELuoAAAAAesiBRAAAAADIDXwHAAAAANY8KSQAAAAAzq0RkAAAAADNNOjsAAAAABZVTTsAAAAAMOJYbAAAAABNxQyWAAAAAOtIPlMAAAAAoxn9VQAAAAAyfQpBAAAAAFCxFm4AAAAAeul3ewAAAADixnHqAAAAACqMW+EAAAAAByiTLAAAAACfpwVeAAAAADppPwAAAAAANCiqtgAAAABv7Qs5AAAAAP5TNKcAAAAAOMhxeQAAAAAww45yAAAAALOIDngAAAAApdPt+gAAAABO/05GAAAAAHDeD98AAAAAH7Ye8QAAAADnCh6QAAAAANAEPSwAAAAAlwTimwAAAAB2scxWAAAAAMq2rHkAAAAAB/b8EwAAAAAQSW3SAAAAAMuKLnsAAAAAmRG5UAAAAAC2sGuCAAAAAIpjqUEAAAAAgvq4QgAAAABBhm3kAAAAAGa9p+kAAAAAJYzu4QAAAADz1dQfAAAAANC6C5YAAAAAbvuQowAAAABJEwlLAAAAAHwIY4oAAAAASP5riAAAAABzZpcJAAAAAKrZpgYAAAAAWaJRswAAAADVqREtAAAAAKjAN84AAAAACawW3wAAAADrqt14AAAAADA/MwwAAAAA6VoIkQAAAADKRR/qAAAAAGMhqpUAAAAA2gUL3QAAAADMhsOJAAAAAHy9Ru8AAAAApXZ9wwAAAAA6EmlLAAAAAM6zmT0AAAAAkPNYqQAAAAD6QheVAAAAAJ82DQgAAAAAqH9qbAAAAACFQMpFAAAAAMH5ZNsAAAAAq5UO+QAAAABRdDH1AAAAAMJR3gsAAAAAlcaqzQAAAADzWWEdAAAAAIQKqtoAAAAAwrvNwAAAAAD17N09AAAAABGwSVwAAAAA+rnH+AAAAAARTa6vAAAAAJLCGA4AAAAAIGRTswAAAAAbOwKbAAAAAFyofAUAAAAALDaxtAAAAADSpDyBAAAAABSlmB8AAAAAEX19IwAAAACEwHF8AAAAADXsqPwAAAAAe5IE4QAAAACZqU9pAAAAAGzMKdEAAAAAZuqdjQAAAABnaWP4AAAAAPW3T6wAAAAApqmHCgAAAACkUeS0AAAAANd0gnIAAAAAJ1219gAAAADYblUyAAAAANaeA3MAAAAA2JW20QAAAABE13o1AAAAAHE31nUAAAAAmABkGAAAAABaQFzGAAAAAKeAVTQAAAAAOU+S1wAAAABpLdz8AAAAAHOfZiIAAAAAKWLzEgAAAABSkJu/AAAAAKedPQQAAAAACtxWlAAAAABVHrLEAAAAAIgBSwMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==",
|
| 117 |
+
"cuda_rng_state": "BWVGVHP/DAAAAAAAAAAAAA=="
|
| 118 |
+
}
|