thomas-schweich committed on
Commit
929fee4
·
verified ·
1 Parent(s): ac274df

Upload folder using huggingface_hub

Browse files
Files changed (6) hide show
  1. README.md +4 -28
  2. config.json +39 -0
  3. metrics.jsonl +0 -0
  4. model.safetensors +3 -0
  5. optimizer.safetensors +3 -0
  6. training_state.json +118 -0
README.md CHANGED
@@ -6,28 +6,12 @@ tags:
6
  - transformer
7
  - causal-lm
8
  - world-model
9
- datasets:
10
- - random-self-play
11
- model-index:
12
- - name: pawn-small
13
- results:
14
- - task:
15
- type: next-move-prediction
16
- metrics:
17
- - name: Val Loss
18
- type: loss
19
- value: 3.15
20
- - name: Val Accuracy
21
- type: accuracy
22
- value: 6.7
23
  ---
24
 
25
  # PAWN-SMALL
26
 
27
  A causal transformer trained on random chess games, designed as a testbed for finetuning and augmentation methods at small scales.
28
 
29
- ## Model Details
30
-
31
  | | |
32
  |---|---|
33
  | **Parameters** | 9.5M |
@@ -35,31 +19,23 @@ A causal transformer trained on random chess games, designed as a testbed for fi
35
  | **d_model** | 256 |
36
  | **Layers** | 8 |
37
  | **Heads** | 4 |
38
- | **Vocabulary** | 4,278 tokens (4,096 grid + 176 promotions + 5 outcomes + 1 PAD) |
39
- | **Sequence length** | 256 |
40
- | **Training steps** | 80K/100K |
41
- | **Best val loss** | 3.150 (step 80,000) |
42
  | **Best val accuracy** | 6.7% |
43
 
44
  ## Usage
45
 
46
  ```python
47
- import torch
48
  from pawn.config import CLMConfig
49
  from pawn.model import PAWNCLM
50
 
51
  cfg = CLMConfig.small()
52
  model = PAWNCLM(cfg)
53
-
54
- ckpt = torch.load("model.pt", map_location="cpu", weights_only=False)
55
- model.load_state_dict(ckpt["model_state_dict"])
56
  model.eval()
57
  ```
58
 
59
- ## Training
60
-
61
- Trained from scratch on random self-play games generated by a Rust chess engine (shakmaty).
62
- See the [PAWN repository](https://github.com/thomas-schweich/PAWN) for training code, data pipeline, and evaluation suite.
63
 
64
  ## License
65
 
 
6
  - transformer
7
  - causal-lm
8
  - world-model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  ---
10
 
11
  # PAWN-SMALL
12
 
13
  A causal transformer trained on random chess games, designed as a testbed for finetuning and augmentation methods at small scales.
14
 
 
 
15
  | | |
16
  |---|---|
17
  | **Parameters** | 9.5M |
 
19
  | **d_model** | 256 |
20
  | **Layers** | 8 |
21
  | **Heads** | 4 |
22
+ | **Best val loss** | 3.1500 (step 80,000) |
 
 
 
23
  | **Best val accuracy** | 6.7% |
24
 
25
  ## Usage
26
 
27
  ```python
28
+ from safetensors.torch import load_file
29
  from pawn.config import CLMConfig
30
  from pawn.model import PAWNCLM
31
 
32
  cfg = CLMConfig.small()
33
  model = PAWNCLM(cfg)
34
+ model.load_state_dict(load_file("model.safetensors"))
 
 
35
  model.eval()
36
  ```
37
 
38
+ See the [PAWN repository](https://github.com/thomas-schweich/PAWN) for the training code and evaluation suite.
 
 
 
39
 
40
  ## License
41
 
config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "format_version": 1,
3
+ "checkpoint_type": "pretrain",
4
+ "model_config": {
5
+ "vocab_size": 4278,
6
+ "max_seq_len": 256,
7
+ "n_outcomes": 5,
8
+ "d_model": 256,
9
+ "n_layers": 8,
10
+ "n_heads": 4,
11
+ "d_ff": 1024,
12
+ "dropout": 0.0,
13
+ "rope_base": 10000.0
14
+ },
15
+ "training_config": {
16
+ "lr": 0.0003,
17
+ "weight_decay": 0.01,
18
+ "max_grad_norm": 1.0,
19
+ "warmup_steps": 1000,
20
+ "total_steps": 100000,
21
+ "batch_size": 256,
22
+ "max_ply": 256,
23
+ "discard_ply_limit": false,
24
+ "num_workers": 4,
25
+ "use_amp": true,
26
+ "accumulation_steps": 1,
27
+ "log_interval": 10,
28
+ "eval_interval": 500,
29
+ "checkpoint_interval": 5000,
30
+ "base_seed": 42,
31
+ "val_seed": 9223372036854775807,
32
+ "val_games": 512,
33
+ "checkpoint_dir": "logs/run_20260322_194632/checkpoints",
34
+ "log_dir": "logs",
35
+ "use_wandb": false,
36
+ "wandb_project": "pawn",
37
+ "device": "cuda"
38
+ }
39
+ }
metrics.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e376e1fb26da29408a2b65f64b177417b2692b32d1f1ecab2b9973c224c9b29
3
+ size 38102280
optimizer.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c48cda6c937c0572ea98abc2ecb9708bd9b47c972b7dc11c2ffc4b588fe1cba
3
+ size 76210148
training_state.json ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "format_version": 1,
3
+ "global_step": 80000,
4
+ "scheduler_state_dict": {
5
+ "step": 80000
6
+ },
7
+ "scaler_state_dict": {
8
+ "scale": 2097152.0,
9
+ "growth_factor": 2.0,
10
+ "backoff_factor": 0.5,
11
+ "growth_interval": 2000,
12
+ "_growth_tracker": 19
13
+ },
14
+ "optimizer_meta": {
15
+ "param_groups": [
16
+ {
17
+ "lr": 5.628851523330708e-05,
18
+ "betas": [
19
+ 0.9,
20
+ 0.95
21
+ ],
22
+ "eps": 1e-08,
23
+ "weight_decay": 0.01,
24
+ "amsgrad": false,
25
+ "maximize": false,
26
+ "foreach": null,
27
+ "capturable": false,
28
+ "differentiable": false,
29
+ "fused": null,
30
+ "decoupled_weight_decay": true,
31
+ "params": [
32
+ 0,
33
+ 1,
34
+ 2,
35
+ 3,
36
+ 4,
37
+ 5,
38
+ 6,
39
+ 7,
40
+ 8,
41
+ 9,
42
+ 10,
43
+ 11,
44
+ 12,
45
+ 13,
46
+ 14,
47
+ 15,
48
+ 16,
49
+ 17,
50
+ 18,
51
+ 19,
52
+ 20,
53
+ 21,
54
+ 22,
55
+ 23,
56
+ 24,
57
+ 25,
58
+ 26,
59
+ 27,
60
+ 28,
61
+ 29,
62
+ 30,
63
+ 31,
64
+ 32,
65
+ 33,
66
+ 34,
67
+ 35,
68
+ 36,
69
+ 37,
70
+ 38,
71
+ 39,
72
+ 40,
73
+ 41,
74
+ 42,
75
+ 43,
76
+ 44,
77
+ 45,
78
+ 46,
79
+ 47,
80
+ 48,
81
+ 49,
82
+ 50,
83
+ 51,
84
+ 52,
85
+ 53,
86
+ 54,
87
+ 55,
88
+ 56,
89
+ 57,
90
+ 58,
91
+ 59,
92
+ 60,
93
+ 61,
94
+ 62,
95
+ 63,
96
+ 64,
97
+ 65,
98
+ 66,
99
+ 67,
100
+ 68,
101
+ 69,
102
+ 70,
103
+ 71,
104
+ 72,
105
+ 73,
106
+ 74,
107
+ 75,
108
+ 76,
109
+ 77,
110
+ 78
111
+ ]
112
+ }
113
+ ],
114
+ "scalars": null
115
+ },
116
+ "torch_rng_state": "2zhnZJ431tEfAAAAAQAAAFICAAAAAAAAAyBJCgAAAACmJ+YoAAAAABphNIkAAAAAqzTvZAAAAABLASERAAAAAI1bxlEAAAAAKLPJhQAAAABEfrWMAAAAAINGcPUAAAAAzDff0gAAAADvo4FsAAAAAGSegpwAAAAAor3sFgAAAAALhkLvAAAAAPFsF14AAAAAfK5c1QAAAABGGsasAAAAACY4pNUAAAAAgnm6hQAAAAApcHKbAAAAAG7DejYAAAAAa6GLggAAAADqIkiQAAAAACoEFkgAAAAA10a9ewAAAAD7DanqAAAAAMW+FMIAAAAAm6ixdAAAAAC9O+3+AAAAAEUHTRYAAAAAIJdV4QAAAACKOka3AAAAAPVF8GIAAAAAkjiHQwAAAACXU1i0AAAAABpReokAAAAAVqjwWAAAAAA8jwkkAAAAAM9o8j4AAAAAFrJtxQAAAABV+spQAAAAABiaTGEAAAAA5Nk5KQAAAABAKP5HAAAAAPLe/P0AAAAAvXTccQAAAAD5xLH4AAAAAEjIwXYAAAAAOTAUrQAAAABV4bn0AAAAANmSYSMAAAAArswEDwAAAAD/roBiAAAAAJFZbDwAAAAA368GMwAAAABYcI+wAAAAAMAbsWYAAAAATG496QAAAACaqhZEAAAAAONDseoAAAAA2URCvgAAAABKoJ2kAAAAAOyUa9gAAAAA+8ZYMgAAAAD3dkybAAAAAMqVYoAAAAAAcyTvnwAAAAD3azk/AAAAAAdmOgAAAAAAX6Sf8gAAAAAiMEPLAAAAAD1HPUcAAAAAFQ1VngAAAAA2U0VLAAAAAErEAYEAAAAA6kD1CgAAAABQrSj5AAAAAEGgu00AAAAA3VeZYQAAAADjctnXAAAAAJXwtBAAAAAA0o4krwAAAAAjL4u4AAAAAIeRP2EAAAAAG/CxMwAAAABUf7j+AAAAAEWSbe4AAAAAcqgboQAAAACDxGefAAAAACivMwkAAAAAZq3sqgAAAADf+q5cAAAAAKVDjwEAAAAA6ERMpAAAAAAGTAnIAAAAAGXQ2PYAAAAA9lmGDQAAAADOd1UoAAAAAPjaBNMAAAAAi21CLgAAAAD3Hu+GAAAAANt/5fwAAAAAq4PJ8QAAAAANjLGQAAAAADHF8gIAAAAAA4YKYAAAAABczHdkAAAAAHRZcgMAAAAAkHJ7hQAAAAAg7G4PAAAAAKlccNYAAAAAsjp/IwAAAADq8FaoAAAAACzfn4kAAAAAHImxaAAAAAA0fEe8AAAAALICc6QAAAAAepiPFwAAAACTmtTZAAAAADhpaegAAAAAY+9cEwAAAAARWb9NAAAAAArVKXMAAAAAwo+9tAAAAACURhbEAAAAAGq6P4MAAAAAH42CGQAAAACqXbzcAAAAAP5NoMQAAAAA5h5bwAAAAAC16/w9AAAAABQ1JyQAAAAAB9ex2QAAAAC28iesAAAAAKX3lrMAAAAAEOGiHQAAAACs3sj5AAAAAOXd8YEAAAAAHB4UBgAAAAA5b06RAAAAAJBFJxoAAAAAWO9mfgAAAADGH9a/AAAAAPzh3GwAAAAAXHL5CwAAAADdXnfuAAAAAJJje2wAAAAA283NYAAAAAA6gyAIAAAAAILwcZAAAAAAVNRD0QAAAAAXlSSRAAAAAF7WOmkAAAAAFeyxVwAAAACrsT5lAAAAAOkeqT8AAAAAdE/pQwAAAADywAuYAAAAAA2DHpcAAAAADnq5uwAAAACqIdYBAAAAACWuSOgAAAAASzTm+QAAAABcKJPOAAAAAHx93WYAAAAA7zTdowAAAABzVR/YAAAAAKU7jkMAAAAAJgzxnAAAAABkvkttAAAAAGym85IAAAAAHaGMVgAAAADUcezbAAAAAN50M9QAAAAAsqzw6gAAAADgjEuLAAAAAFpkhc4AAAAAS41VxwAAAABtHBvMAAAAAEQeUm8AAAAA58ofMwAAAAB/E3JnAAAAAGgr7/
cAAAAAvrYq2QAAAAB/NCgQAAAAAF4HfZYAAAAA7aRq3wAAAAD2bQSCAAAAAE1G/XIAAAAARVUARwAAAACUdPYyAAAAAKmMNnsAAAAA8/UMLwAAAACMpKuLAAAAAMc58eUAAAAAexgyIwAAAAA9RQ2QAAAAAMWJFHQAAAAAludx+AAAAABkXeZnAAAAAMY59wAAAAAAt9nOjgAAAABv4blZAAAAAN6Kxp8AAAAAc/qO9QAAAADmnC7fAAAAAN86/0EAAAAAQfmETgAAAACGTbh7AAAAABo/1+sAAAAAUM0DuAAAAADR4s32AAAAAKpdbaEAAAAA0bHFTQAAAADGsskpAAAAAHn87kAAAAAAi78EVgAAAAC8hq5DAAAAAAzW9qgAAAAAenPQ+AAAAACfH0wKAAAAAHVwDJAAAAAA2wbsQQAAAABhoc4gAAAAAOpSnjsAAAAA5hy9ZAAAAAAbnPItAAAAAOCSYlUAAAAAEODVowAAAAAAD/wIAAAAAILf/VcAAAAARzz+yQAAAABu7PKnAAAAAEvJ9jgAAAAAQGuP4AAAAAA9yqdTAAAAAKu5t3EAAAAA+x4LYAAAAABR9msXAAAAANCSjNIAAAAAN0u+LgAAAABrbuR3AAAAACFAyS8AAAAAXn23AQAAAADz/JV9AAAAALQ8RvAAAAAATMji5AAAAABTI8yZAAAAADnPbh4AAAAACVybngAAAADnGG6SAAAAAGxAU1MAAAAAkuMwrgAAAABc3YG5AAAAAEypY44AAAAAKwwTxAAAAAD+M+hqAAAAAP+knI4AAAAA7cymEwAAAABCdTuzAAAAABkHsJkAAAAAETtonwAAAAB6zAO8AAAAADZPIDIAAAAAUpebhQAAAABdhyerAAAAAOVkxpEAAAAAGuPACwAAAAC5wMqtAAAAAL4wAsQAAAAAsqY3zQAAAADSqzRWAAAAAPJPVwQAAAAAKtgpKQAAAADudUQOAAAAAPnxIpoAAAAAp6KMpgAAAACH2C8wAAAAAD1nOL4AAAAARKjBpQAAAACFNZZWAAAAAGAlWOMAAAAAVNlrrwAAAAB+7DmhAAAAAC0GP/IAAAAAEhyiewAAAACB2eH9AAAAAJfII0oAAAAA/JM8CgAAAABnnnTmAAAAAKNo/igAAAAACI8TBwAAAADDWRB7AAAAAGp0IG8AAAAAyaovGQAAAAD8QjozAAAAAKwlVKIAAAAAZ9HEcwAAAADqCGvaAAAAAAlRZqsAAAAA7/0kTQAAAABQwOsmAAAAADZJJmQAAAAA9rCBeAAAAAAEK02/AAAAAKxFGDwAAAAAZsvjCgAAAABm/hoEAAAAAKN5J24AAAAAquoIaAAAAACmC3s0AAAAAP1CwTcAAAAAGe901AAAAABZ5AXHAAAAACVMlmoAAAAAQISckwAAAABRJ1YmAAAAANeEOaEAAAAAq9LDhAAAAACCxZjpAAAAABobS4IAAAAA0vGNuAAAAABIesrPAAAAANujo8gAAAAAK1aK9QAAAAAKH4d0AAAAAKwX8hQAAAAANTOb/gAAAABUO70EAAAAABMWaHsAAAAAGTFkNQAAAAA7CAMoAAAAAF3ig0AAAAAAMV9gegAAAAAvfKroAAAAAFECgfIAAAAA9/9P3AAAAAC6wKWLAAAAABuEQRcAAAAALRZ8SAAAAABhai8dAAAAADTav2gAAAAAg1nHhQAAAAB7Gu5WAAAAAOuCEUwAAAAAlF2PSQAAAADmg0gbAAAAAP8JtnUAAAAApOycXwAAAABlnjNaAAAAAMhUCnoAAAAAqp4DyAAAAADD9scXAAAAADJ07r4AAAAAewTzPgAAAAARFg2hAAAAALbkJ9sAAAAA3PnifwAAAACzGJLcAAAAAFvYk8oAAAAA8wJtawAAAAAVOkwTAAAAAI70q+MAAAAAewNsvwAAAADcPyqRAAAAABegvHQAAAAAicdvagAAAADjmtzOAAAAAL2G9jcAAAAArx5cyAAAAA
A0UttJAAAAAMclPW8AAAAACgj67QAAAABSmuCTAAAAAKb3I+AAAAAAz9MRmAAAAADDJ0B+AAAAAOcDoJQAAAAADXnGewAAAAA3WxulAAAAAMLKAnoAAAAA93czLwAAAABlrGr8AAAAAK1KyecAAAAA8s36igAAAACzPnl1AAAAAEXOnscAAAAAnMhNYAAAAAD6xPH+AAAAAN1vhjIAAAAAz+V+kgAAAABchZg1AAAAAK3PGe0AAAAAGfe0QQAAAADzQTaaAAAAADf1qyQAAAAAGErQ9gAAAADHCbkJAAAAAGT06wEAAAAAAvRsgAAAAAAd1kTSAAAAANdzzVMAAAAA+R0SdQAAAAD3zj5nAAAAACED/ioAAAAAXyPRDQAAAACxcWG1AAAAAODDm5wAAAAAzBWDQAAAAAC9oZKoAAAAAOU3OaUAAAAAcOfakAAAAABXKIvdAAAAAMTp2hgAAAAAxDfBMAAAAADjcx2oAAAAAPcLw/MAAAAAvejwmgAAAAA5+25dAAAAALO6SgwAAAAAubf29gAAAADlL+kgAAAAABntOJUAAAAAcBQthwAAAABxASGhAAAAAIdJmB4AAAAAmlktkAAAAADKzPcCAAAAAGhxL/oAAAAA9Wi9nAAAAAApt1ZbAAAAAHmXm9AAAAAANod7kgAAAABT6vmEAAAAADx7xdIAAAAAfUy1KQAAAABcJzFjAAAAACFSK7IAAAAALn+8yQAAAACH86irAAAAAIbQuqMAAAAAKdGRBAAAAAB/HRK0AAAAANFw29kAAAAAsTshzgAAAADKK5D8AAAAAIhIcuIAAAAAeRHoKwAAAACsmca/AAAAAFl7CBIAAAAAHONGQAAAAAB0S768AAAAAPQvi5gAAAAAPiirYwAAAADQkjgFAAAAAMcjQaIAAAAASX2srgAAAADg2JjAAAAAAJ94ojYAAAAAj+qstgAAAADbNJGAAAAAACFGBisAAAAAIc3ciAAAAADKiiKnAAAAAPxDpbQAAAAA8wX98AAAAAAF6EpJAAAAAF6/ROAAAAAAcmwf4AAAAAAt5/j8AAAAACwBJckAAAAAyqPNVAAAAACRmbXYAAAAAERXa2MAAAAA9sy6xgAAAAATrrGkAAAAAAhdM/UAAAAAmP1CowAAAADaacjGAAAAAA6gVd0AAAAACO1NygAAAAC0E7ukAAAAABlFwR4AAAAAd/UCVQAAAAD2DgwPAAAAAMEQpuoAAAAA4BPxAwAAAADtrb1TAAAAACytSTUAAAAAmfxdVwAAAACxUP8EAAAAAAlh/eoAAAAAvtgeKQAAAADz91O+AAAAAOs+QJ4AAAAAb9vXZgAAAAA7cn0QAAAAABZf0qkAAAAA95cwfQAAAAA3X6OUAAAAAH5toRwAAAAAZdiREwAAAAC/osW7AAAAAOBkPJEAAAAAqUd3HgAAAADeeaWxAAAAAI04iAIAAAAAjf5bXwAAAADC4WcUAAAAANL6fQEAAAAAvDne+gAAAAD3qxUPAAAAAHxsFCgAAAAALe7/PwAAAACtUPlmAAAAAMDiMfEAAAAAzaVWFwAAAADRZiWeAAAAACKGEHoAAAAAHcdEVQAAAADNUfgbAAAAAO52ChMAAAAAyvfYCAAAAAAjBp6jAAAAACOtvOUAAAAApGAN5AAAAAA9vu1EAAAAALQrB4sAAAAAdN8GkgAAAADtqNkoAAAAADdubckAAAAAPHWFrAAAAABtkEvxAAAAAHD/oYwAAAAAtkw1lgAAAACXaLDTAAAAAP6z0nEAAAAAbivo1gAAAABPIeRPAAAAAGusK8MAAAAA95YZDwAAAAA2/aE6AAAAACo7JzIAAAAAJsQAFwAAAAA/HhM3AAAAAEZ9jTwAAAAA2tD3NgAAAABBOc1tAAAAANt7AA8AAAAA+AnzogAAAAArGbv0AAAAAN8xRH8AAAAA9XZexAAAAAAIglw4AAAAAKiSvFUAAAAAY7sxrQAAAACiPFwSAAAAAMj2GX
EAAAAA/ojvvgAAAAC4Dl6RAAAAAKY78dQAAAAACI9FAQAAAADmRDUdAAAAAIx8WIkAAAAAkENGLAAAAAAxfqrVAAAAACmYzo4AAAAAqY50wAAAAACj/3YEAAAAAMcHM9cAAAAA21X1sQAAAAAoiSAcAAAAADCa9i8AAAAAoyeQ/AAAAABjxOl3AAAAAJe1i0AAAAAApVL3yQAAAAAlQhzeAAAAAEzjsa0AAAAAf2M0OgAAAABaJ4XXAAAAAA3rSC4AAAAAE0JHVQAAAABHyyy0AAAAANmcpiEAAAAAwjVohAAAAABi7s8GAAAAAMjLJFwAAAAAztbf6gAAAAAXvX5AAAAAAM6yyhwAAAAAbdIJrQAAAAA0xjEsAAAAAHCpPygAAAAAk5d+0QAAAAD79LyfAAAAAIvxJRAAAAAA61/ATgAAAADqAj8DAAAAADJqeUAAAAAAwnFPYgAAAAAV8B2bAAAAAAQw2NoAAAAAG56UnwAAAAB3guyWAAAAAOHc4mAAAAAAX0psmwAAAABNVlS6AAAAAKSJY/AAAAAAb6UTzwAAAAC8kjkhAAAAAH/4bYYAAAAA1aMp7gAAAACGNN6tAAAAADDYQEEAAAAAN5SjvgAAAABscqamAAAAAHBrsrkAAAAA903iOQAAAAAbmK5hAAAAAI4MjqcAAAAAvMrRVQAAAACrjnmqAAAAAMZq/GoAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==",
117
+ "cuda_rng_state": "nfeOSbJFGwAAAAAAAAAAAA=="
118
+ }