Text Generation
Transformers
ONNX
Safetensors
English
gpt2
chess
pgn
causal-lm
game-playing
text-generation-inference
InterwebAlchemy committed on
Commit
ba218ea
·
verified ·
1 Parent(s): 65504f1

Upload folder using huggingface_hub

Browse files
checkpoints/sft/v3/ckpt_001000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0639cea1e0e6a2e46e4f4ca6a16b00947c5a806a35bfe9c2c82d8d690fdc28c
3
+ size 52267199
checkpoints/sft/v3/ckpt_002000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a6be04d2b691d393d5894b5042ce3a47c573ecf3d0af3614ab9a71fc1a384c9
3
+ size 52267199
checkpoints/sft/v3/ckpt_003000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b9892322ffbc9e43a1df21b122119e0ba44231d07c666f1710c320b6b02d277
3
+ size 52267199
checkpoints/sft/v3/ckpt_004000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8bb3e63def5c6f037fc5173bf9385baaa5a9d189f2671ec8b3b5bbf6f9cc7a3
3
+ size 52267199
checkpoints/sft/v3/ckpt_005000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52ddd4ebad89c324f1079416af3278530fd110c43ba83d61687eb702637c4ddc
3
+ size 52267199
checkpoints/sft/v3/config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "vocab_size": 4096,
4
+ "n_embd": 256,
5
+ "n_head": 4,
6
+ "n_layer": 4,
7
+ "block_size": 256,
8
+ "dropout": 0.1
9
+ },
10
+ "finetune": {
11
+ "checkpoint": ".data/models/kn1ght-sft-v2/ckpt_005000.pt",
12
+ "output_dir": ".data/models/kn1ght-sft-v3",
13
+ "n_per_opening": 5,
14
+ "temperature": 0.7,
15
+ "top_k": 40,
16
+ "max_gen_tokens": 80,
17
+ "min_half_moves": 6,
18
+ "hf_dataset": "InterwebAlchemy/pgn-dataset-including-special-tokens",
19
+ "hf_mix_games": 10000,
20
+ "batch_size": 32,
21
+ "learning_rate": 0.0001,
22
+ "min_lr": 1e-05,
23
+ "max_iters": 5000,
24
+ "warmup_iters": 200,
25
+ "grad_clip": 1.0,
26
+ "weight_decay": 0.1,
27
+ "turn_number_weight": 0.15,
28
+ "openings_repeat": 5,
29
+ "eval_interval": 500,
30
+ "eval_iters": 50,
31
+ "log_interval": 50,
32
+ "save_interval": 1000,
33
+ "seed": 42
34
+ }
35
+ }