Text Generation
Transformers
ONNX
Safetensors
English
gpt2
chess
pgn
causal-lm
game-playing
text-generation-inference
InterwebAlchemy committed on
Commit
65504f1
·
verified ·
1 Parent(s): f962cd6

Upload folder using huggingface_hub

Browse files
checkpoints/sft/v2/ckpt_001000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4235c72b7282828ffd0f1f8fef67680b78957b04943035c9abcdc337792f73b
3
+ size 52267199
checkpoints/sft/v2/ckpt_002000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e368505e338a92d8a4d2a61dd1e7a69cc7dd20ae75623c3b1006af6e1ee5081
3
+ size 52267199
checkpoints/sft/v2/ckpt_003000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:722280e4a93669ef7c343e4269fff99ecf688765744177032fbbc50ee565f905
3
+ size 52267199
checkpoints/sft/v2/ckpt_004000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c342fbab66b2d23b1d5a7a8df21b7601d674c932f3f6aab515717e4b9f7021fe
3
+ size 52267199
checkpoints/sft/v2/ckpt_005000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ff521b0c8869da9d0ff4ec520c8cd12c9d3fe601e3c21a799a0bbf2d8f2304b
3
+ size 52267199
checkpoints/sft/v2/config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "vocab_size": 4096,
4
+ "n_embd": 256,
5
+ "n_head": 4,
6
+ "n_layer": 4,
7
+ "block_size": 256,
8
+ "dropout": 0.1
9
+ },
10
+ "finetune": {
11
+ "checkpoint": ".data/models/kn1ght-sft/ckpt_005000.pt",
12
+ "output_dir": ".data/models/kn1ght-sft-v2",
13
+ "n_per_opening": 5,
14
+ "temperature": 0.7,
15
+ "top_k": 40,
16
+ "max_gen_tokens": 80,
17
+ "min_half_moves": 6,
18
+ "hf_dataset": "InterwebAlchemy/pgn-dataset-including-special-tokens",
19
+ "hf_mix_games": 10000,
20
+ "batch_size": 32,
21
+ "learning_rate": 0.0001,
22
+ "min_lr": 1e-05,
23
+ "max_iters": 5000,
24
+ "warmup_iters": 200,
25
+ "grad_clip": 1.0,
26
+ "weight_decay": 0.1,
27
+ "turn_number_weight": 0.15,
28
+ "openings_repeat": 5,
29
+ "eval_interval": 500,
30
+ "eval_iters": 50,
31
+ "log_interval": 50,
32
+ "save_interval": 1000,
33
+ "seed": 42
34
+ }
35
+ }