Text Generation
Transformers
ONNX
Safetensors
English
gpt2
chess
pgn
causal-lm
game-playing
text-generation-inference
InterwebAlchemy committed on
Commit
f962cd6
·
verified ·
1 Parent(s): 57d4d3f

Upload folder using huggingface_hub

Browse files
checkpoints/sft/v1/ckpt_000100.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87d268e4184ab462d57b53b0f0541338b6848af4cfd9e7bcb3c2953091a5dcab
3
+ size 52267199
checkpoints/sft/v1/ckpt_001000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2120ba480c43bcf5e9b7befce9a69f423d908d7b6aa175d0ae1c41b820c41fa
3
+ size 52267199
checkpoints/sft/v1/ckpt_002000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:058fc09f37114f24572b8423dab508cca906b34237862cb9b666fe1fc95bb826
3
+ size 52267199
checkpoints/sft/v1/ckpt_003000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f78a32c45601977d781097a3f2c09321fdcb19bfef445fcec64a8dab12177fbb
3
+ size 52267199
checkpoints/sft/v1/ckpt_004000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5d2467e53602915c50c8564afab546c560eaaa8c32379b141c451a036fbd989
3
+ size 52267199
checkpoints/sft/v1/ckpt_005000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f02b848ae3e83cf66ba007329b554ccc0b2204b4766b9ab52a6fdd829fd4d12
3
+ size 52267199
checkpoints/sft/v1/config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "vocab_size": 4096,
4
+ "n_embd": 256,
5
+ "n_head": 4,
6
+ "n_layer": 4,
7
+ "block_size": 256,
8
+ "dropout": 0.1
9
+ },
10
+ "finetune": {
11
+ "checkpoint": ".data/models/kn1ght-small/ckpt_200000.pt",
12
+ "output_dir": "/Users/ericallen/Development/_scratchpad/kn1ght/.data/models/kn1ght-sft",
13
+ "n_per_opening": 5,
14
+ "temperature": 0.7,
15
+ "top_k": 40,
16
+ "max_gen_tokens": 80,
17
+ "min_half_moves": 6,
18
+ "hf_dataset": "InterwebAlchemy/pgn-dataset-including-special-tokens",
19
+ "hf_mix_games": 10000,
20
+ "batch_size": 32,
21
+ "learning_rate": 0.0001,
22
+ "min_lr": 1e-05,
23
+ "max_iters": 5000,
24
+ "warmup_iters": 200,
25
+ "grad_clip": 1.0,
26
+ "weight_decay": 0.1,
27
+ "turn_number_weight": 0.15,
28
+ "openings_repeat": 20,
29
+ "eval_interval": 500,
30
+ "eval_iters": 50,
31
+ "log_interval": 50,
32
+ "save_interval": 1000,
33
+ "seed": 42
34
+ }
35
+ }