Text Generation
Transformers
ONNX
Safetensors
English
gpt2
chess
pgn
causal-lm
game-playing
text-generation-inference
kn1ght-bullet / checkpoints /sft /v2 /config.json
InterwebAlchemy's picture
Upload folder using huggingface_hub
65504f1 verified
raw
history blame contribute delete
825 Bytes
{
"model": {
"vocab_size": 4096,
"n_embd": 256,
"n_head": 4,
"n_layer": 4,
"block_size": 256,
"dropout": 0.1
},
"finetune": {
"checkpoint": ".data/models/kn1ght-sft/ckpt_005000.pt",
"output_dir": ".data/models/kn1ght-sft-v2",
"n_per_opening": 5,
"temperature": 0.7,
"top_k": 40,
"max_gen_tokens": 80,
"min_half_moves": 6,
"hf_dataset": "InterwebAlchemy/pgn-dataset-including-special-tokens",
"hf_mix_games": 10000,
"batch_size": 32,
"learning_rate": 0.0001,
"min_lr": 1e-05,
"max_iters": 5000,
"warmup_iters": 200,
"grad_clip": 1.0,
"weight_decay": 0.1,
"turn_number_weight": 0.15,
"openings_repeat": 5,
"eval_interval": 500,
"eval_iters": 50,
"log_interval": 50,
"save_interval": 1000,
"seed": 42
}
}