math-conjecture-model / configs /math_conjecture_scratch.yaml
NorthernTribe-Research's picture
Add tailored from-scratch training stack (tokenizer + random-init LM) for math conjecture solving
e69a71a verified
global:
output_root: model_development/runs/math-conjecture-scratch
seed: 17
tokenizer:
tokenizer_dir: model_development/runs/math-conjecture-scratch/tokenizer
vocab_size: 32768
min_frequency: 2
max_train_rows: 220000
special_tokens:
- <pad>
- <unk>
- <s>
- </s>
- <|system|>
- <|user|>
- <|assistant|>
model:
n_layer: 12
n_head: 12
n_embd: 768
n_positions: 2048
use_bf16: true
resid_pdrop: 0.1
embd_pdrop: 0.1
attn_pdrop: 0.1
initializer_range: 0.02
data:
train_file: data/releases/v1/train.parquet
validation_file: data/releases/v1/validation.parquet
prompt_field: prompt
target_field: target
final_answer_field: final_answer
proof_field: proof_formal
max_seq_length: 2048
max_train_samples: 240000
max_eval_samples: 4500
system_prompt: |
You are NorthernTribe Research's math-conjecture solver.
Recover answers for solved conjectures, produce checkable reasoning, and
preserve formal consistency suitable for Lean verification.
training:
output_dir: model_development/runs/math-conjecture-scratch/checkpoints
num_train_epochs: 1
max_steps: null
per_device_train_batch_size: 1
per_device_eval_batch_size: 1
gradient_accumulation_steps: 32
learning_rate: 2.0e-4
weight_decay: 0.1
warmup_ratio: 0.03
lr_scheduler_type: cosine
max_grad_norm: 1.0
gradient_checkpointing: true
logging_steps: 10
save_steps: 250
eval_steps: 250
save_total_limit: 3
dataloader_num_workers: 2
hub:
push_to_hub: false
repo_id: NorthernTribe-Research/math-conjecture-model
private: false
commit_message: Upload scratch-trained math-conjecture solver model.
credentials:
path: huggingface-api-key.json