# Source: fineprint-env / config.py (commit 0b6a889, "Fine Print RL final", by vigneshmoovendhan)
"""
FinePrint Training Configuration
"""
from dataclasses import dataclass
from typing import Optional
@dataclass
class TrainingConfig:
    """Default settings for a FinePrint training run.

    Every field has a default, so ``TrainingConfig()`` yields a complete
    configuration; override individual values by keyword, e.g.
    ``TrainingConfig(batch_size=8)``. Field order is part of the
    constructor signature (positional arguments), so keep it stable.

    This class only stores values; it performs no validation and does
    not create any of the directories it names.
    """

    # Model
    model_name: str = "unsloth/Qwen2.5-1.5B-Instruct"
    max_seq_length: int = 2048
    lora_r: int = 16
    lora_alpha: int = 32
    lora_dropout: float = 0.05

    # Training
    num_episodes: int = 200
    batch_size: int = 4
    learning_rate: float = 2e-5
    weight_decay: float = 0.01
    warmup_steps: int = 100
    max_grad_norm: float = 1.0
    gradient_accumulation_steps: int = 4

    # RL
    # NOTE(review): names suggest PPO/GAE-style hyperparameters -- confirm
    # against the trainer that consumes them.
    gamma: float = 0.99
    gae_lambda: float = 0.95
    clip_epsilon: float = 0.2
    value_loss_coef: float = 0.5
    entropy_coef: float = 0.01
    num_rollouts_per_update: int = 8

    # Environment
    num_workflows_per_episode: int = 5
    max_episode_steps: int = 60
    drift_probability: float = 0.25
    silent_drift_ratio: float = 0.70

    # Paths
    # NOTE: checkpoint_dir and log_dir are independent literal defaults that
    # happen to live under "outputs"; overriding output_dir alone does not
    # move them.
    policies_dir: str = "policies"
    output_dir: str = "outputs"
    checkpoint_dir: str = "outputs/checkpoints"
    log_dir: str = "outputs/logs"

    # Logging
    log_every_n_episodes: int = 8
    save_every_n_episodes: int = 50
    eval_every_n_episodes: int = 40
    use_wandb: bool = False
    wandb_project: str = "fineprint"
    wandb_run_name: Optional[str] = None  # None means no run name was given

    # Evaluation
    eval_episodes: int = 10
    eval_seed: int = 42