"""
FinePrint Training Configuration
"""

from dataclasses import dataclass
from typing import Optional


@dataclass
class TrainingConfig:
    """Container for the default settings of a FinePrint training run.

    Every field has a default, so ``TrainingConfig()`` builds a complete
    configuration and individual values can be overridden by keyword
    (or positionally, in declaration order). The class performs no
    validation and defines no behavior beyond what ``@dataclass``
    generates.

    Fields are grouped by the section comments below: model, training,
    RL, environment, paths, logging, and evaluation.
    """

    # Model
    model_name: str = "unsloth/Qwen2.5-1.5B-Instruct"
    max_seq_length: int = 2048
    lora_r: int = 16
    lora_alpha: int = 32
    lora_dropout: float = 0.05

    # Training
    num_episodes: int = 200
    batch_size: int = 4
    learning_rate: float = 2e-5
    weight_decay: float = 0.01
    warmup_steps: int = 100
    max_grad_norm: float = 1.0
    gradient_accumulation_steps: int = 4

    # RL
    gamma: float = 0.99
    gae_lambda: float = 0.95
    clip_epsilon: float = 0.2
    value_loss_coef: float = 0.5
    entropy_coef: float = 0.01
    num_rollouts_per_update: int = 8

    # Environment
    num_workflows_per_episode: int = 5
    max_episode_steps: int = 60
    drift_probability: float = 0.25
    silent_drift_ratio: float = 0.70

    # Paths
    # NOTE: checkpoint_dir and log_dir default to literal strings under
    # "outputs/"; they are not derived from output_dir, so overriding
    # output_dir alone does not relocate them.
    policies_dir: str = "policies"
    output_dir: str = "outputs"
    checkpoint_dir: str = "outputs/checkpoints"
    log_dir: str = "outputs/logs"

    # Logging
    log_every_n_episodes: int = 8
    save_every_n_episodes: int = 50
    eval_every_n_episodes: int = 40
    use_wandb: bool = False
    wandb_project: str = "fineprint"
    # None means no explicit run name was supplied.
    wandb_run_name: Optional[str] = None

    # Evaluation
    eval_episodes: int = 10
    eval_seed: int = 42