Spaces:
Sleeping
Sleeping
| """ | |
| FinePrint Training Configuration | |
| """ | |
| from dataclasses import dataclass | |
| from typing import Optional | |
@dataclass
class TrainingConfig:
    """Settings for a FinePrint training run, grouped by concern.

    Every field carries a default, so ``TrainingConfig()`` yields the stock
    configuration and any subset of fields can be overridden by keyword,
    e.g. ``TrainingConfig(num_episodes=10, use_wandb=True)``.

    NOTE(review): the code that consumes these values is not visible from
    this file, so the section comments below describe the grouping only;
    confirm exact semantics and units against the training loop.
    """

    # Model: base checkpoint identifier, sequence limit, and LoRA settings.
    model_name: str = "unsloth/Qwen2.5-1.5B-Instruct"
    max_seq_length: int = 2048
    lora_r: int = 16
    lora_alpha: int = 32
    lora_dropout: float = 0.05

    # Training: optimisation loop settings.
    num_episodes: int = 200
    batch_size: int = 4
    learning_rate: float = 2e-5
    weight_decay: float = 0.01
    warmup_steps: int = 100
    max_grad_norm: float = 1.0
    gradient_accumulation_steps: int = 4

    # RL: reinforcement-learning coefficients.
    gamma: float = 0.99
    gae_lambda: float = 0.95
    clip_epsilon: float = 0.2
    value_loss_coef: float = 0.5
    entropy_coef: float = 0.01
    num_rollouts_per_update: int = 8

    # Environment: episode layout and drift settings.
    num_workflows_per_episode: int = 5
    max_episode_steps: int = 60
    drift_probability: float = 0.25
    silent_drift_ratio: float = 0.70

    # Paths: plain strings, relative as written.
    policies_dir: str = "policies"
    output_dir: str = "outputs"
    checkpoint_dir: str = "outputs/checkpoints"
    log_dir: str = "outputs/logs"

    # Logging: cadence (in episodes) and optional Weights & Biases settings.
    log_every_n_episodes: int = 8
    save_every_n_episodes: int = 50
    eval_every_n_episodes: int = 40
    use_wandb: bool = False
    wandb_project: str = "fineprint"
    # None leaves the run name unset; presumably the tracker then picks one
    # -- TODO confirm against the logging code.
    wandb_run_name: Optional[str] = None

    # Evaluation
    eval_episodes: int = 10
    eval_seed: int = 42