# Captured page header (not part of the module): Spaces: Sleeping; File size: 1,360 Bytes; 0b6a889
"""
FinePrint Training Configuration
"""
from dataclasses import dataclass
from typing import Optional
@dataclass
class TrainingConfig:
    """Default settings for a FinePrint training run.

    Every field carries a default, so ``TrainingConfig()`` yields a complete
    configuration and any individual value can be overridden by keyword
    (or positionally, in declaration order). No validation is performed on
    the values.

    The section comments below group the fields by concern. How each value
    is consumed is not visible in this module; confirm against the training
    code before relying on a particular interpretation.
    """

    # Model
    model_name: str = "unsloth/Qwen2.5-1.5B-Instruct"
    max_seq_length: int = 2048
    lora_r: int = 16
    lora_alpha: int = 32
    lora_dropout: float = 0.05

    # Training
    num_episodes: int = 200
    batch_size: int = 4
    learning_rate: float = 2e-5
    weight_decay: float = 0.01
    warmup_steps: int = 100
    max_grad_norm: float = 1.0
    gradient_accumulation_steps: int = 4

    # RL
    gamma: float = 0.99
    gae_lambda: float = 0.95
    clip_epsilon: float = 0.2
    value_loss_coef: float = 0.5
    entropy_coef: float = 0.01
    num_rollouts_per_update: int = 8

    # Environment
    num_workflows_per_episode: int = 5
    max_episode_steps: int = 60
    drift_probability: float = 0.25
    silent_drift_ratio: float = 0.70

    # Paths
    # NOTE: checkpoint_dir and log_dir are independent literal defaults; they
    # are not derived from output_dir, so overriding output_dir alone leaves
    # them pointing under "outputs/".
    policies_dir: str = "policies"
    output_dir: str = "outputs"
    checkpoint_dir: str = "outputs/checkpoints"
    log_dir: str = "outputs/logs"

    # Logging
    log_every_n_episodes: int = 8
    save_every_n_episodes: int = 50
    eval_every_n_episodes: int = 40
    use_wandb: bool = False
    wandb_project: str = "fineprint"
    # None means no explicit run name was configured.
    wandb_run_name: Optional[str] = None

    # Evaluation
    eval_episodes: int = 10
    eval_seed: int = 42
|