File size: 1,360 Bytes
0b6a889
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
"""
FinePrint Training Configuration
"""

from dataclasses import dataclass
from typing import Optional


@dataclass
class TrainingConfig:
    """Default settings for a FinePrint training run.

    Every field carries a default, so ``TrainingConfig()`` yields a complete
    configuration; individual values can be overridden by keyword at
    construction time. No validation is performed here, and how each value is
    consumed is decided by code outside this class.
    """

    # ---- Model -----------------------------------------------------------
    model_name: str = "unsloth/Qwen2.5-1.5B-Instruct"
    max_seq_length: int = 2048  # presumably measured in tokens -- TODO confirm
    lora_r: int = 16
    lora_alpha: int = 32  # default is twice the default lora_r
    lora_dropout: float = 0.05

    # ---- Training --------------------------------------------------------
    num_episodes: int = 200
    batch_size: int = 4
    learning_rate: float = 2e-5
    weight_decay: float = 0.01
    warmup_steps: int = 100
    max_grad_norm: float = 1.0
    gradient_accumulation_steps: int = 4

    # ---- RL --------------------------------------------------------------
    gamma: float = 0.99
    gae_lambda: float = 0.95
    clip_epsilon: float = 0.2
    value_loss_coef: float = 0.5
    entropy_coef: float = 0.01
    num_rollouts_per_update: int = 8

    # ---- Environment -----------------------------------------------------
    num_workflows_per_episode: int = 5
    max_episode_steps: int = 60
    # Both defaults below lie in [0, 1]; that range is not enforced here.
    drift_probability: float = 0.25
    silent_drift_ratio: float = 0.7

    # ---- Paths -----------------------------------------------------------
    # The checkpoint_dir and log_dir defaults are fixed strings that sit
    # under the default output_dir; overriding output_dir alone does not
    # move them.
    policies_dir: str = "policies"
    output_dir: str = "outputs"
    checkpoint_dir: str = "outputs/checkpoints"
    log_dir: str = "outputs/logs"

    # ---- Logging ---------------------------------------------------------
    log_every_n_episodes: int = 8
    save_every_n_episodes: int = 50
    eval_every_n_episodes: int = 40
    use_wandb: bool = False  # off by default
    wandb_project: str = "fineprint"
    wandb_run_name: Optional[str] = None  # no run name is set by default

    # ---- Evaluation ------------------------------------------------------
    eval_episodes: int = 10
    eval_seed: int = 42