File size: 1,004 Bytes
1faccd4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# Rollout Correction: corrects off-policy distribution shifts.
# Full documentation: docs/algo/rollout_corr.md
# Presets are also available, e.g. RolloutCorrectionConfig.decoupled_seq_is()
# and RolloutCorrectionConfig.bypass_pg_is().
# Abbreviations used below: IS = importance sampling, RS = rejection sampling.

# Aggregation level for IS weights:
# - null: IS disabled
# - "token": per-token
# - "sequence": per-sequence
rollout_is: null

# Upper threshold used to truncate IS weights (typical range: 2.0-5.0).
# NOTE(review): presumably only relevant when rollout_is is not null - confirm.
rollout_is_threshold: 2.0

# Aggregation level for RS:
# - null: RS disabled
# - otherwise a mode name, e.g. "token_k1", "seq_sum_k1", "seq_mean_k3"
rollout_rs: null

# Threshold for rejection sampling. Accepts a string or a float; the accepted
# formats are described in the code docs.
rollout_rs_threshold: null

# Operating mode:
# - false: Decoupled (3 policies)
# - true: Bypass (2 policies)
bypass_mode: false

# Loss type in bypass mode (bypass_mode=true):
# - "ppo_clip": PPO clipped objective (IS handled by ratio, default)
# - "reinforce": REINFORCE with explicit IS weights (no PPO clipping)
# NOTE(review): presumably unused when bypass_mode is false - confirm.
loss_type: ppo_clip

# Batch-normalize IS weights:
# - false: use raw weights
# - true: normalize weights to mean=1.0
rollout_is_batch_normalize: false