L_MIN = 0.5 L_MAX = 2.0 SIGMA_ZERO_EPS = 1e-8 def update_learning_ratio(current_L: float, smoothing_beta: float, learn_obs: float) -> float: """EMA update for learning ratio with clamping. Args: current_L: current learning ratio value smoothing_beta: EMA smoothing factor (0.0–0.9999) learn_obs: observation ratio = ||epsilon_hat|| / (||epsilon_real|| + 1e-8) Returns: new learning ratio (clamped) """ new_L = smoothing_beta * current_L + (1.0 - smoothing_beta) * learn_obs if new_L < L_MIN: new_L = L_MIN elif new_L > L_MAX: new_L = L_MAX return new_L def scale_epsilon_hat(epsilon_hat, learning_ratio: float): """Scale predicted epsilon by 1/L (with tiny floor to avoid div-by-zero).""" import torch return epsilon_hat / max(learning_ratio, 1e-8)