import math

import torch
from torch.optim import Optimizer
| |
|
"""
EmoAiry v3.7.6 (260109) shadow-system v3.1 -moment v3.1 emoPulse v3.7
Inherits EmoFact v3.6; the emoDrive mechanism is merged into emoPulse and
simplified (a circulatory-style mechanism).
emoPulse aims at full automation (emoScope stays user-adjustable and
reflects the rate of improvement).
The dNR coefficient mixes history into emoPulse for stability
(confidence is maintained via the d / N history).
Early scalar, early stop, and other tuning to raise accuracy while
reducing overhead.
"""
| |
|
class EmoAiry(Optimizer):
    """Sign-based optimizer whose step size is driven by loss-EMA feedback.

    Instead of a fixed learning rate, the effective step ("emoPulse") is
    derived from short/medium/long exponential moving averages of the
    training loss; ``lr`` only seeds ``emoScope``.  With ``use_shadow`` a
    shadow copy of the weights can pull the parameters back when the loss
    trend swings sharply.  After each step the computed pulse is written
    back into every param group's ``lr`` so schedulers/loggers can see it.
    """

    def __init__(self, params,
                 lr=1.0,
                 eps=1e-8,
                 betas=(0.9, 0.995),
                 weight_decay=0.01,
                 use_shadow: bool = False):
        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
        super().__init__(params, defaults)
        self._init_lr = lr          # original lr, kept for reference
        self.should_stop = False    # raised by step() when the loss flat-lines
        self.use_shadow = use_shadow
        self.emoScope = lr          # user-tunable scale applied to emoPulse
        self.dNR_hist = 1.0         # smoothed d/N ratio history
        self.noise_est = 1.0        # EMA of |scalar| (noise proxy)
        self.d_est = 0.02           # EMA of |trust| (signal proxy)

    def _update_ema(self, state, loss_val):
        """Update the short/medium/long loss EMAs kept in ``state['ema']``.

        Each track seeds itself with the first observed loss, so on the
        very first call all three tracks equal ``loss_val``.
        """
        ema = state.setdefault('ema', {})
        for key, alpha in (('short', 0.3), ('medium', 0.05), ('long', 0.01)):
            ema[key] = alpha * loss_val + (1.0 - alpha) * ema.get(key, loss_val)
        return ema

    def _compute_scalar(self, ema):
        """Map the long-vs-short loss trend to a scalar in (-1, 1).

        Positive output means the loss is falling (long EMA above short).
        """
        floor = 1e-5  # guard against division by a vanishing EMA
        base_long = max(ema['long'], floor)
        base_med = max(ema['medium'], floor)
        gap = ema['long'] - ema['short']
        rel_long = gap / base_long
        rel_med = gap / base_med

        # Small relative trend: report it directly.
        if abs(rel_long) < 0.05:
            return math.tanh(rel_long)

        # NOTE(review): both products algebraically reduce to |gap|, so this
        # branch can only fire through floating-point rounding — preserved
        # verbatim; confirm intent with the author.
        if abs(rel_med) * base_med < abs(rel_long) * base_long:
            return math.tanh(rel_med)
        return math.tanh(rel_long)

    def _decide_ratio(self, scalar):
        """Blend-back ratio toward the shadow weights (0 when disabled)."""
        if not self.use_shadow or abs(scalar) <= 0.625:
            return 0.0
        # Strong trend swing: blend back by the complement of its magnitude.
        return 1.0 - abs(scalar)

    @torch.no_grad()
    def step(self, closure=None):
        """Perform a single optimization step.

        Args:
            closure: optional callable returning the loss tensor; it feeds
                the loss EMAs.  Without it the loss is treated as 0.0.

        Side effects: updates ``self.should_stop`` and overwrites every
        group's ``lr`` with the computed emoPulse.  Returns ``None``.
        """
        loss = closure() if closure is not None else None
        loss_val = loss.item() if loss is not None else 0.0

        # Optimizer-global bookkeeping lives under string keys inside
        # ``self.state`` next to the per-parameter entries.
        ema = self._update_ema(self.state, loss_val)
        scalar = self._compute_scalar(ema)
        ratio = self._decide_ratio(scalar)
        # Confidence: large when the trend scalar is small, signed like it.
        trust = math.copysign(1.0 - abs(scalar), scalar)

        # Running estimates of noise (|scalar|) and signal (|trust|).
        self.noise_est = 0.97 * self.noise_est + 0.03 * abs(scalar)
        self.d_est = 0.97 * self.d_est + 0.03 * abs(trust)
        noise = max(self.noise_est, 1e-8)
        d = self.d_est

        noise_base = abs(scalar - trust) + 0.1
        d_base = abs(noise - d) + 0.1
        dNR_now_val = (d_base / noise_base) ** 2

        if dNR_now_val >= self.dNR_hist and trust >= 0.5:
            # Improving and trusted: let history grow, capped at +5% per step.
            self.dNR_hist = min(dNR_now_val, self.dNR_hist * 1.05)
        elif -0.5 <= trust <= 0.5:
            # Uncertain regime: decay toward the instantaneous value.
            self.dNR_hist = dNR_now_val * 0.98

        # Effective step size, clamped to [1e-6, 3e-3].
        emoPulse = max(min(self.dNR_hist * (self.emoScope * 1e-4), 3e-3), 1e-6)

        for group in self.param_groups:
            eps = group['eps']
            beta1, beta2 = group['betas']
            decay = group['weight_decay']
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad
                state = self.state[p]

                if self.use_shadow:
                    if 'shadow' not in state:
                        state['shadow'] = p.clone()
                    if ratio > 0:
                        # Sharp swing: pull weights back toward the shadow.
                        p.mul_(1 - ratio).add_(state['shadow'], alpha=abs(trust))
                    else:
                        # Calm phase: let the shadow slowly track the weights.
                        state['shadow'].lerp_(p, 0.1 * abs(trust))

                if grad.dim() >= 2:
                    # Adafactor-style factored second moment (row x column).
                    sq = grad * grad
                    row_sq = sq.mean(dim=tuple(range(1, grad.dim())), keepdim=True).add_(eps)
                    col_sq = sq.mean(dim=0, keepdim=True).add_(eps)

                    # NOTE(review): these EMAs decay with beta1 and mix in
                    # (1 - beta1); since the final update is sign-only, the
                    # absolute scale of ``denom`` never reaches the weights.
                    if 'exp_avg_r' not in state:
                        state['exp_avg_r'] = torch.zeros_like(row_sq)
                    if 'exp_avg_c' not in state:
                        state['exp_avg_c'] = torch.zeros_like(col_sq)
                    state['exp_avg_r'].mul_(beta1).add_(row_sq.sqrt(), alpha=1 - beta1)
                    state['exp_avg_c'].mul_(beta1).add_(col_sq.sqrt(), alpha=1 - beta1)

                    denom = (state['exp_avg_r'] * state['exp_avg_c']).sqrt_().add_(eps)
                else:
                    # Vectors/scalars: plain second-moment accumulator.
                    # NOTE(review): decays with beta1 but accumulates with
                    # (1 - beta2) — preserved verbatim; harmless for a
                    # sign-based update but worth confirming with the author.
                    if 'exp_avg_sq' not in state:
                        state['exp_avg_sq'] = torch.zeros_like(p)
                    exp_avg_sq = state['exp_avg_sq']
                    exp_avg_sq.mul_(beta1).addcmul_(grad, grad, value=1 - beta2)
                    denom = exp_avg_sq.sqrt().add_(eps)

                update_term = grad / denom

                # Decoupled weight decay scaled by the pulse, followed by a
                # sign-of-update step (Lion-like).
                p.mul_(1.0 - decay * emoPulse)
                p.add_(update_term.sign_(), alpha=-emoPulse)

        # Expose the effective step size as the visible learning rate.
        for group in self.param_groups:
            group['lr'] = emoPulse

        # Early stop once both the trend and the noise/signal gap vanish.
        if abs(scalar) <= 5e-6 and abs(noise_base - d_base) <= 5e-7:
            self.should_stop = True
            self.emoScope = 1.0
        else:
            self.should_stop = False

        return
| |
|
"""
https://github.com/muooon/EmoSens
Airy is inspired by Adafactor and EmoFact,
and its VRAM-friendly design is something everyone loves.
"""
| |
|