Upload 4 files
Browse files
- 2Gv38_AutoLR/emoairy.py +6 -3
- 2Gv38_AutoLR/emotion.py +15 -16
2Gv38_AutoLR/emoairy.py
CHANGED
|
@@ -146,13 +146,16 @@ class EmoAiry(Optimizer):
|
|
| 146 |
# 最終的な更新項を計算
|
| 147 |
update_term = grad / denom
|
| 148 |
|
| 149 |
-
# 1次元(ベクトル)
|
| 150 |
else:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
# 最終的な更新項を計算
|
| 152 |
-
update_term = grad
|
| 153 |
|
| 154 |
# 最終的なパラメータ更新 (decoupled weight decayも適用)
|
| 155 |
-
# [テンソル]2D以上:不正確、1D:正確、[履歴]2D以上:正確化、1D:ナシ、でバランス改善
|
| 156 |
# sign化で2次momentと1次ベクトルのデータの質(粒度)を揃える
|
| 157 |
p.mul_(1.0 - group['weight_decay'] * emoPulse)
|
| 158 |
p.add_(update_term.sign_(), alpha=-emoPulse)
|
|
|
|
| 146 |
# 最終的な更新項を計算
|
| 147 |
update_term = grad / denom
|
| 148 |
|
| 149 |
+
# 1次元(ベクトル)の勾配補正
|
| 150 |
else:
|
| 151 |
+
beta1, beta2 = group['betas']
|
| 152 |
+
exp_avg_sq = state.setdefault('exp_avg_sq', torch.zeros_like(p))
|
| 153 |
+
exp_avg_sq.mul_(beta1).addcmul_(grad, grad, value=(1 - beta2))
|
| 154 |
+
denom = exp_avg_sq.sqrt().add_(group['eps'])
|
| 155 |
# 最終的な更新項を計算
|
| 156 |
+
update_term = grad / denom
|
| 157 |
|
| 158 |
# 最終的なパラメータ更新 (decoupled weight decayも適用)
|
|
|
|
| 159 |
# sign化で2次momentと1次ベクトルのデータの質(粒度)を揃える
|
| 160 |
p.mul_(1.0 - group['weight_decay'] * emoPulse)
|
| 161 |
p.add_(update_term.sign_(), alpha=-emoPulse)
|
2Gv38_AutoLR/emotion.py
CHANGED
|
@@ -72,7 +72,7 @@ class EmoTion(Optimizer):
|
|
| 72 |
|
| 73 |
# 損失取得(損失値 loss_val を数値化、感情判定に使用、存在しないパラメータ(更新不要)はスキップ)
|
| 74 |
@torch.no_grad()
|
| 75 |
-
def step(self, closure=None):
|
| 76 |
loss = torch.enable_grad()(closure)() if closure is not None else None
|
| 77 |
loss_val = loss.item() if loss is not None else 0.0
|
| 78 |
|
|
@@ -131,34 +131,33 @@ class EmoTion(Optimizer):
|
|
| 131 |
# 2次元以上かつ一定サイズ以上を行列近似対象とする
|
| 132 |
# 判定:2次元以上かつ「低ランク化」でメモリコストが全体の 5% 以下の場合に適用
|
| 133 |
if grad.dim() >= 2 and ((d_p[0] + d_p[1]) / p.numel()) < 0.05:
|
| 134 |
-
# 4次元を2次元(行列)として解釈する
|
| 135 |
grad_matrix = grad.view(d_p[0], -1)
|
| 136 |
d0, d1 = grad_matrix.shape
|
| 137 |
-
|
| 138 |
if 'exp_avg_row' not in state:
|
| 139 |
state['exp_avg_row'] = torch.zeros(d0, dtype=grad.dtype, device=grad.device)
|
| 140 |
state['exp_avg_col'] = torch.zeros(d1, dtype=grad.dtype, device=grad.device)
|
|
|
|
| 141 |
|
| 142 |
row, col = state['exp_avg_row'], state['exp_avg_col']
|
| 143 |
|
| 144 |
-
#
|
| 145 |
-
row.mul_(beta2).add_(grad_matrix.mean(dim=1), alpha=1 - beta2)
|
| 146 |
-
col.mul_(beta2).add_(grad_matrix.mean(dim=0), alpha=1 - beta2)
|
| 147 |
|
| 148 |
-
#
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
# grad_matrix 構造情報で「更新ベクトル場」へ変換する
|
| 152 |
-
grad_matrix.mul_(r_filter.unsqueeze(1)).mul_(c_filter.unsqueeze(0))
|
| 153 |
|
| 154 |
-
#
|
| 155 |
-
|
|
|
|
| 156 |
|
| 157 |
else:
|
| 158 |
-
# 1
|
| 159 |
update = grad
|
| 160 |
|
| 161 |
-
# Weight Decay
|
| 162 |
p.mul_(1.0 - group['weight_decay'] * emoPulse)
|
| 163 |
p.add_(update.sign_(), alpha=-emoPulse)
|
| 164 |
# --- End Gradient Update Logic ---
|
|
@@ -180,6 +179,6 @@ class EmoTion(Optimizer):
|
|
| 180 |
|
| 181 |
"""
|
| 182 |
https://github.com/muooon/EmoSens
|
| 183 |
-
Thank you Adafactor and Lion.
|
| 184 |
Believing in a future for democratic AI learning.
|
| 185 |
"""
|
|
|
|
| 72 |
|
| 73 |
# 損失取得(損失値 loss_val を数値化、感情判定に使用、存在しないパラメータ(更新不要)はスキップ)
|
| 74 |
@torch.no_grad()
|
| 75 |
+
def step(self, closure=None):
|
| 76 |
loss = torch.enable_grad()(closure)() if closure is not None else None
|
| 77 |
loss_val = loss.item() if loss is not None else 0.0
|
| 78 |
|
|
|
|
| 131 |
# 2次元以上かつ一定サイズ以上を行列近似対象とする
|
| 132 |
# 判定:2次元以上かつ「低ランク化」でメモリコストが全体の 5% 以下の場合に適用
|
| 133 |
if grad.dim() >= 2 and ((d_p[0] + d_p[1]) / p.numel()) < 0.05:
|
|
|
|
| 134 |
grad_matrix = grad.view(d_p[0], -1)
|
| 135 |
d0, d1 = grad_matrix.shape
|
| 136 |
+
|
| 137 |
if 'exp_avg_row' not in state:
|
| 138 |
state['exp_avg_row'] = torch.zeros(d0, dtype=grad.dtype, device=grad.device)
|
| 139 |
state['exp_avg_col'] = torch.zeros(d1, dtype=grad.dtype, device=grad.device)
|
| 140 |
+
#state['exp_avg_m'] = torch.zeros_like(grad_matrix)
|
| 141 |
|
| 142 |
row, col = state['exp_avg_row'], state['exp_avg_col']
|
| 143 |
|
| 144 |
+
# 1. 行列構造の統計更新(2次モーメントによる地形の解像度維持)
|
| 145 |
+
row.mul_(beta2).add_(grad_matrix.pow(2).mean(dim=1), alpha=1 - beta2)
|
| 146 |
+
col.mul_(beta2).add_(grad_matrix.pow(2).mean(dim=0), alpha=1 - beta2)
|
| 147 |
|
| 148 |
+
# 2. 意志の正規化 (構造情報の抽出)
|
| 149 |
+
inv_sq_row = torch.rsqrt(row.add(group['eps'])).unsqueeze(1)
|
| 150 |
+
inv_sq_col = torch.rsqrt(col.add(group['eps'])).unsqueeze(0)
|
|
|
|
|
|
|
| 151 |
|
| 152 |
+
# 3. 復元と更新ベクトル生成
|
| 153 |
+
# 統計量でスケーリングすることで「信頼できる方向」を浮き彫りにする
|
| 154 |
+
update = (grad_matrix * inv_sq_row * inv_sq_col).view_as(grad)
|
| 155 |
|
| 156 |
else:
|
| 157 |
+
# 1次元/小行列はそのまま流す
|
| 158 |
update = grad
|
| 159 |
|
| 160 |
+
# 最終出力:Weight Decay と Sign 更新の統合
|
| 161 |
p.mul_(1.0 - group['weight_decay'] * emoPulse)
|
| 162 |
p.add_(update.sign_(), alpha=-emoPulse)
|
| 163 |
# --- End Gradient Update Logic ---
|
|
|
|
| 179 |
|
| 180 |
"""
|
| 181 |
https://github.com/muooon/EmoSens
|
| 182 |
+
Thank you Adafactor and Lion.
|
| 183 |
Believing in a future for democratic AI learning.
|
| 184 |
"""
|