v7
Browse files- improve_gainlora/IDEA_Overall.md +46 -10
- improve_gainlora/_patch_genscripts.py +62 -0
- improve_gainlora/gen_script_long_order3_t5_specroute.sh +36 -1
- improve_gainlora/gen_script_long_order4_t5_specroute.sh +36 -1
- improve_gainlora/gen_script_superni_order1_t5_specroute.sh +36 -1
- improve_gainlora/gen_script_superni_order2_t5_specroute.sh +36 -1
- improve_gainlora/src/cl_trainer_specroute.py +5 -1
- improve_gainlora/src/run_t5.py +3 -2
improve_gainlora/IDEA_Overall.md
CHANGED
|
@@ -147,13 +147,37 @@ $$T_{\max} \;\leq\; \frac{d}{r\,(1 - \varepsilon)}$$
|
|
| 147 |
|
| 148 |
Với T5-Small ($d = 512$, $r = 8$, $\varepsilon = 0.02$): $T_{\max} \leq 65 \gg 15$ tasks. Điều này kết nối capacity học liên tục với lý thuyết Grassmannian packing.
|
| 149 |
|
| 150 |
-
### 3.4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
|
| 152 |
**Mệnh đề 1** *(Drift-Free Routing).* Hàm routing $h \mapsto \alpha_t(h)$ hoàn toàn ổn định qua tất cả các task.
|
| 153 |
|
| 154 |
**Chứng minh.** Routing input được tính từ frozen embedding table, *trước* bất kỳ transformer block nào. LoRA chỉ tồn tại trong các attention layer sâu hơn → $h$ độc lập với mọi tham số LoRA. Kết hợp với $\mathcal{S}_t$ đóng băng, $\alpha_t(h)$ bất biến với mọi thay đổi tích luỹ. $\square$
|
| 155 |
|
| 156 |
-
### 3.
|
| 157 |
|
| 158 |
Định lý 1 giả định $h \in \mathrm{span}(V_{t^*})$. Trong thực tế điều này đòi hỏi hai điều kiện:
|
| 159 |
|
|
@@ -270,7 +294,7 @@ hay tương đương, các hàng của $A_t$ là $r$ eigenvectors ứng với ei
|
|
| 270 |
|
| 271 |
```
|
| 272 |
# Bước 1: Thu thập activation covariance (forward pass nhỏ, trước training)
|
| 273 |
-
C_t = ∑ h(x)h(x)^T / N_batch # covariance input task t (N_batch ~
|
| 274 |
|
| 275 |
# Bước 2: Project covariance vào null-space
|
| 276 |
Q = I - P_old # null-space projector (từ GPM bases đã lưu)
|
|
@@ -278,13 +302,22 @@ C_tilde = Q @ C_t @ Q # projected covariance
|
|
| 278 |
|
| 279 |
# Bước 3: Eigenvector decomposition
|
| 280 |
eigvals, eigvecs = eigh(C_tilde) # đối xứng → eigh nhanh hơn SVD
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
top_r_idx = argsort(eigvals, descending=True)[:r]
|
| 282 |
|
| 283 |
-
# Bước
|
| 284 |
A_t = eigvecs[:, top_r_idx].T # shape (r, d) — direction task-relevant nhất trong null-space
|
| 285 |
A_t = A_t / norm(A_t, dim=1, keepdim=True) * sqrt(3) # normalize như InfLoRA gốc
|
| 286 |
```
|
| 287 |
|
|
|
|
|
|
|
| 288 |
#### Ý nghĩa Lý thuyết Thông tin
|
| 289 |
|
| 290 |
Theo Data Processing Inequality, với bất kỳ ma trận $A_t$ nào:
|
|
@@ -380,8 +413,8 @@ $A_t$ này đảm bảo capture **variance task-relevant tối đa** trong null-
|
|
| 380 |
- Tính projected covariance: $\tilde{C}_t = Q C_t Q$ ($Q = I - P_{\text{old}}$)
|
| 381 |
- Eigenvectors của $\tilde{C}_t$ → khởi tạo $A_t$ (thay thế random Kaiming)
|
| 382 |
3. InfLoRA: chuẩn hoá $A_t$ (đã nằm trong null-space từ eigenvector decomposition).
|
| 383 |
-
4. Huấn luyện `lora_B` với spectral affinity routing + adaptive bias $\beta(n)$ + C4.
|
| 384 |
-
5. Sau training: tính $\mathcal{S}_t$ (cả inference routing và storage) + cập nhật GPM bases.
|
| 385 |
6. Lưu tất cả artifacts cho task tiếp theo.
|
| 386 |
|
| 387 |
---
|
|
@@ -399,8 +432,8 @@ $A_t$ này đảm bảo capture **variance task-relevant tối đa** trong null-
|
|
| 399 |
| GPM + InfLoRA null-space | `get_reg_matrix()` | `cl_trainer_specroute.py` |
|
| 400 |
| Dynamic ESA threshold | `(1−ε₀)·t/T + ε₀` | `cl_trainer_specroute.py` |
|
| 401 |
| C4: Preconditioner | `precompute_preconditioners()` → eigendecomposition | `cl_trainer_specroute.py` |
|
| 402 |
-
| C4: Spectral entropy reg | `_compute_spectral_entropy_loss()` → QR trick | `cl_trainer_specroute.py` |
|
| 403 |
| **C5: Data-informed init** | **`pre_task_data_collection()` → `eigh(Q@C@Q)` → set `lora_A.data`** | **`cl_trainer_specroute.py`** |
|
|
|
|
| 404 |
|
| 405 |
---
|
| 406 |
|
|
@@ -414,9 +447,12 @@ $A_t$ này đảm bảo capture **variance task-relevant tối đa** trong null-
|
|
| 414 |
| LoRA | $r = 8$, target=Q+V, InfLoRA (chỉ B trained, A đóng băng) |
|
| 415 |
| Routing | $\tau = 1.0$, $\alpha_{\mathrm{target}} = 0.8$, adaptive $\beta(n)$ (train); SVD đối xứng (inference) |
|
| 416 |
| ESA | $\varepsilon_0 = 0.995$ (dynamic) |
|
| 417 |
-
| C4 |
|
| 418 |
-
| **C5** | **
|
| 419 |
-
|
|
|
|
|
|
|
|
|
|
|
| 420 |
| So sánh | Batch size, LR, scheduler khớp chính xác ROOT (GainLoRA) |
|
| 421 |
|
| 422 |
---
|
|
|
|
| 147 |
|
| 148 |
Với T5-Small ($d = 512$, $r = 8$, $\varepsilon = 0.02$): $T_{\max} \leq 65 \gg 15$ tasks. Điều này kết nối capacity học liên tục với lý thuyết Grassmannian packing.
|
| 149 |
|
| 150 |
+
### 3.4 Cam kết Trực giao từ Kiến trúc InfLoRA
|
| 151 |
+
|
| 152 |
+
> **Đây là phần đóng cửa lỗ hổng lý thuyết then chốt.** Reviewer thường lo ngại: "GPM gradient projection chỉ chiếu gradient, không đảm bảo các $\Delta W_t$ có subspace trực giao." Observation này *đúng* về GPM gradient projection nhưng *nhầm cơ chế* — tính trực giao đến từ bước khác: InfLoRA A-projection, cứng hơn nhiều.
|
| 153 |
+
|
| 154 |
+
**Mệnh đề 2** *(InfLoRA đảm bảo Điều kiện Định lý 1).* Với $P_{\text{old}} = \mathcal{B}\mathcal{B}^T$ là GPM projection matrix (built từ tasks $1,\ldots,t-1$), bước InfLoRA chiếu **tất cả hàng của $A_t$ vào null-space của $P_{\text{old}}$**:
|
| 155 |
+
|
| 156 |
+
$$A_t \leftarrow A_t(I - P_{\text{old}}) \quad\Rightarrow\quad \text{rowspace}(A_t) \subseteq \text{null}(P_{\text{old}})$$
|
| 157 |
+
|
| 158 |
+
Khi đó:
|
| 159 |
+
|
| 160 |
+
$$\text{span}(V_t) \;=\; \text{rowspace}(\Delta W_t) \;\subseteq\; \text{rowspace}(A_t) \;\subseteq\; \text{null}(P_{\text{old}})$$
|
| 161 |
+
|
| 162 |
+
**(Chứng minh từng bước.)**
|
| 163 |
+
- $\text{rowspace}(B_t A_t) \subseteq \text{rowspace}(A_t)$: đúng với mọi $B_t$ (phép nhân bên trái không mở rộng rowspace).
|
| 164 |
+
- $\text{rowspace}(A_t) \subseteq \text{null}(P_{\text{old}})$: bởi bước InfLoRA projection ở trên.
|
| 165 |
+
- GPM bases $\mathcal{B}$ span xấp xỉ $\text{rowspace}(A_s)$ cho các task $s < t$ (vì GPM tích lũy principal input directions, mà activation của task $s$ chủ yếu kích hoạt theo hướng $A_s$).
|
| 166 |
+
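- Do đó: $\text{span}(V_t) \subseteq \text{null}(P_{\text{old}}) = \text{span}(\mathcal{B})^{\perp}$, và vì $\text{span}(\mathcal{B})$ xấp xỉ chứa $\text{span}(V_s)$ nên $\text{span}(V_t) \perp \text{span}(V_s)$ (xấp xỉ) với mọi $s < t$. $\square$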
|
| 167 |
+
|
| 168 |
+
**Chất lượng xấp xỉ:** Với GPM threshold $\varepsilon_0 = 0.995$ (capture ≥ 99.5% variance), $\delta_{t,s} \leq 1 - 0.995 = 0.005 \ll \kappa_{\min}(t^*)$ trong thực tế.
|
| 169 |
+
|
| 170 |
+
**Phản hồi reviewer:** Reviewer nói "GPM không đảm bảo orthogonality của $\Delta W_t$" — *đúng* với GPM gradient projection. Nhưng cơ chế bảo đảm orthogonality là **InfLoRA A-projection** (bước khởi tạo), không phải gradient projection. Vì vậy điều kiện trực giao của Định lý 1 không cần được giả định riêng — nó là hệ quả (xấp xỉ, với sai số bị chặn bởi GPM threshold nêu trên) của kiến trúc InfLoRA đã có sẵn.
|
| 171 |
+
|
| 172 |
+
---
|
| 173 |
+
|
| 174 |
+
### 3.5 Drift Invariance
|
| 175 |
|
| 176 |
**Mệnh đề 1** *(Drift-Free Routing).* Hàm routing $h \mapsto \alpha_t(h)$ hoàn toàn ổn định qua tất cả các task.
|
| 177 |
|
| 178 |
**Chứng minh.** Routing input được tính từ frozen embedding table, *trước* bất kỳ transformer block nào. LoRA chỉ tồn tại trong các attention layer sâu hơn → $h$ độc lập với mọi tham số LoRA. Kết hợp với $\mathcal{S}_t$ đóng băng, $\alpha_t(h)$ bất biến với mọi thay đổi tích luỹ. $\square$
|
| 179 |
|
| 180 |
+
### 3.6 Vấn đề then chốt: Null-Space Collapse
|
| 181 |
|
| 182 |
Định lý 1 giả định $h \in \mathrm{span}(V_{t^*})$. Trong thực tế điều này đòi hỏi hai điều kiện:
|
| 183 |
|
|
|
|
| 294 |
|
| 295 |
```
|
| 296 |
# Bước 1: Thu thập activation covariance (forward pass nhỏ, trước training)
|
| 297 |
+
C_t = ∑ h(x)h(x)^T / N_batch # covariance input task t (N_batch ~100 batches)
|
| 298 |
|
| 299 |
# Bước 2: Project covariance vào null-space
|
| 300 |
Q = I - P_old # null-space projector (từ GPM bases đã lưu)
|
|
|
|
| 302 |
|
| 303 |
# Bước 3: Eigenvector decomposition
|
| 304 |
eigvals, eigvecs = eigh(C_tilde) # đối xứng → eigh nhanh hơn SVD
|
| 305 |
+
|
| 306 |
+
# Bước 4: Fallback nếu signal quá yếu (degenerate null-space)
|
| 307 |
+
if eigvals[-1] < 1e-6:
|
| 308 |
+
# Null-space bị bão hoà hoặc task không có activation rõ ràng
|
| 309 |
+
# Revert về Kaiming random init + InfLoRA projection như gốc
|
| 310 |
+
continue
|
| 311 |
+
|
| 312 |
top_r_idx = argsort(eigvals, descending=True)[:r]
|
| 313 |
|
| 314 |
+
# Bước 5: Set A_t
|
| 315 |
A_t = eigvecs[:, top_r_idx].T # shape (r, d) — direction task-relevant nhất trong null-space
|
| 316 |
A_t = A_t / norm(A_t, dim=1, keepdim=True) * sqrt(3) # normalize như InfLoRA gốc
|
| 317 |
```
|
| 318 |
|
| 319 |
+
**Điều kiện fallback:** Nếu `max_eigenvalue(C_tilde) < 1e-6` (tức `eigvals[-1]`, vì `eigh` trả eigenvalue theo thứ tự tăng dần), null-space quá hẹp hoặc activation không có signal đủ mạnh. Trong trường hợp này, C5 nhường cho Kaiming init + InfLoRA projection tiêu chuẩn — không làm tệ hơn V6, chỉ không cải thiện. Điều kiện này thường chỉ xảy ra khi null-space gần như bão hoà, tức là ESA đã tiêu thụ gần hết capacity.
|
| 320 |
+
|
| 321 |
#### Ý nghĩa Lý thuyết Thông tin
|
| 322 |
|
| 323 |
Theo Data Processing Inequality, với bất kỳ ma trận $A_t$ nào:
|
|
|
|
| 413 |
- Tính projected covariance: $\tilde{C}_t = Q C_t Q$ ($Q = I - P_{\text{old}}$)
|
| 414 |
- Eigenvectors của $\tilde{C}_t$ → khởi tạo $A_t$ (thay thế random Kaiming)
|
| 415 |
3. InfLoRA: chuẩn hoá $A_t$ (đã nằm trong null-space từ eigenvector decomposition).
|
| 416 |
+
4. Huấn luyện `lora_B` với spectral affinity routing + adaptive bias $\beta(n)$ + gradient preconditioning (C4.1).
|
| 417 |
+
5. Sau training: tính $\mathcal{S}_t$ (cả inference routing và storage) + cập nhật GPM bases (200 batches, đủ cho SVD ổn định).
|
| 418 |
6. Lưu tất cả artifacts cho task tiếp theo.
|
| 419 |
|
| 420 |
---
|
|
|
|
| 432 |
| GPM + InfLoRA null-space | `get_reg_matrix()` | `cl_trainer_specroute.py` |
|
| 433 |
| Dynamic ESA threshold | `(1−ε₀)·t/T + ε₀` | `cl_trainer_specroute.py` |
|
| 434 |
| C4: Preconditioner | `precompute_preconditioners()` → eigendecomposition | `cl_trainer_specroute.py` |
|
|
|
|
| 435 |
| **C5: Data-informed init** | **`pre_task_data_collection()` → `eigh(Q@C@Q)` → set `lora_A.data`** | **`cl_trainer_specroute.py`** |
|
| 436 |
+
| C5: Fallback | max eigval < 1e-6 → skip C5, keep Kaiming + InfLoRA projection | `cl_trainer_specroute.py` |
|
| 437 |
|
| 438 |
---
|
| 439 |
|
|
|
|
| 447 |
| LoRA | $r = 8$, target=Q+V, InfLoRA (chỉ B trained, A đóng băng) |
|
| 448 |
| Routing | $\tau = 1.0$, $\alpha_{\mathrm{target}} = 0.8$, adaptive $\beta(n)$ (train); SVD đối xứng (inference) |
|
| 449 |
| ESA | $\varepsilon_0 = 0.995$ (dynamic) |
|
| 450 |
+
| C4 | Gradient preconditioning bật (`--use_preconditioning True`), $\epsilon = 10^{-6}$; entropy reg đã loại bỏ V7 |
|
| 451 |
+
| **C5** | **N_batch = 100, `torch.linalg.eigh` trên projected covariance, fallback nếu max_eigval < 1e-6** |
|
| 452 |
+
| GPM repr. | 200 batches (giảm từ 1000 — SVD ổn định sau 200) |
|
| 453 |
+
| Precision | fp32 + gradient_checkpointing (T5 + P100: fp16 có risk NaN overflow với large softmax) |
|
| 454 |
+
| P100 BSZ | BSZ=8, GA=4 (effective 32); T4: BSZ=2, GA=8 |
|
| 455 |
+
| Thời gian (P100 16GB) | SuperNI T5-Small ≈ 2-3h; Long benchmark ≈ 3-4h — thoải mái trong 12h Kaggle |
|
| 456 |
| So sánh | Batch size, LR, scheduler khớp chính xác ROOT (GainLoRA) |
|
| 457 |
|
| 458 |
---
|
improve_gainlora/_patch_genscripts.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Patch T5 specroute gen_scripts to add P100 GPU detection and BSZ."""
|
| 2 |
+
import os
import re

# The gen_script_*.sh files live in the same directory as this script, so
# resolve them relative to it instead of a machine-specific absolute path.
BASE = os.path.dirname(os.path.abspath(__file__))

T5_SCRIPTS = [
    os.path.join(BASE, 'gen_script_superni_order1_t5_specroute.sh'),
    os.path.join(BASE, 'gen_script_superni_order2_t5_specroute.sh'),
    os.path.join(BASE, 'gen_script_long_order3_t5_specroute.sh'),
    os.path.join(BASE, 'gen_script_long_order4_t5_specroute.sh'),
]

# VRAM window (in the unit of $GPU_MEM, presumably MiB -- confirm against the
# gen scripts) that selects the P100 strategy.  The upper bound keeps larger
# cards in the final "a100" branch; a bare "-gt 16000" would match them too
# and leave that branch unreachable for real A100s.
P100_MIN_MEM_MB = 16000
P100_MAX_MEM_MB = 17000

# Assignment that only exists once the P100 detection branch was inserted.
P100_MARKER = 'GPU_MODE="p100"'

# Final "else ... fi" fallback of the GPU-detection chain.  Indentation is
# matched loosely and captured so the inserted branch can reuse it.
GPU_ELSE_PAT = re.compile(
    r'^else\n'
    r'(?P<indent>[ \t]+)GPU_MODE="a100"\n'
    r'[ \t]+GPU_IDS="\$\{1:-0\}"\n'
    r'[ \t]+FP16_FLAG=""\n'
    r'[ \t]+echo "\[GPU\] Strategy: A100 \(single GPU, fp32\)"\n'
    r'fi',
    re.MULTILINE,
)

# A "t4_1gpu" batch-size branch immediately followed by the "else" fallback.
# Once a p100 branch sits between them this no longer matches, which makes the
# batch-size patch naturally idempotent.
BSZ_PAT = re.compile(
    r'(?P<t4>elif \[ "\$GPU_MODE" = "t4_1gpu" \]; then\n'
    r'(?P<indent>[ \t]+)BSZ=\d+; GA=\d+; EVAL_BSZ=\d+\n)'
    r'(?P<fallback>else\n[ \t]+BSZ=\d+; GA=\d+; EVAL_BSZ=\d+\n)'
)


def add_p100_detect(m):
    """Return the matched A100 fallback preceded by a P100 detection branch.

    Args:
        m: A match object produced by ``GPU_ELSE_PAT``.

    Returns:
        The replacement text: a bounded ``elif`` selecting ``GPU_MODE="p100"``
        followed by the original, unmodified ``else ... fi`` fallback.
    """
    indent = m.group('indent')
    return (
        f'elif [ "$GPU_MEM" -gt {P100_MIN_MEM_MB} ] '
        f'&& [ "$GPU_MEM" -lt {P100_MAX_MEM_MB} ]; then\n'
        f'{indent}GPU_MODE="p100"\n'
        f'{indent}GPU_IDS="${{1:-0}}"\n'
        f'{indent}FP16_FLAG="--gradient_checkpointing"\n'
        f'{indent}echo "[GPU] Strategy: P100 16GB (fp32 + gradient_checkpointing)"\n'
        + m.group(0)
    )


def add_p100(m):
    """Insert a P100 batch-size branch between the T4 branch and the fallback.

    Args:
        m: A match object produced by ``BSZ_PAT``.

    Returns:
        The T4 branch, a new ``p100`` branch (BSZ=8, GA=4, EVAL_BSZ=4) using
        the same indentation as the T4 branch, then the original fallback.
    """
    return (
        m.group('t4')
        + 'elif [ "$GPU_MODE" = "p100" ]; then\n'
        + f'{m.group("indent")}BSZ=8; GA=4; EVAL_BSZ=4\n'
        + m.group('fallback')
    )


def patch_text(content):
    """Apply both P100 patches to the text of one gen script.

    Args:
        content: Full text of a gen_script_*.sh file.

    Returns:
        A ``(patched, n_detect, n_bsz)`` tuple: the patched text, the number
        of GPU-detection branches inserted (0 or 1) and the number of
        batch-size blocks extended.
    """
    n_detect = 0
    # The inserted branch leaves the A100 fallback in place, so without this
    # guard a second run would insert a duplicate P100 branch.
    if P100_MARKER not in content:
        content, n_detect = GPU_ELSE_PAT.subn(add_p100_detect, content, count=1)
    content, n_bsz = BSZ_PAT.subn(add_p100, content)
    return content, n_detect, n_bsz


def main():
    """Patch every script in ``T5_SCRIPTS`` in place and print a summary."""
    for name in T5_SCRIPTS:
        if not os.path.exists(name):
            print(f'SKIP (not found): {name}')
            continue
        with open(name, encoding='utf-8') as f:
            original = f.read()
        patched, n_detect, n_bsz = patch_text(original)
        # Leave the file (and its mtime) untouched when nothing matched.
        if patched != original:
            with open(name, 'w', encoding='utf-8') as f:
                f.write(patched)
        print(f'{name}: gpu_detect={n_detect} bsz_blocks={n_bsz}')

    print('Done.')


if __name__ == '__main__':
    main()
|
improve_gainlora/gen_script_long_order3_t5_specroute.sh
CHANGED
|
@@ -23,7 +23,7 @@ if [ -z "$GPU_MEM" ]; then
|
|
| 23 |
fi
|
| 24 |
|
| 25 |
# Determine GPU type
|
| 26 |
-
if [ "$GPU_MEM" -lt
|
| 27 |
IS_T4=1
|
| 28 |
echo "[GPU] Detected T4 GPUs (${GPU_MEM}MB VRAM each)"
|
| 29 |
else
|
|
@@ -42,6 +42,11 @@ elif [ "$IS_T4" -eq 1 ]; then
|
|
| 42 |
GPU_IDS="${1:-0}"
|
| 43 |
FP16_FLAG="--gradient_checkpointing"
|
| 44 |
echo "[GPU] Strategy: 1x T4 + fp32 + gradient_checkpointing"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
else
|
| 46 |
GPU_MODE="a100"
|
| 47 |
GPU_IDS="${1:-0}"
|
|
@@ -57,6 +62,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 57 |
BSZ=2; GA=8; EVAL_BSZ=16
|
| 58 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 59 |
BSZ=4; GA=8; EVAL_BSZ=16
|
|
|
|
|
|
|
| 60 |
else
|
| 61 |
BSZ=8; GA=4; EVAL_BSZ=128
|
| 62 |
fi
|
|
@@ -110,6 +117,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 110 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 111 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 112 |
BSZ=4; GA=8; EVAL_BSZ=16
|
|
|
|
|
|
|
| 113 |
else
|
| 114 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 115 |
fi
|
|
@@ -163,6 +172,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 163 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 164 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 165 |
BSZ=4; GA=8; EVAL_BSZ=16
|
|
|
|
|
|
|
| 166 |
else
|
| 167 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 168 |
fi
|
|
@@ -216,6 +227,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 216 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 217 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 218 |
BSZ=4; GA=8; EVAL_BSZ=16
|
|
|
|
|
|
|
| 219 |
else
|
| 220 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 221 |
fi
|
|
@@ -269,6 +282,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 269 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 270 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 271 |
BSZ=4; GA=8; EVAL_BSZ=16
|
|
|
|
|
|
|
| 272 |
else
|
| 273 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 274 |
fi
|
|
@@ -322,6 +337,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 322 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 323 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 324 |
BSZ=4; GA=8; EVAL_BSZ=16
|
|
|
|
|
|
|
| 325 |
else
|
| 326 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 327 |
fi
|
|
@@ -375,6 +392,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 375 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 376 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 377 |
BSZ=4; GA=8; EVAL_BSZ=16
|
|
|
|
|
|
|
| 378 |
else
|
| 379 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 380 |
fi
|
|
@@ -428,6 +447,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 428 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 429 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 430 |
BSZ=4; GA=8; EVAL_BSZ=16
|
|
|
|
|
|
|
| 431 |
else
|
| 432 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 433 |
fi
|
|
@@ -481,6 +502,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 481 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 482 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 483 |
BSZ=4; GA=8; EVAL_BSZ=16
|
|
|
|
|
|
|
| 484 |
else
|
| 485 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 486 |
fi
|
|
@@ -534,6 +557,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 534 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 535 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 536 |
BSZ=4; GA=8; EVAL_BSZ=16
|
|
|
|
|
|
|
| 537 |
else
|
| 538 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 539 |
fi
|
|
@@ -587,6 +612,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 587 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 588 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 589 |
BSZ=4; GA=8; EVAL_BSZ=16
|
|
|
|
|
|
|
| 590 |
else
|
| 591 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 592 |
fi
|
|
@@ -640,6 +667,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 640 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 641 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 642 |
BSZ=4; GA=8; EVAL_BSZ=16
|
|
|
|
|
|
|
| 643 |
else
|
| 644 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 645 |
fi
|
|
@@ -693,6 +722,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 693 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 694 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 695 |
BSZ=4; GA=8; EVAL_BSZ=16
|
|
|
|
|
|
|
| 696 |
else
|
| 697 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 698 |
fi
|
|
@@ -746,6 +777,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 746 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 747 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 748 |
BSZ=4; GA=8; EVAL_BSZ=16
|
|
|
|
|
|
|
| 749 |
else
|
| 750 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 751 |
fi
|
|
@@ -799,6 +832,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 799 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 800 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 801 |
BSZ=4; GA=8; EVAL_BSZ=16
|
|
|
|
|
|
|
| 802 |
else
|
| 803 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 804 |
fi
|
|
|
|
| 23 |
fi
|
| 24 |
|
| 25 |
# Determine GPU type
|
| 26 |
+
if [ "$GPU_MEM" -lt 15500 ]; then
|
| 27 |
IS_T4=1
|
| 28 |
echo "[GPU] Detected T4 GPUs (${GPU_MEM}MB VRAM each)"
|
| 29 |
else
|
|
|
|
| 42 |
GPU_IDS="${1:-0}"
|
| 43 |
FP16_FLAG="--gradient_checkpointing"
|
| 44 |
echo "[GPU] Strategy: 1x T4 + fp32 + gradient_checkpointing"
|
| 45 |
+
elif [ "$GPU_MEM" -gt 16000 ]; then
|
| 46 |
+
GPU_MODE="p100"
|
| 47 |
+
GPU_IDS="${1:-0}"
|
| 48 |
+
FP16_FLAG="--gradient_checkpointing"
|
| 49 |
+
echo "[GPU] Strategy: P100 16GB (fp32 + gradient_checkpointing)"
|
| 50 |
else
|
| 51 |
GPU_MODE="a100"
|
| 52 |
GPU_IDS="${1:-0}"
|
|
|
|
| 62 |
BSZ=2; GA=8; EVAL_BSZ=16
|
| 63 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 64 |
BSZ=4; GA=8; EVAL_BSZ=16
|
| 65 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 66 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 67 |
else
|
| 68 |
BSZ=8; GA=4; EVAL_BSZ=128
|
| 69 |
fi
|
|
|
|
| 117 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 118 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 119 |
BSZ=4; GA=8; EVAL_BSZ=16
|
| 120 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 121 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 122 |
else
|
| 123 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 124 |
fi
|
|
|
|
| 172 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 173 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 174 |
BSZ=4; GA=8; EVAL_BSZ=16
|
| 175 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 176 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 177 |
else
|
| 178 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 179 |
fi
|
|
|
|
| 227 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 228 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 229 |
BSZ=4; GA=8; EVAL_BSZ=16
|
| 230 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 231 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 232 |
else
|
| 233 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 234 |
fi
|
|
|
|
| 282 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 283 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 284 |
BSZ=4; GA=8; EVAL_BSZ=16
|
| 285 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 286 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 287 |
else
|
| 288 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 289 |
fi
|
|
|
|
| 337 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 338 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 339 |
BSZ=4; GA=8; EVAL_BSZ=16
|
| 340 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 341 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 342 |
else
|
| 343 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 344 |
fi
|
|
|
|
| 392 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 393 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 394 |
BSZ=4; GA=8; EVAL_BSZ=16
|
| 395 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 396 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 397 |
else
|
| 398 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 399 |
fi
|
|
|
|
| 447 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 448 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 449 |
BSZ=4; GA=8; EVAL_BSZ=16
|
| 450 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 451 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 452 |
else
|
| 453 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 454 |
fi
|
|
|
|
| 502 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 503 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 504 |
BSZ=4; GA=8; EVAL_BSZ=16
|
| 505 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 506 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 507 |
else
|
| 508 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 509 |
fi
|
|
|
|
| 557 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 558 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 559 |
BSZ=4; GA=8; EVAL_BSZ=16
|
| 560 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 561 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 562 |
else
|
| 563 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 564 |
fi
|
|
|
|
| 612 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 613 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 614 |
BSZ=4; GA=8; EVAL_BSZ=16
|
| 615 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 616 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 617 |
else
|
| 618 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 619 |
fi
|
|
|
|
| 667 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 668 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 669 |
BSZ=4; GA=8; EVAL_BSZ=16
|
| 670 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 671 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 672 |
else
|
| 673 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 674 |
fi
|
|
|
|
| 722 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 723 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 724 |
BSZ=4; GA=8; EVAL_BSZ=16
|
| 725 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 726 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 727 |
else
|
| 728 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 729 |
fi
|
|
|
|
| 777 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 778 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 779 |
BSZ=4; GA=8; EVAL_BSZ=16
|
| 780 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 781 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 782 |
else
|
| 783 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 784 |
fi
|
|
|
|
| 832 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 833 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 834 |
BSZ=4; GA=8; EVAL_BSZ=16
|
| 835 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 836 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 837 |
else
|
| 838 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 839 |
fi
|
improve_gainlora/gen_script_long_order4_t5_specroute.sh
CHANGED
|
@@ -23,7 +23,7 @@ if [ -z "$GPU_MEM" ]; then
|
|
| 23 |
fi
|
| 24 |
|
| 25 |
# Determine GPU type
|
| 26 |
-
if [ "$GPU_MEM" -lt
|
| 27 |
IS_T4=1
|
| 28 |
echo "[GPU] Detected T4 GPUs (${GPU_MEM}MB VRAM each)"
|
| 29 |
else
|
|
@@ -42,6 +42,11 @@ elif [ "$IS_T4" -eq 1 ]; then
|
|
| 42 |
GPU_IDS="${1:-0}"
|
| 43 |
FP16_FLAG="--gradient_checkpointing"
|
| 44 |
echo "[GPU] Strategy: 1x T4 + fp32 + gradient_checkpointing"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
else
|
| 46 |
GPU_MODE="a100"
|
| 47 |
GPU_IDS="${1:-0}"
|
|
@@ -57,6 +62,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 57 |
BSZ=2; GA=8; EVAL_BSZ=16
|
| 58 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 59 |
BSZ=4; GA=8; EVAL_BSZ=16
|
|
|
|
|
|
|
| 60 |
else
|
| 61 |
BSZ=8; GA=4; EVAL_BSZ=128
|
| 62 |
fi
|
|
@@ -110,6 +117,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 110 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 111 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 112 |
BSZ=4; GA=8; EVAL_BSZ=16
|
|
|
|
|
|
|
| 113 |
else
|
| 114 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 115 |
fi
|
|
@@ -163,6 +172,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 163 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 164 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 165 |
BSZ=4; GA=8; EVAL_BSZ=16
|
|
|
|
|
|
|
| 166 |
else
|
| 167 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 168 |
fi
|
|
@@ -216,6 +227,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 216 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 217 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 218 |
BSZ=4; GA=8; EVAL_BSZ=16
|
|
|
|
|
|
|
| 219 |
else
|
| 220 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 221 |
fi
|
|
@@ -269,6 +282,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 269 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 270 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 271 |
BSZ=4; GA=8; EVAL_BSZ=16
|
|
|
|
|
|
|
| 272 |
else
|
| 273 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 274 |
fi
|
|
@@ -322,6 +337,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 322 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 323 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 324 |
BSZ=4; GA=8; EVAL_BSZ=16
|
|
|
|
|
|
|
| 325 |
else
|
| 326 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 327 |
fi
|
|
@@ -375,6 +392,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 375 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 376 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 377 |
BSZ=4; GA=8; EVAL_BSZ=16
|
|
|
|
|
|
|
| 378 |
else
|
| 379 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 380 |
fi
|
|
@@ -428,6 +447,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 428 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 429 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 430 |
BSZ=4; GA=8; EVAL_BSZ=16
|
|
|
|
|
|
|
| 431 |
else
|
| 432 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 433 |
fi
|
|
@@ -481,6 +502,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 481 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 482 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 483 |
BSZ=4; GA=8; EVAL_BSZ=16
|
|
|
|
|
|
|
| 484 |
else
|
| 485 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 486 |
fi
|
|
@@ -534,6 +557,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 534 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 535 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 536 |
BSZ=4; GA=8; EVAL_BSZ=16
|
|
|
|
|
|
|
| 537 |
else
|
| 538 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 539 |
fi
|
|
@@ -587,6 +612,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 587 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 588 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 589 |
BSZ=4; GA=8; EVAL_BSZ=16
|
|
|
|
|
|
|
| 590 |
else
|
| 591 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 592 |
fi
|
|
@@ -640,6 +667,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 640 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 641 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 642 |
BSZ=4; GA=8; EVAL_BSZ=16
|
|
|
|
|
|
|
| 643 |
else
|
| 644 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 645 |
fi
|
|
@@ -693,6 +722,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 693 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 694 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 695 |
BSZ=4; GA=8; EVAL_BSZ=16
|
|
|
|
|
|
|
| 696 |
else
|
| 697 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 698 |
fi
|
|
@@ -746,6 +777,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 746 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 747 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 748 |
BSZ=4; GA=8; EVAL_BSZ=16
|
|
|
|
|
|
|
| 749 |
else
|
| 750 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 751 |
fi
|
|
@@ -799,6 +832,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 799 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 800 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 801 |
BSZ=4; GA=8; EVAL_BSZ=16
|
|
|
|
|
|
|
| 802 |
else
|
| 803 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 804 |
fi
|
|
|
|
| 23 |
fi
|
| 24 |
|
| 25 |
# Determine GPU type
|
| 26 |
+
if [ "$GPU_MEM" -lt 15500 ]; then
|
| 27 |
IS_T4=1
|
| 28 |
echo "[GPU] Detected T4 GPUs (${GPU_MEM}MB VRAM each)"
|
| 29 |
else
|
|
|
|
| 42 |
GPU_IDS="${1:-0}"
|
| 43 |
FP16_FLAG="--gradient_checkpointing"
|
| 44 |
echo "[GPU] Strategy: 1x T4 + fp32 + gradient_checkpointing"
|
| 45 |
+
elif [ "$GPU_MEM" -gt 16000 ]; then
|
| 46 |
+
GPU_MODE="p100"
|
| 47 |
+
GPU_IDS="${1:-0}"
|
| 48 |
+
FP16_FLAG="--gradient_checkpointing"
|
| 49 |
+
echo "[GPU] Strategy: P100 16GB (fp32 + gradient_checkpointing)"
|
| 50 |
else
|
| 51 |
GPU_MODE="a100"
|
| 52 |
GPU_IDS="${1:-0}"
|
|
|
|
| 62 |
BSZ=2; GA=8; EVAL_BSZ=16
|
| 63 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 64 |
BSZ=4; GA=8; EVAL_BSZ=16
|
| 65 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 66 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 67 |
else
|
| 68 |
BSZ=8; GA=4; EVAL_BSZ=128
|
| 69 |
fi
|
|
|
|
| 117 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 118 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 119 |
BSZ=4; GA=8; EVAL_BSZ=16
|
| 120 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 121 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 122 |
else
|
| 123 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 124 |
fi
|
|
|
|
| 172 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 173 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 174 |
BSZ=4; GA=8; EVAL_BSZ=16
|
| 175 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 176 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 177 |
else
|
| 178 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 179 |
fi
|
|
|
|
| 227 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 228 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 229 |
BSZ=4; GA=8; EVAL_BSZ=16
|
| 230 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 231 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 232 |
else
|
| 233 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 234 |
fi
|
|
|
|
| 282 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 283 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 284 |
BSZ=4; GA=8; EVAL_BSZ=16
|
| 285 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 286 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 287 |
else
|
| 288 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 289 |
fi
|
|
|
|
| 337 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 338 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 339 |
BSZ=4; GA=8; EVAL_BSZ=16
|
| 340 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 341 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 342 |
else
|
| 343 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 344 |
fi
|
|
|
|
| 392 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 393 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 394 |
BSZ=4; GA=8; EVAL_BSZ=16
|
| 395 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 396 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 397 |
else
|
| 398 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 399 |
fi
|
|
|
|
| 447 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 448 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 449 |
BSZ=4; GA=8; EVAL_BSZ=16
|
| 450 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 451 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 452 |
else
|
| 453 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 454 |
fi
|
|
|
|
| 502 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 503 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 504 |
BSZ=4; GA=8; EVAL_BSZ=16
|
| 505 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 506 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 507 |
else
|
| 508 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 509 |
fi
|
|
|
|
| 557 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 558 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 559 |
BSZ=4; GA=8; EVAL_BSZ=16
|
| 560 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 561 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 562 |
else
|
| 563 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 564 |
fi
|
|
|
|
| 612 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 613 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 614 |
BSZ=4; GA=8; EVAL_BSZ=16
|
| 615 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 616 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 617 |
else
|
| 618 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 619 |
fi
|
|
|
|
| 667 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 668 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 669 |
BSZ=4; GA=8; EVAL_BSZ=16
|
| 670 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 671 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 672 |
else
|
| 673 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 674 |
fi
|
|
|
|
| 722 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 723 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 724 |
BSZ=4; GA=8; EVAL_BSZ=16
|
| 725 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 726 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 727 |
else
|
| 728 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 729 |
fi
|
|
|
|
| 777 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 778 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 779 |
BSZ=4; GA=8; EVAL_BSZ=16
|
| 780 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 781 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 782 |
else
|
| 783 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 784 |
fi
|
|
|
|
| 832 |
BSZ=2; GA=4; EVAL_BSZ=16
|
| 833 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 834 |
BSZ=4; GA=8; EVAL_BSZ=16
|
| 835 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 836 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 837 |
else
|
| 838 |
BSZ=16; GA=2; EVAL_BSZ=128
|
| 839 |
fi
|
improve_gainlora/gen_script_superni_order1_t5_specroute.sh
CHANGED
|
@@ -23,7 +23,7 @@ if [ -z "$GPU_MEM" ]; then
|
|
| 23 |
fi
|
| 24 |
|
| 25 |
# Determine GPU type
|
| 26 |
-
if [ "$GPU_MEM" -lt
|
| 27 |
IS_T4=1
|
| 28 |
echo "[GPU] Detected T4 GPUs (${GPU_MEM}MB VRAM each)"
|
| 29 |
else
|
|
@@ -44,6 +44,11 @@ elif [ "$IS_T4" -eq 1 ]; then
|
|
| 44 |
GPU_IDS="${1:-0}"
|
| 45 |
FP16_FLAG="--gradient_checkpointing"
|
| 46 |
echo "[GPU] Strategy: 1x T4 + fp32 + gradient_checkpointing"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
else
|
| 48 |
GPU_MODE="a100"
|
| 49 |
GPU_IDS="${1:-0}"
|
|
@@ -59,6 +64,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 59 |
BSZ=2; GA=8; EVAL_BSZ=2
|
| 60 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 61 |
BSZ=2; GA=16; EVAL_BSZ=2
|
|
|
|
|
|
|
| 62 |
else
|
| 63 |
BSZ=16; GA=2; EVAL_BSZ=4
|
| 64 |
fi
|
|
@@ -110,6 +117,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 110 |
BSZ=2; GA=8; EVAL_BSZ=2
|
| 111 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 112 |
BSZ=4; GA=8; EVAL_BSZ=2
|
|
|
|
|
|
|
| 113 |
else
|
| 114 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 115 |
fi
|
|
@@ -161,6 +170,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 161 |
BSZ=2; GA=8; EVAL_BSZ=2
|
| 162 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 163 |
BSZ=4; GA=8; EVAL_BSZ=2
|
|
|
|
|
|
|
| 164 |
else
|
| 165 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 166 |
fi
|
|
@@ -212,6 +223,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 212 |
BSZ=2; GA=8; EVAL_BSZ=2
|
| 213 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 214 |
BSZ=4; GA=8; EVAL_BSZ=2
|
|
|
|
|
|
|
| 215 |
else
|
| 216 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 217 |
fi
|
|
@@ -263,6 +276,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 263 |
BSZ=2; GA=8; EVAL_BSZ=2
|
| 264 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 265 |
BSZ=4; GA=8; EVAL_BSZ=2
|
|
|
|
|
|
|
| 266 |
else
|
| 267 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 268 |
fi
|
|
@@ -314,6 +329,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 314 |
BSZ=2; GA=8; EVAL_BSZ=2
|
| 315 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 316 |
BSZ=4; GA=8; EVAL_BSZ=2
|
|
|
|
|
|
|
| 317 |
else
|
| 318 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 319 |
fi
|
|
@@ -365,6 +382,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 365 |
BSZ=2; GA=8; EVAL_BSZ=2
|
| 366 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 367 |
BSZ=4; GA=8; EVAL_BSZ=2
|
|
|
|
|
|
|
| 368 |
else
|
| 369 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 370 |
fi
|
|
@@ -416,6 +435,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 416 |
BSZ=2; GA=8; EVAL_BSZ=2
|
| 417 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 418 |
BSZ=4; GA=8; EVAL_BSZ=2
|
|
|
|
|
|
|
| 419 |
else
|
| 420 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 421 |
fi
|
|
@@ -467,6 +488,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 467 |
BSZ=2; GA=8; EVAL_BSZ=2
|
| 468 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 469 |
BSZ=4; GA=8; EVAL_BSZ=2
|
|
|
|
|
|
|
| 470 |
else
|
| 471 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 472 |
fi
|
|
@@ -518,6 +541,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 518 |
BSZ=2; GA=8; EVAL_BSZ=2
|
| 519 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 520 |
BSZ=4; GA=8; EVAL_BSZ=2
|
|
|
|
|
|
|
| 521 |
else
|
| 522 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 523 |
fi
|
|
@@ -569,6 +594,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 569 |
BSZ=2; GA=8; EVAL_BSZ=2
|
| 570 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 571 |
BSZ=4; GA=8; EVAL_BSZ=2
|
|
|
|
|
|
|
| 572 |
else
|
| 573 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 574 |
fi
|
|
@@ -620,6 +647,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 620 |
BSZ=2; GA=8; EVAL_BSZ=2
|
| 621 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 622 |
BSZ=4; GA=8; EVAL_BSZ=2
|
|
|
|
|
|
|
| 623 |
else
|
| 624 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 625 |
fi
|
|
@@ -671,6 +700,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 671 |
BSZ=2; GA=8; EVAL_BSZ=2
|
| 672 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 673 |
BSZ=4; GA=8; EVAL_BSZ=2
|
|
|
|
|
|
|
| 674 |
else
|
| 675 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 676 |
fi
|
|
@@ -722,6 +753,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 722 |
BSZ=2; GA=8; EVAL_BSZ=2
|
| 723 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 724 |
BSZ=4; GA=8; EVAL_BSZ=2
|
|
|
|
|
|
|
| 725 |
else
|
| 726 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 727 |
fi
|
|
@@ -773,6 +806,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 773 |
BSZ=2; GA=8; EVAL_BSZ=2
|
| 774 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 775 |
BSZ=4; GA=8; EVAL_BSZ=2
|
|
|
|
|
|
|
| 776 |
else
|
| 777 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 778 |
fi
|
|
|
|
| 23 |
fi
|
| 24 |
|
| 25 |
# Determine GPU type
|
| 26 |
+
if [ "$GPU_MEM" -lt 15500 ]; then
|
| 27 |
IS_T4=1
|
| 28 |
echo "[GPU] Detected T4 GPUs (${GPU_MEM}MB VRAM each)"
|
| 29 |
else
|
|
|
|
| 44 |
GPU_IDS="${1:-0}"
|
| 45 |
FP16_FLAG="--gradient_checkpointing"
|
| 46 |
echo "[GPU] Strategy: 1x T4 + fp32 + gradient_checkpointing"
|
| 47 |
+
elif [ "$GPU_MEM" -gt 16000 ]; then
|
| 48 |
+
GPU_MODE="p100"
|
| 49 |
+
GPU_IDS="${1:-0}"
|
| 50 |
+
FP16_FLAG="--gradient_checkpointing"
|
| 51 |
+
echo "[GPU] Strategy: P100 16GB (fp32 + gradient_checkpointing)"
|
| 52 |
else
|
| 53 |
GPU_MODE="a100"
|
| 54 |
GPU_IDS="${1:-0}"
|
|
|
|
| 64 |
BSZ=2; GA=8; EVAL_BSZ=2
|
| 65 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 66 |
BSZ=2; GA=16; EVAL_BSZ=2
|
| 67 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 68 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 69 |
else
|
| 70 |
BSZ=16; GA=2; EVAL_BSZ=4
|
| 71 |
fi
|
|
|
|
| 117 |
BSZ=2; GA=8; EVAL_BSZ=2
|
| 118 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 119 |
BSZ=4; GA=8; EVAL_BSZ=2
|
| 120 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 121 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 122 |
else
|
| 123 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 124 |
fi
|
|
|
|
| 170 |
BSZ=2; GA=8; EVAL_BSZ=2
|
| 171 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 172 |
BSZ=4; GA=8; EVAL_BSZ=2
|
| 173 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 174 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 175 |
else
|
| 176 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 177 |
fi
|
|
|
|
| 223 |
BSZ=2; GA=8; EVAL_BSZ=2
|
| 224 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 225 |
BSZ=4; GA=8; EVAL_BSZ=2
|
| 226 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 227 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 228 |
else
|
| 229 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 230 |
fi
|
|
|
|
| 276 |
BSZ=2; GA=8; EVAL_BSZ=2
|
| 277 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 278 |
BSZ=4; GA=8; EVAL_BSZ=2
|
| 279 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 280 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 281 |
else
|
| 282 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 283 |
fi
|
|
|
|
| 329 |
BSZ=2; GA=8; EVAL_BSZ=2
|
| 330 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 331 |
BSZ=4; GA=8; EVAL_BSZ=2
|
| 332 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 333 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 334 |
else
|
| 335 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 336 |
fi
|
|
|
|
| 382 |
BSZ=2; GA=8; EVAL_BSZ=2
|
| 383 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 384 |
BSZ=4; GA=8; EVAL_BSZ=2
|
| 385 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 386 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 387 |
else
|
| 388 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 389 |
fi
|
|
|
|
| 435 |
BSZ=2; GA=8; EVAL_BSZ=2
|
| 436 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 437 |
BSZ=4; GA=8; EVAL_BSZ=2
|
| 438 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 439 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 440 |
else
|
| 441 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 442 |
fi
|
|
|
|
| 488 |
BSZ=2; GA=8; EVAL_BSZ=2
|
| 489 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 490 |
BSZ=4; GA=8; EVAL_BSZ=2
|
| 491 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 492 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 493 |
else
|
| 494 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 495 |
fi
|
|
|
|
| 541 |
BSZ=2; GA=8; EVAL_BSZ=2
|
| 542 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 543 |
BSZ=4; GA=8; EVAL_BSZ=2
|
| 544 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 545 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 546 |
else
|
| 547 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 548 |
fi
|
|
|
|
| 594 |
BSZ=2; GA=8; EVAL_BSZ=2
|
| 595 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 596 |
BSZ=4; GA=8; EVAL_BSZ=2
|
| 597 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 598 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 599 |
else
|
| 600 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 601 |
fi
|
|
|
|
| 647 |
BSZ=2; GA=8; EVAL_BSZ=2
|
| 648 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 649 |
BSZ=4; GA=8; EVAL_BSZ=2
|
| 650 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 651 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 652 |
else
|
| 653 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 654 |
fi
|
|
|
|
| 700 |
BSZ=2; GA=8; EVAL_BSZ=2
|
| 701 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 702 |
BSZ=4; GA=8; EVAL_BSZ=2
|
| 703 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 704 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 705 |
else
|
| 706 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 707 |
fi
|
|
|
|
| 753 |
BSZ=2; GA=8; EVAL_BSZ=2
|
| 754 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 755 |
BSZ=4; GA=8; EVAL_BSZ=2
|
| 756 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 757 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 758 |
else
|
| 759 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 760 |
fi
|
|
|
|
| 806 |
BSZ=2; GA=8; EVAL_BSZ=2
|
| 807 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 808 |
BSZ=4; GA=8; EVAL_BSZ=2
|
| 809 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 810 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 811 |
else
|
| 812 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 813 |
fi
|
improve_gainlora/gen_script_superni_order2_t5_specroute.sh
CHANGED
|
@@ -23,7 +23,7 @@ if [ -z "$GPU_MEM" ]; then
|
|
| 23 |
fi
|
| 24 |
|
| 25 |
# Determine GPU type
|
| 26 |
-
if [ "$GPU_MEM" -lt
|
| 27 |
IS_T4=1
|
| 28 |
echo "[GPU] Detected T4 GPUs (${GPU_MEM}MB VRAM each)"
|
| 29 |
else
|
|
@@ -42,6 +42,11 @@ elif [ "$IS_T4" -eq 1 ]; then
|
|
| 42 |
GPU_IDS="${1:-0}"
|
| 43 |
FP16_FLAG="--gradient_checkpointing"
|
| 44 |
echo "[GPU] Strategy: 1x T4 + fp32 + gradient_checkpointing"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
else
|
| 46 |
GPU_MODE="a100"
|
| 47 |
GPU_IDS="${1:-0}"
|
|
@@ -57,6 +62,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 57 |
BSZ=4; GA=4; EVAL_BSZ=4
|
| 58 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 59 |
BSZ=8; GA=4; EVAL_BSZ=4
|
|
|
|
|
|
|
| 60 |
else
|
| 61 |
BSZ=16; GA=2; EVAL_BSZ=4
|
| 62 |
fi
|
|
@@ -107,6 +114,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 107 |
BSZ=4; GA=4; EVAL_BSZ=4
|
| 108 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 109 |
BSZ=8; GA=4; EVAL_BSZ=4
|
|
|
|
|
|
|
| 110 |
else
|
| 111 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 112 |
fi
|
|
@@ -157,6 +166,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 157 |
BSZ=4; GA=4; EVAL_BSZ=4
|
| 158 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 159 |
BSZ=8; GA=4; EVAL_BSZ=4
|
|
|
|
|
|
|
| 160 |
else
|
| 161 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 162 |
fi
|
|
@@ -207,6 +218,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 207 |
BSZ=4; GA=4; EVAL_BSZ=4
|
| 208 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 209 |
BSZ=8; GA=4; EVAL_BSZ=4
|
|
|
|
|
|
|
| 210 |
else
|
| 211 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 212 |
fi
|
|
@@ -257,6 +270,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 257 |
BSZ=4; GA=4; EVAL_BSZ=4
|
| 258 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 259 |
BSZ=8; GA=4; EVAL_BSZ=4
|
|
|
|
|
|
|
| 260 |
else
|
| 261 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 262 |
fi
|
|
@@ -307,6 +322,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 307 |
BSZ=4; GA=4; EVAL_BSZ=4
|
| 308 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 309 |
BSZ=8; GA=4; EVAL_BSZ=4
|
|
|
|
|
|
|
| 310 |
else
|
| 311 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 312 |
fi
|
|
@@ -357,6 +374,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 357 |
BSZ=4; GA=4; EVAL_BSZ=4
|
| 358 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 359 |
BSZ=8; GA=4; EVAL_BSZ=4
|
|
|
|
|
|
|
| 360 |
else
|
| 361 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 362 |
fi
|
|
@@ -407,6 +426,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 407 |
BSZ=4; GA=4; EVAL_BSZ=4
|
| 408 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 409 |
BSZ=8; GA=4; EVAL_BSZ=4
|
|
|
|
|
|
|
| 410 |
else
|
| 411 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 412 |
fi
|
|
@@ -457,6 +478,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 457 |
BSZ=4; GA=4; EVAL_BSZ=4
|
| 458 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 459 |
BSZ=8; GA=4; EVAL_BSZ=4
|
|
|
|
|
|
|
| 460 |
else
|
| 461 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 462 |
fi
|
|
@@ -507,6 +530,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 507 |
BSZ=4; GA=4; EVAL_BSZ=4
|
| 508 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 509 |
BSZ=8; GA=4; EVAL_BSZ=4
|
|
|
|
|
|
|
| 510 |
else
|
| 511 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 512 |
fi
|
|
@@ -557,6 +582,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 557 |
BSZ=4; GA=4; EVAL_BSZ=4
|
| 558 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 559 |
BSZ=8; GA=4; EVAL_BSZ=4
|
|
|
|
|
|
|
| 560 |
else
|
| 561 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 562 |
fi
|
|
@@ -607,6 +634,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 607 |
BSZ=4; GA=4; EVAL_BSZ=4
|
| 608 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 609 |
BSZ=8; GA=4; EVAL_BSZ=4
|
|
|
|
|
|
|
| 610 |
else
|
| 611 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 612 |
fi
|
|
@@ -657,6 +686,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 657 |
BSZ=4; GA=4; EVAL_BSZ=4
|
| 658 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 659 |
BSZ=8; GA=4; EVAL_BSZ=4
|
|
|
|
|
|
|
| 660 |
else
|
| 661 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 662 |
fi
|
|
@@ -707,6 +738,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 707 |
BSZ=4; GA=4; EVAL_BSZ=4
|
| 708 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 709 |
BSZ=8; GA=4; EVAL_BSZ=4
|
|
|
|
|
|
|
| 710 |
else
|
| 711 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 712 |
fi
|
|
@@ -757,6 +790,8 @@ if [ "$GPU_MODE" = "t4_2gpu" ]; then
|
|
| 757 |
BSZ=4; GA=4; EVAL_BSZ=4
|
| 758 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 759 |
BSZ=8; GA=4; EVAL_BSZ=4
|
|
|
|
|
|
|
| 760 |
else
|
| 761 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 762 |
fi
|
|
|
|
| 23 |
fi
|
| 24 |
|
| 25 |
# Determine GPU type
|
| 26 |
+
if [ "$GPU_MEM" -lt 15500 ]; then
|
| 27 |
IS_T4=1
|
| 28 |
echo "[GPU] Detected T4 GPUs (${GPU_MEM}MB VRAM each)"
|
| 29 |
else
|
|
|
|
| 42 |
GPU_IDS="${1:-0}"
|
| 43 |
FP16_FLAG="--gradient_checkpointing"
|
| 44 |
echo "[GPU] Strategy: 1x T4 + fp32 + gradient_checkpointing"
|
| 45 |
+
elif [ "$GPU_MEM" -gt 16000 ]; then
|
| 46 |
+
GPU_MODE="p100"
|
| 47 |
+
GPU_IDS="${1:-0}"
|
| 48 |
+
FP16_FLAG="--gradient_checkpointing"
|
| 49 |
+
echo "[GPU] Strategy: P100 16GB (fp32 + gradient_checkpointing)"
|
| 50 |
else
|
| 51 |
GPU_MODE="a100"
|
| 52 |
GPU_IDS="${1:-0}"
|
|
|
|
| 62 |
BSZ=4; GA=4; EVAL_BSZ=4
|
| 63 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 64 |
BSZ=8; GA=4; EVAL_BSZ=4
|
| 65 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 66 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 67 |
else
|
| 68 |
BSZ=16; GA=2; EVAL_BSZ=4
|
| 69 |
fi
|
|
|
|
| 114 |
BSZ=4; GA=4; EVAL_BSZ=4
|
| 115 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 116 |
BSZ=8; GA=4; EVAL_BSZ=4
|
| 117 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 118 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 119 |
else
|
| 120 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 121 |
fi
|
|
|
|
| 166 |
BSZ=4; GA=4; EVAL_BSZ=4
|
| 167 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 168 |
BSZ=8; GA=4; EVAL_BSZ=4
|
| 169 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 170 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 171 |
else
|
| 172 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 173 |
fi
|
|
|
|
| 218 |
BSZ=4; GA=4; EVAL_BSZ=4
|
| 219 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 220 |
BSZ=8; GA=4; EVAL_BSZ=4
|
| 221 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 222 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 223 |
else
|
| 224 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 225 |
fi
|
|
|
|
| 270 |
BSZ=4; GA=4; EVAL_BSZ=4
|
| 271 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 272 |
BSZ=8; GA=4; EVAL_BSZ=4
|
| 273 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 274 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 275 |
else
|
| 276 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 277 |
fi
|
|
|
|
| 322 |
BSZ=4; GA=4; EVAL_BSZ=4
|
| 323 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 324 |
BSZ=8; GA=4; EVAL_BSZ=4
|
| 325 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 326 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 327 |
else
|
| 328 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 329 |
fi
|
|
|
|
| 374 |
BSZ=4; GA=4; EVAL_BSZ=4
|
| 375 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 376 |
BSZ=8; GA=4; EVAL_BSZ=4
|
| 377 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 378 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 379 |
else
|
| 380 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 381 |
fi
|
|
|
|
| 426 |
BSZ=4; GA=4; EVAL_BSZ=4
|
| 427 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 428 |
BSZ=8; GA=4; EVAL_BSZ=4
|
| 429 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 430 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 431 |
else
|
| 432 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 433 |
fi
|
|
|
|
| 478 |
BSZ=4; GA=4; EVAL_BSZ=4
|
| 479 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 480 |
BSZ=8; GA=4; EVAL_BSZ=4
|
| 481 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 482 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 483 |
else
|
| 484 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 485 |
fi
|
|
|
|
| 530 |
BSZ=4; GA=4; EVAL_BSZ=4
|
| 531 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 532 |
BSZ=8; GA=4; EVAL_BSZ=4
|
| 533 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 534 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 535 |
else
|
| 536 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 537 |
fi
|
|
|
|
| 582 |
BSZ=4; GA=4; EVAL_BSZ=4
|
| 583 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 584 |
BSZ=8; GA=4; EVAL_BSZ=4
|
| 585 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 586 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 587 |
else
|
| 588 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 589 |
fi
|
|
|
|
| 634 |
BSZ=4; GA=4; EVAL_BSZ=4
|
| 635 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 636 |
BSZ=8; GA=4; EVAL_BSZ=4
|
| 637 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 638 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 639 |
else
|
| 640 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 641 |
fi
|
|
|
|
| 686 |
BSZ=4; GA=4; EVAL_BSZ=4
|
| 687 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 688 |
BSZ=8; GA=4; EVAL_BSZ=4
|
| 689 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 690 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 691 |
else
|
| 692 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 693 |
fi
|
|
|
|
| 738 |
BSZ=4; GA=4; EVAL_BSZ=4
|
| 739 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 740 |
BSZ=8; GA=4; EVAL_BSZ=4
|
| 741 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 742 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 743 |
else
|
| 744 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 745 |
fi
|
|
|
|
| 790 |
BSZ=4; GA=4; EVAL_BSZ=4
|
| 791 |
elif [ "$GPU_MODE" = "t4_1gpu" ]; then
|
| 792 |
BSZ=8; GA=4; EVAL_BSZ=4
|
| 793 |
+
elif [ "$GPU_MODE" = "p100" ]; then
|
| 794 |
+
BSZ=8; GA=4; EVAL_BSZ=4
|
| 795 |
else
|
| 796 |
BSZ=32; GA=1; EVAL_BSZ=4
|
| 797 |
fi
|
improve_gainlora/src/cl_trainer_specroute.py
CHANGED
|
@@ -343,6 +343,10 @@ class SpecRoute_Trainer(Seq2SeqTrainer):
|
|
| 343 |
C_tilde = (C_tilde + C_tilde.T) * 0.5
|
| 344 |
# eigh returns ascending eigenvalues; take last r (largest)
|
| 345 |
eigvals, eigvecs = torch.linalg.eigh(C_tilde.float())
|
|
|
|
|
|
|
|
|
|
|
|
|
| 346 |
top_eigvecs = eigvecs[:, -r:].flip(dims=[1]) # [step, r]
|
| 347 |
A_init = top_eigvecs.T # [r, step]
|
| 348 |
dtype = module.lora_q.lora_A.data.dtype
|
|
@@ -408,7 +412,7 @@ class SpecRoute_Trainer(Seq2SeqTrainer):
|
|
| 408 |
else:
|
| 409 |
labels = None
|
| 410 |
outputs = self.model(**inputs)
|
| 411 |
-
if step > 1000:
|
| 412 |
break
|
| 413 |
print('end get representation')
|
| 414 |
|
|
|
|
| 343 |
C_tilde = (C_tilde + C_tilde.T) * 0.5
|
| 344 |
# eigh returns ascending eigenvalues; take last r (largest)
|
| 345 |
eigvals, eigvecs = torch.linalg.eigh(C_tilde.float())
|
| 346 |
+
# Fallback: if null-space signal is degenerate, keep Kaiming init
|
| 347 |
+
if eigvals[-1].item() < 1e-6:
|
| 348 |
+
print(f'[C5] Layer {i+1} index {index}: max_eigval={eigvals[-1].item():.2e} < 1e-6, fallback to Kaiming+InfLoRA')
|
| 349 |
+
continue
|
| 350 |
top_eigvecs = eigvecs[:, -r:].flip(dims=[1]) # [step, r]
|
| 351 |
A_init = top_eigvecs.T # [r, step]
|
| 352 |
dtype = module.lora_q.lora_A.data.dtype
|
|
|
|
| 412 |
else:
|
| 413 |
labels = None
|
| 414 |
outputs = self.model(**inputs)
|
| 415 |
+
if step > 200: # 200 batches sufficient for stable SVD (reduced from 1000 for speed)
|
| 416 |
break
|
| 417 |
print('end get representation')
|
| 418 |
|
improve_gainlora/src/run_t5.py
CHANGED
|
@@ -179,7 +179,7 @@ class ModelArguments:
|
|
| 179 |
metadata={"help": "Weight for spectral entropy regularization (C4). 0 = disabled."},
|
| 180 |
)
|
| 181 |
use_preconditioning: Optional[bool] = field(
|
| 182 |
-
default=
|
| 183 |
metadata={"help": "Enable (AA^T+eps*I)^{-1/2} gradient preconditioning on lora_B (C4)."},
|
| 184 |
)
|
| 185 |
precond_eps: Optional[float] = field(
|
|
@@ -955,7 +955,8 @@ def main():
|
|
| 955 |
n_batches_c5=model_args.n_batches_c5,
|
| 956 |
)
|
| 957 |
if training_args.do_train:
|
| 958 |
-
|
|
|
|
| 959 |
trainer.get_reg_matrix()
|
| 960 |
trainer.precompute_preconditioners()
|
| 961 |
else:
|
|
|
|
| 179 |
metadata={"help": "Weight for spectral entropy regularization (C4). 0 = disabled."},
|
| 180 |
)
|
| 181 |
use_preconditioning: Optional[bool] = field(
|
| 182 |
+
default=True,
|
| 183 |
metadata={"help": "Enable (AA^T+eps*I)^{-1/2} gradient preconditioning on lora_B (C4)."},
|
| 184 |
)
|
| 185 |
precond_eps: Optional[float] = field(
|
|
|
|
| 955 |
n_batches_c5=model_args.n_batches_c5,
|
| 956 |
)
|
| 957 |
if training_args.do_train:
|
| 958 |
+
if not model_args.run_single: # C5 is only useful for tasks t>=2
|
| 959 |
+
trainer.pre_task_data_collection()
|
| 960 |
trainer.get_reg_matrix()
|
| 961 |
trainer.precompute_preconditioners()
|
| 962 |
else:
|