Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- README.md +3 -1
- TRAINING.md +2 -22
README.md
CHANGED
|
@@ -127,6 +127,8 @@ See [TRAINING.md](TRAINING.md) for copy-paste Colab cells, full CLI reference, a
|
|
| 127 |
|
| 128 |
---
|
| 129 |
|
|
|
|
|
|
|
| 130 |
```mermaid
|
| 131 |
graph TB
|
| 132 |
subgraph Frontend["Frontend — Next.js Glass Box Visualizer"]
|
|
@@ -190,7 +192,7 @@ graph TB
|
|
| 190 |
|
| 191 |
## Tool Roster (18 Tools)
|
| 192 |
|
| 193 |
-
| Domain Investigation (
|
| 194 |
|:---|:---|:---|
|
| 195 |
| `review_alert` | `write_to_case_file` — Page to disk | `file_sar` |
|
| 196 |
| `get_customer_profile` | `request_wire_trace` — Async job | `close_alert` |
|
|
|
|
| 127 |
|
| 128 |
---
|
| 129 |
|
| 130 |
+
## Architecture
|
| 131 |
+
|
| 132 |
```mermaid
|
| 133 |
graph TB
|
| 134 |
subgraph Frontend["Frontend — Next.js Glass Box Visualizer"]
|
|
|
|
| 192 |
|
| 193 |
## Tool Roster (18 Tools)
|
| 194 |
|
| 195 |
+
| Domain Investigation (11) | OS Mechanic (5) | Terminal (2) |
|
| 196 |
|:---|:---|:---|
|
| 197 |
| `review_alert` | `write_to_case_file` — Page to disk | `file_sar` |
|
| 198 |
| `get_customer_profile` | `request_wire_trace` — Async job | `close_alert` |
|
TRAINING.md
CHANGED
|
@@ -89,10 +89,8 @@ Unsloth's 4-bit quantization internally uses float16 as the BNB compute dtype. U
|
|
| 89 |
|
| 90 |
```python
|
| 91 |
%%capture
|
| 92 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 93 |
# CELL 1: Install Training Stack
|
| 94 |
# Runtime → GPU → A100 (Colab Pro) or L4
|
| 95 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 96 |
#
|
| 97 |
# ⚠️ DO NOT install flash-attn separately!
|
| 98 |
# Unsloth uses its own custom Triton attention kernels that are
|
|
@@ -120,9 +118,7 @@ print(f"✓ Unsloth + TRL {trl.__version__} + PEFT {peft.__version__} ready")
|
|
| 120 |
```
|
| 121 |
|
| 122 |
```python
|
| 123 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 124 |
# CELL 2: Clone the project
|
| 125 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 126 |
|
| 127 |
!git clone https://github.com/razancodes/Meta-Pytorch-Hackathon.git
|
| 128 |
%cd Meta-Pytorch-Hackathon
|
|
@@ -135,28 +131,22 @@ print(f"✓ Unsloth + TRL {trl.__version__} + PEFT {peft.__version__} ready")
|
|
| 135 |
```
|
| 136 |
|
| 137 |
```python
|
| 138 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 139 |
# CELL 3: Verify environment (no GPU needed)
|
| 140 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 141 |
|
| 142 |
!python tests/test_smoke.py
|
| 143 |
# Expected: 8/8 tests passed ✓
|
| 144 |
```
|
| 145 |
|
| 146 |
```python
|
| 147 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 148 |
# CELL 4: Dry-run (4 prompts, 1 epoch, no WandB)
|
| 149 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 150 |
|
| 151 |
!python train_grpo.py --dry-run
|
| 152 |
# Verifies: model loading, prompt generation, reward function, GRPO update
|
| 153 |
```
|
| 154 |
|
| 155 |
```python
|
| 156 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 157 |
# CELL 5: ★ GRPO Training (~3-5 hours on A100)
|
| 158 |
-
#
|
| 159 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 160 |
|
| 161 |
import wandb
|
| 162 |
wandb.login()
|
|
@@ -176,10 +166,8 @@ wandb.login()
|
|
| 176 |
```
|
| 177 |
|
| 178 |
```python
|
| 179 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 180 |
# CELL 5b (ALTERNATIVE): Run via HF Jobs CLI
|
| 181 |
# Uses pay-as-you-go HF compute ($0.80/hr for L4)
|
| 182 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 183 |
|
| 184 |
# !pip install huggingface_hub[cli]
|
| 185 |
# !hf jobs uv run --flavor l4x1 python train_grpo.py \
|
|
@@ -187,17 +175,13 @@ wandb.login()
|
|
| 187 |
```
|
| 188 |
|
| 189 |
```python
|
| 190 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 191 |
# CELL 6: Evaluate best checkpoint (9 combos)
|
| 192 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 193 |
|
| 194 |
!python eval_harness.py --checkpoint checkpoints/defender-grpo
|
| 195 |
```
|
| 196 |
|
| 197 |
```python
|
| 198 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 199 |
# CELL 7: Run 1MDB demo + download AGUI replay
|
| 200 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 201 |
|
| 202 |
# Scripted (deterministic, no GPU)
|
| 203 |
!python demo_eval.py --dry-run
|
|
@@ -212,9 +196,7 @@ wandb.login()
|
|
| 212 |
```
|
| 213 |
|
| 214 |
```python
|
| 215 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 216 |
# CELL 8: Save checkpoints to Google Drive
|
| 217 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 218 |
|
| 219 |
import shutil, os
|
| 220 |
|
|
@@ -226,9 +208,7 @@ print("✅ Done! Find it in your Drive → memex_checkpoints/")
|
|
| 226 |
```
|
| 227 |
|
| 228 |
```python
|
| 229 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 230 |
# CELL 9: Push trained model to HuggingFace Hub
|
| 231 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 232 |
|
| 233 |
from huggingface_hub import HfApi
|
| 234 |
api = HfApi()
|
|
@@ -236,7 +216,7 @@ api = HfApi()
|
|
| 236 |
# Push the LoRA adapter
|
| 237 |
api.upload_folder(
|
| 238 |
folder_path="checkpoints/defender-grpo",
|
| 239 |
-
repo_id="MuazTPM/
|
| 240 |
repo_type="model",
|
| 241 |
commit_message="Defender GRPO checkpoint (Unsloth + TRL)"
|
| 242 |
)
|
|
|
|
| 89 |
|
| 90 |
```python
|
| 91 |
%%capture
|
|
|
|
| 92 |
# CELL 1: Install Training Stack
|
| 93 |
# Runtime → GPU → A100 (Colab Pro) or L4
|
|
|
|
| 94 |
#
|
| 95 |
# ⚠️ DO NOT install flash-attn separately!
|
| 96 |
# Unsloth uses its own custom Triton attention kernels that are
|
|
|
|
| 118 |
```
|
| 119 |
|
| 120 |
```python
|
|
|
|
| 121 |
# CELL 2: Clone the project
|
|
|
|
| 122 |
|
| 123 |
!git clone https://github.com/razancodes/Meta-Pytorch-Hackathon.git
|
| 124 |
%cd Meta-Pytorch-Hackathon
|
|
|
|
| 131 |
```
|
| 132 |
|
| 133 |
```python
|
|
|
|
| 134 |
# CELL 3: Verify environment (no GPU needed)
|
|
|
|
| 135 |
|
| 136 |
!python tests/test_smoke.py
|
| 137 |
# Expected: 8/8 tests passed ✓
|
| 138 |
```
|
| 139 |
|
| 140 |
```python
|
|
|
|
| 141 |
# CELL 4: Dry-run (4 prompts, 1 epoch, no WandB)
|
|
|
|
| 142 |
|
| 143 |
!python train_grpo.py --dry-run
|
| 144 |
# Verifies: model loading, prompt generation, reward function, GRPO update
|
| 145 |
```
|
| 146 |
|
| 147 |
```python
|
|
|
|
| 148 |
# CELL 5: ★ GRPO Training (~3-5 hours on A100)
|
| 149 |
+
# This is the primary training cell.
|
|
|
|
| 150 |
|
| 151 |
import wandb
|
| 152 |
wandb.login()
|
|
|
|
| 166 |
```
|
| 167 |
|
| 168 |
```python
|
|
|
|
| 169 |
# CELL 5b (ALTERNATIVE): Run via HF Jobs CLI
|
| 170 |
# Uses pay-as-you-go HF compute ($0.80/hr for L4)
|
|
|
|
| 171 |
|
| 172 |
# !pip install huggingface_hub[cli]
|
| 173 |
# !hf jobs uv run --flavor l4x1 python train_grpo.py \
|
|
|
|
| 175 |
```
|
| 176 |
|
| 177 |
```python
|
|
|
|
| 178 |
# CELL 6: Evaluate best checkpoint (9 combos)
|
|
|
|
| 179 |
|
| 180 |
!python eval_harness.py --checkpoint checkpoints/defender-grpo
|
| 181 |
```
|
| 182 |
|
| 183 |
```python
|
|
|
|
| 184 |
# CELL 7: Run 1MDB demo + download AGUI replay
|
|
|
|
| 185 |
|
| 186 |
# Scripted (deterministic, no GPU)
|
| 187 |
!python demo_eval.py --dry-run
|
|
|
|
| 196 |
```
|
| 197 |
|
| 198 |
```python
|
|
|
|
| 199 |
# CELL 8: Save checkpoints to Google Drive
|
|
|
|
| 200 |
|
| 201 |
import shutil, os
|
| 202 |
|
|
|
|
| 208 |
```
|
| 209 |
|
| 210 |
```python
|
|
|
|
| 211 |
# CELL 9: Push trained model to HuggingFace Hub
|
|
|
|
| 212 |
|
| 213 |
from huggingface_hub import HfApi
|
| 214 |
api = HfApi()
|
|
|
|
| 216 |
# Push the LoRA adapter
|
| 217 |
api.upload_folder(
|
| 218 |
folder_path="checkpoints/defender-grpo",
|
| 219 |
+
repo_id="MuazTPM/defender-model",
|
| 220 |
repo_type="model",
|
| 221 |
commit_message="Defender GRPO checkpoint (Unsloth + TRL)"
|
| 222 |
)
|