MuazTPM committed on
Commit
cacbfce
·
verified ·
1 Parent(s): 21553f2

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. README.md +3 -1
  2. TRAINING.md +2 -22
README.md CHANGED
@@ -127,6 +127,8 @@ See [TRAINING.md](TRAINING.md) for copy-paste Colab cells, full CLI reference, a
127
 
128
  ---
129
 
 
 
130
  ```mermaid
131
  graph TB
132
  subgraph Frontend["Frontend — Next.js Glass Box Visualizer"]
@@ -190,7 +192,7 @@ graph TB
190
 
191
  ## Tool Roster (18 Tools)
192
 
193
- | Domain Investigation (10) | OS Mechanic (5) | Terminal (2+1) |
194
  |:---|:---|:---|
195
  | `review_alert` | `write_to_case_file` — Page to disk | `file_sar` |
196
  | `get_customer_profile` | `request_wire_trace` — Async job | `close_alert` |
 
127
 
128
  ---
129
 
130
+ ## Architecture
131
+
132
  ```mermaid
133
  graph TB
134
  subgraph Frontend["Frontend — Next.js Glass Box Visualizer"]
 
192
 
193
  ## Tool Roster (18 Tools)
194
 
195
+ | Domain Investigation (11) | OS Mechanic (5) | Terminal (2) |
196
  |:---|:---|:---|
197
  | `review_alert` | `write_to_case_file` — Page to disk | `file_sar` |
198
  | `get_customer_profile` | `request_wire_trace` — Async job | `close_alert` |
TRAINING.md CHANGED
@@ -89,10 +89,8 @@ Unsloth's 4-bit quantization internally uses float16 as the BNB compute dtype. U
89
 
90
  ```python
91
  %%capture
92
- # ═══════════════════════════════════════════════════════════
93
  # CELL 1: Install Training Stack
94
  # Runtime → GPU → A100 (Colab Pro) or L4
95
- # ═══════════════════════════════════════════════════════════
96
  #
97
  # ⚠️ DO NOT install flash-attn separately!
98
  # Unsloth uses its own custom Triton attention kernels that are
@@ -120,9 +118,7 @@ print(f"✓ Unsloth + TRL {trl.__version__} + PEFT {peft.__version__} ready")
120
  ```
121
 
122
  ```python
123
- # ═══════════════════════════════════════════════════════════
124
  # CELL 2: Clone the project
125
- # ═══════════════════════════════════════════════════════════
126
 
127
  !git clone https://github.com/razancodes/Meta-Pytorch-Hackathon.git
128
  %cd Meta-Pytorch-Hackathon
@@ -135,28 +131,22 @@ print(f"✓ Unsloth + TRL {trl.__version__} + PEFT {peft.__version__} ready")
135
  ```
136
 
137
  ```python
138
- # ═══════════════════════════════════════════════════════════
139
  # CELL 3: Verify environment (no GPU needed)
140
- # ═══════════════════════════════════════════════════════════
141
 
142
  !python tests/test_smoke.py
143
  # Expected: 8/8 tests passed ✓
144
  ```
145
 
146
  ```python
147
- # ═══════════════════════════════════════════════════════════
148
  # CELL 4: Dry-run (4 prompts, 1 epoch, no WandB)
149
- # ═══════════════════════════════════════════════════════════
150
 
151
  !python train_grpo.py --dry-run
152
  # Verifies: model loading, prompt generation, reward function, GRPO update
153
  ```
154
 
155
  ```python
156
- # ═══════════════════════════════════════════════════════════
157
  # CELL 5: ★ GRPO Training (~3-5 hours on A100)
158
- # THIS IS THE PRIMARY TRAINING CELL
159
- # ═══════════════════════════════════════════════════════════
160
 
161
  import wandb
162
  wandb.login()
@@ -176,10 +166,8 @@ wandb.login()
176
  ```
177
 
178
  ```python
179
- # ═══════════════════════════════════════════════════════════
180
  # CELL 5b (ALTERNATIVE): Run via HF Jobs CLI
181
  # Uses pay-as-you-go HF compute ($0.80/hr for L4)
182
- # ═══════════════════════════════════════════════════════════
183
 
184
  # !pip install huggingface_hub[cli]
185
  # !hf jobs uv run --flavor l4x1 python train_grpo.py \
@@ -187,17 +175,13 @@ wandb.login()
187
  ```
188
 
189
  ```python
190
- # ═══════════════════════════════════════════════════════════
191
  # CELL 6: Evaluate best checkpoint (9 combos)
192
- # ═══════════════════════════════════════════════════════════
193
 
194
  !python eval_harness.py --checkpoint checkpoints/defender-grpo
195
  ```
196
 
197
  ```python
198
- # ═══════════════════════════════════════════════════════════
199
  # CELL 7: Run 1MDB demo + download AGUI replay
200
- # ═══════════════════════════════════════════════════════════
201
 
202
  # Scripted (deterministic, no GPU)
203
  !python demo_eval.py --dry-run
@@ -212,9 +196,7 @@ wandb.login()
212
  ```
213
 
214
  ```python
215
- # ═══════════════════════════════════════════════════════════
216
  # CELL 8: Save checkpoints to Google Drive
217
- # ═══════════════════════════════════════════════════════════
218
 
219
  import shutil, os
220
 
@@ -226,9 +208,7 @@ print("✅ Done! Find it in your Drive → memex_checkpoints/")
226
  ```
227
 
228
  ```python
229
- # ═══════════════════════════════════════════════════════════
230
  # CELL 9: Push trained model to HuggingFace Hub
231
- # ═══════════════════════════════════════════════════════════
232
 
233
  from huggingface_hub import HfApi
234
  api = HfApi()
@@ -236,7 +216,7 @@ api = HfApi()
236
  # Push the LoRA adapter
237
  api.upload_folder(
238
  folder_path="checkpoints/defender-grpo",
239
- repo_id="MuazTPM/memex-defender-grpo",
240
  repo_type="model",
241
  commit_message="Defender GRPO checkpoint (Unsloth + TRL)"
242
  )
 
89
 
90
  ```python
91
  %%capture
 
92
  # CELL 1: Install Training Stack
93
  # Runtime → GPU → A100 (Colab Pro) or L4
 
94
  #
95
  # ⚠️ DO NOT install flash-attn separately!
96
  # Unsloth uses its own custom Triton attention kernels that are
 
118
  ```
119
 
120
  ```python
 
121
  # CELL 2: Clone the project
 
122
 
123
  !git clone https://github.com/razancodes/Meta-Pytorch-Hackathon.git
124
  %cd Meta-Pytorch-Hackathon
 
131
  ```
132
 
133
  ```python
 
134
  # CELL 3: Verify environment (no GPU needed)
 
135
 
136
  !python tests/test_smoke.py
137
  # Expected: 8/8 tests passed ✓
138
  ```
139
 
140
  ```python
 
141
  # CELL 4: Dry-run (4 prompts, 1 epoch, no WandB)
 
142
 
143
  !python train_grpo.py --dry-run
144
  # Verifies: model loading, prompt generation, reward function, GRPO update
145
  ```
146
 
147
  ```python
 
148
  # CELL 5: ★ GRPO Training (~3-5 hours on A100)
149
+ # This is the primary training cell.
 
150
 
151
  import wandb
152
  wandb.login()
 
166
  ```
167
 
168
  ```python
 
169
  # CELL 5b (ALTERNATIVE): Run via HF Jobs CLI
170
  # Uses pay-as-you-go HF compute ($0.80/hr for L4)
 
171
 
172
  # !pip install huggingface_hub[cli]
173
  # !hf jobs uv run --flavor l4x1 python train_grpo.py \
 
175
  ```
176
 
177
  ```python
 
178
  # CELL 6: Evaluate best checkpoint (9 combos)
 
179
 
180
  !python eval_harness.py --checkpoint checkpoints/defender-grpo
181
  ```
182
 
183
  ```python
 
184
  # CELL 7: Run 1MDB demo + download AGUI replay
 
185
 
186
  # Scripted (deterministic, no GPU)
187
  !python demo_eval.py --dry-run
 
196
  ```
197
 
198
  ```python
 
199
  # CELL 8: Save checkpoints to Google Drive
 
200
 
201
  import shutil, os
202
 
 
208
  ```
209
 
210
  ```python
 
211
  # CELL 9: Push trained model to HuggingFace Hub
 
212
 
213
  from huggingface_hub import HfApi
214
  api = HfApi()
 
216
  # Push the LoRA adapter
217
  api.upload_folder(
218
  folder_path="checkpoints/defender-grpo",
219
+ repo_id="MuazTPM/defender-model",
220
  repo_type="model",
221
  commit_message="Defender GRPO checkpoint (Unsloth + TRL)"
222
  )