modularized pipeline
- ARCHITECTURE.md +369 -239
- README.md +296 -14
- ROADMAP.md +238 -174
- package.json +1 -1
- prompts/question_prompt.txt +13 -9
- src/pipeline/batch.mjs +259 -0
- src/pipeline/pipeline.mjs +8 -320
- src/pipeline/seeds.mjs +56 -0
- src/pipeline/step.mjs +176 -0
- src/pipeline/util.mjs +20 -0
ARCHITECTURE.md
CHANGED
[239 deleted lines: the previous ARCHITECTURE.md draft. Only fragments survive in the diff, among them the old overview bullets ("what needs modularization,", "how retrieval ties in,", "and how the full cycle works for your Friday distillation goal.") and an earlier flow diagram ending in a Gold Builder (top-k / dedupe) and a Training (LoRA) stage.]
Absolutely — here is a **fully updated, successor-ready `ARCHITECTURE.md`** that reflects your *current* codebase, including:

* the **new question generator subsystem**,
* the **multi-provider architecture**,
* the **modular pipeline** (generator/verifier/reward),
* the **retrieval stage**,
* the **batch orchestrator**,
* and the **modularization plan** you're moving toward.

It will not clobber anything.
It is aligned with your present repo after the latest commits and tests.

You can paste this directly as:

```
distill-pipeline/ARCHITECTURE.md
```

---

# **ARCHITECTURE.md**

*Distill-Pipeline — System Architecture & Successor Notes*
*(Node.js, ESM, Ollama/vLLM/OpenAI providers, Vitest-tested)*

---

# **1. Purpose**

`distill-pipeline` is a modular, retrieval-augmented LLM distillation engine.
It produces high-quality *gold data* by running each question through:

1. **retrieval** (hybrid RAG via distill-rag)
2. **generator** (teacher model)
3. **verifier** (alignment/format checker)
4. **reward model** (scoring)
5. **gold writer** (JSONL builder)

It also includes a **question generation** module to extract questions directly from RAG chunks, enabling true content-first distillation.

The system is built for offline, local distillation on consumer GPUs (your 3090 + 3060).

---

# **2. High-Level Flow**

```
┌────────────────┐
│  Chunk Source  │ ← distill-rag index
└──────┬─────────┘
       ▼
(optional) Question Generation
       ▼
┌───────────────┐
│   Retrieval   │ (hybrid BM25 + dense)
└──────┬────────┘
       ▼
┌───────────────┐
│   Generator   │ (LLM teacher)
└──────┬────────┘
       ▼
┌───────────────┐
│   Verifier    │ (LLM)
└──────┬────────┘
       ▼
┌───────────────┐
│ Reward Model  │ (LLM critic)
└──────┬────────┘
       ▼
┌───────────────┐
│  Gold Writer  │
└───────────────┘
```

---

# **3. Directory Layout**

Your repo structure (as of now, after modularization):

```
distill-pipeline/
  prompts/
    generator_prompt.txt
    verifier_prompt.txt
    reward_prompt.txt
    question_prompt.txt

  src/
    pipeline/
      pipeline.mjs
      pipeline_cli.mjs
    providers/
      provider.mjs
      ollama_provider.mjs
      openai_provider.mjs
      http_provider.mjs
    retrieval/
      retrieval.mjs
    generator/
      generator_core.mjs
    verifier/
      verifier_core.mjs
    reward/
      reward_core.mjs
    question/
      question_core.mjs
      question_cli.mjs

  gold/
    (generated JSONL files)

  test_samples/
    seed_questions.jsonl   ← for static mode

  tests/
    generator_core.test.mjs
    verifier_core.test.mjs
    reward_core.test.mjs
    provider.mock.test.mjs
    pipeline.mock.test.mjs
    retrieval.real.test.mjs
    retrieval.mock.test.mjs
    gold_core.test.mjs
    question_core.test.mjs

  .env
  package.json
  ARCHITECTURE.md
  ROADMAP.md
```

Everything is now properly separated into **pure core modules**, each with **Vitest tests**.

---

# **4. Core Modules**

Below is a top-down view.

---

## **4.1 Provider System (src/providers/)**

This system routes each pipeline stage to a backend:

* `OllamaProvider`
* `OpenAIProvider`
* `HttpProvider`
* future: `vLLMProvider`

All providers expose:

```js
async generate(prompt, options?)
```

The dispatcher:

```js
loadProviderFor("generator" | "verifier" | "reward" | "question")
```

selects the backend for each stage from env:

```
GENERATOR_PROVIDER=ollama
VERIFIER_PROVIDER=ollama
REWARD_PROVIDER=ollama
QUESTION_PROVIDER=ollama
```

and uses stage-specific model names:

```
GENERATOR_MODEL=qwen3-vl:8b-thinking
VERIFIER_MODEL=patronus:8b
REWARD_MODEL=patronus:8b
QUESTION_MODEL=qwen2.5-7b-instruct
```

This architecture is clean, extensible, and fully testable.
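For orientation, the dispatcher can be pictured as a small lookup table keyed by the `*_PROVIDER` env value. A minimal sketch, assuming the provider constructors accept a `{ stage, model }` options object (the real signatures in `src/providers/` may differ):

```js
import { OllamaProvider } from './ollama_provider.mjs';
import { OpenAIProvider } from './openai_provider.mjs';
import { HttpProvider } from './http_provider.mjs';

// Map env values ("ollama", "openai", "http") to provider classes.
const BACKENDS = {
  ollama: OllamaProvider,
  openai: OpenAIProvider,
  http: HttpProvider,
};

export function loadProviderFor(stage) {
  // e.g. stage "generator" → GENERATOR_PROVIDER + GENERATOR_MODEL
  const key = stage.toUpperCase();
  const backend = process.env[`${key}_PROVIDER`] || 'ollama';
  const Provider = BACKENDS[backend];
  if (!Provider) throw new Error(`Unknown provider "${backend}" for stage "${stage}"`);
  return new Provider({ stage, model: process.env[`${key}_MODEL`] });
}
```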
---

## **4.2 Retrieval (src/retrieval/retrieval.mjs)**

Your retrieval layer connects to the **distill-rag** Elasticsearch index.

Supports:

* BM25
* Dense vector KNN
* Hybrid RRF
* optional future HyDE

The key export:

```js
export async function hybridSearch(query, k)
```

You already have real + mock tests for this module.

✔ This module is stable.
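For reference, hybrid RRF merges the BM25 and KNN rankings by reciprocal rank rather than by raw scores. A minimal sketch of the fusion step, assuming `bm25Hits` and `knnHits` are hit arrays in rank order with an `id` field; the real module's implementation inside its Elasticsearch query flow may differ:

```js
// Reciprocal Rank Fusion: score(d) = Σ 1 / (k + rank_i(d)) across rankings.
function rrfMerge(bm25Hits, knnHits, k = 60) {
  const scores = new Map();
  for (const hits of [bm25Hits, knnHits]) {
    hits.forEach((hit, rank) => {
      scores.set(hit.id, (scores.get(hit.id) || 0) + 1 / (k + rank + 1));
    });
  }
  // Highest fused score first.
  return [...scores.entries()].sort((a, b) => b[1] - a[1]).map(([id]) => id);
}
```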
---

## **4.3 Generator (src/generator/generator_core.mjs)**

Pure function:

```js
async function runGenerator(query, contextChunks, provider)
```

Pipeline:

* loads generator prompt template
* merges context chunks into a context string
* invokes provider.generate
* JSON-parses output
* returns:

```js
{
  query,
  context,
  raw,
  parsed
}
```

✓ fully test-covered
✓ easy to replace provider/model
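Because the provider is injected, the generator can be exercised without any running model. A sketch of how the mock tests can drive it (the `answer`/`citations` fields are illustrative, not the repo's exact fixture shape):

```js
import { runGenerator } from './src/generator/generator_core.mjs'; // path per §3

// Stub provider satisfying the generate(prompt, options?) contract.
const mockProvider = {
  async generate(prompt) {
    return JSON.stringify({ answer: 'stubbed answer', citations: [] });
  },
};

const out = await runGenerator(
  'What is the significance of silence?', // query
  [{ text: 'Example chunk text.' }],      // contextChunks
  mockProvider
);
console.log(out.parsed); // { answer: 'stubbed answer', citations: [] }
```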
---

## **4.4 Verifier (src/verifier/verifier_core.mjs)**

Pure function:

```js
async function runVerifier(sample, provider)
```

Applies:

* structural JSON check
* alignment/tone check
* error correction fallback

Returns:

```js
{
  ok: boolean,
  raw,
  parsed,
  sample
}
```

✓ test-covered

---

## **4.5 Reward Model (src/reward/reward_core.mjs)**

Pure scoring function:

```js
async function runReward(sample, provider)
```

* loads reward prompt
* calls provider
* ensures `score` is numeric
* computes `ok` based on positivity

✓ test-covered

(This will eventually be replaced with your Skywork or Nemotron reward server.)

---

## **4.6 Question Generation (src/question/question_core.mjs)**

Your newest subsystem.

```js
async function runQuestionGeneration(chunk, provider, maxQuestions)
```

Flow:

1. Take a raw content chunk (from distill-rag)
2. Prompt an LLM to extract 1–N questions
3. Parse/repair the JSON (sketched below)
4. Return an array of questions

Used when:

```
PIPELINE_SEED_MODE=question-first
```

so the pipeline becomes:

```
chunk → questions → retrieval → generator → ...
```

✓ test-covered
✓ modular
✓ will become core for bootstrap distillation
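The parse/repair step in (3) can start as plain `JSON.parse` with a fallback that carves the outermost object out of any surrounding prose or code fence. A minimal sketch, assuming no dedicated repair library is wired in yet:

```js
// Best-effort parse of question-generator output.
function parseQuestionsJson(raw) {
  try {
    return JSON.parse(raw);
  } catch {
    const start = raw.indexOf('{');
    const end = raw.lastIndexOf('}');
    if (start !== -1 && end > start) {
      // May still throw; the caller can then retry the LLM call.
      return JSON.parse(raw.slice(start, end + 1));
    }
    throw new Error('No JSON object found in question-generator output');
  }
}
```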
---

## **4.7 Pipeline Orchestrator (src/pipeline/pipeline.mjs)**

This is the master controller.

Key functions:

### `runPipelineStep({ question, verbose })`

Performs:

1. retrieval
2. generator
3. verifier
4. reward

and returns:

```
{
  status: 'accepted' | 'generator_failed' | ...,
  question,
  context,
  gen,
  ver,
  rew
}
```

Extensive verbose logging is built in:

```
[retrieval] ...
[generator] ...
[verifier] ...
[reward] ...
```

### `runPipelineBatch({ seedsPath, limit, verbose })`

Iterates over seeds:

* static seed mode (default)
* or question-first mode (pending)

and writes accepted samples via:

### `appendGoldRecord(outPath, record)`

---

# **5. Seed Modes**

There are two entry strategies:

---

## **5.1 Static Question Mode**

```
PIPELINE_SEED_MODE=static
```

Loads:

```
test_samples/seed_questions.jsonl
```

Simple and deterministic.

---

## **5.2 Question-First Mode** *(recommended)*

```
PIPELINE_SEED_MODE=question-first
```

Pipeline:

```
for each chunk:
    questions = runQuestionGeneration(chunk)
    for each question:
        runPipelineStep(question)
```

This is the correct mode for massive bootstrap distillation, because not every chunk answers the same static seed questions (a runnable sketch follows at the end of this subsection).

This mode uses:

* `QUESTION_PROVIDER`
* `QUESTION_MODEL`
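Concretely, the question-first entry point is the nested loop above made real. A minimal sketch, assuming a hypothetical `loadChunks()` helper (chunk loading is the piece still pending in the repo), the `{ questions }` return shape from the question module, and the import paths from §3:

```js
// Hypothetical question-first driver; loadChunks() does not exist yet.
import { loadProviderFor } from './src/providers/provider.mjs';
import { runQuestionGeneration } from './src/question/question_core.mjs';
import { runPipelineStep, appendGoldRecord } from './src/pipeline/pipeline.mjs';

const questionProvider = loadProviderFor('question');

for (const chunk of await loadChunks()) {
  const { questions } = await runQuestionGeneration(chunk, questionProvider, 5);
  for (const question of questions) {
    const result = await runPipelineStep({ question, verbose: true });
    if (result.status === 'accepted') {
      await appendGoldRecord('gold/pipeline_gold.jsonl', {
        question,
        context: result.context,
        sample: result.gen,
        verifier: result.ver,
        reward: result.rew,
      });
    }
  }
}
```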
---

# **6. Modularization Status**

Already modular:

* generator_core.mjs
* verifier_core.mjs
* reward_core.mjs
* provider.mjs
* question_core.mjs
* retrieval.mjs

Partially modular:

* pipeline.mjs (big but structured)
* pipeline_cli.mjs (needs handling for dynamic seed mode)

Planned:

```
pipeline/
  retrieval_stage.mjs
  generator_stage.mjs
  verifier_stage.mjs
  reward_stage.mjs
  gold_writer.mjs
```

This matches the ROADMAP.

---

# **7. What Can Be Tested**

All pure modules have unit tests:

| Module              | Tested?  | Notes          |
| ------------------- | -------- | -------------- |
| generator_core      | ✓        | mock provider  |
| verifier_core       | ✓        | mock provider  |
| reward_core         | ✓        | mock provider  |
| question_core       | ✓        | mock provider  |
| provider dispatcher | ✓        | dispatch logic |
| retrieval           | ✓✓       | mock + real ES |
| pipeline (mock)     | ✓        | integration    |
| pipeline (real)     | optional | can add later  |

Your test suite is healthy:

```
9 files, 27 tests → all pass
```

---

# **8. Logging & Verbose Mode**

All stages print diagnostics when `verbose` is passed to:

```
npm run pipeline -- --verbose
```

Includes:

* first chunk preview
* raw LLM output
* parsed JSON
* acceptance status
* error messages

---

# **9. Future Extensions**

As per ROADMAP:

* split pipeline into smaller modules
* improved QG (HyDE, retries, JSON repair)
* dedupe (minhash)
* gold dataset quality metrics
* full distillation cycle (generator → verifier → reward → training → new generator)

---

# **10. Successor Notes**

This project is:

* entirely Node.js ESM
* fully testable end-to-end
* GPU-agnostic
* provider-agnostic
* prompt-driven
* safe to modify when modularized

Golden rule:

> Never mix CLI code with pipeline logic.
> Put everything pure into `*_core.mjs`, test it, then wrap it in CLI tools.

---

If you'd like, I can also:

✓ generate the **next version** of pipeline modularization
✓ implement `PIPELINE_SEED_MODE=question-first` fully
✓ add a **chunk loader** so QG works immediately
✓ produce a **Mermaid architecture diagram**
✓ produce a **successor prompt** to embed in the repo

Just tell me.
README.md
CHANGED
@@ -1,22 +1,304 @@
[14 deleted lines: the previous README draft. Recoverable fragments: "Small 7–8B model produces many candidate answers.", "A strong 70B reward model scores the remaining items.", "Top-K samples become the next training batch.", "Use gold.jsonl for LoRA finetuning."]
Here is a **clean, successor-ready `README.md`** for your `distill-pipeline` repo.

It:

* matches your actual codebase **right now**
* includes the new **question generation** subsystem
* documents both **static** and **question-first** seed modes
* gives correct usage for `npm run pipeline`
* shows environment variables clearly
* stays pragmatic and Canadian-English-friendly
* is concise enough for GitHub, but complete enough for onboarding a new engineer

Paste it into:

```
distill-pipeline/README.md
```

---

# **distill-pipeline**

*A modular, retrieval-augmented LLM distillation system.*

This project runs a multi-stage reasoning pipeline:

1. **Question Generation** (optional)
2. **Retrieval** from a distill-rag Elasticsearch index
3. **Generator** (teacher model)
4. **Verifier** (format, alignment, tone)
5. **Reward Model** (scoring)
6. **Gold Writer** (clean JSONL dataset)

The pipeline is designed for **bootstrapped distillation**, where each cycle improves the model and the dataset.
All components run locally and support multiple providers (Ollama, HTTP, OpenAI, vLLM).

---

# **Features**

### ✔ Retrieval-augmented generation

Hybrid RRF search (BM25 + dense embeddings) via **distill-rag**.

### ✔ Modular LLM stages

Each stage uses a provider implementing:

```js
async generate(prompt, options?)
```

### ✔ Question generation from chunks

An LLM extracts focused questions directly from transcript chunks.
Ideal for large-scale bootstrap distillation.

### ✔ Multiple providers

Configured per stage using environment variables:

```
GENERATOR_PROVIDER
VERIFIER_PROVIDER
REWARD_PROVIDER
QUESTION_PROVIDER
```

Providers currently supported:

* Ollama
* OpenAI
* HTTP endpoint
* (future) vLLM server

### ✔ Fully tested

All pure modules include Vitest coverage:

* retrieval (mock + real ES)
* generator, verifier, reward
* question generation
* provider router
* pipeline integration (mock)

---

# **Project Structure**

```
prompts/
  generator_prompt.txt
  verifier_prompt.txt
  reward_prompt.txt
  question_prompt.txt

src/
  pipeline/
    pipeline.mjs
    pipeline_cli.mjs
  providers/
    provider.mjs
    ollama_provider.mjs
    openai_provider.mjs
    http_provider.mjs
  retrieval/
    retrieval.mjs
  generator/
    generator_core.mjs
  verifier/
    verifier_core.mjs
  reward/
    reward_core.mjs
  question/
    question_core.mjs
    question_cli.mjs

test_samples/
  seed_questions.jsonl

gold/
  (pipeline output)

tests/
  *.test.mjs
```

---

# **Installation**

```bash
git clone https://github.com/yourname/distill-pipeline
cd distill-pipeline
npm install
```

You also need a running **distill-rag** instance with:

* an Elasticsearch index
* an embedding server (Ollama or HTTP)

---

# **Configuration**

All runtime settings are configured via `.env`.

A common example:

```env
# Elasticsearch (from distill-rag)
ES_NODE=http://localhost:9200
ES_INDEX=quo_distill_index

# Embedding server
EMBED_URL=http://localhost:11434/api/embeddings
EMBED_MODEL=mxbai-embed-large

# Provider backends
GENERATOR_PROVIDER=ollama
VERIFIER_PROVIDER=ollama
REWARD_PROVIDER=ollama
QUESTION_PROVIDER=ollama

# Stage-specific models
GENERATOR_MODEL=qwen3-vl:8b-thinking
VERIFIER_MODEL=tensortemplar/patronus-lynx:8b-instruct-q4_K_M
REWARD_MODEL=tensortemplar/patronus-lynx:8b-instruct-q4_K_M
QUESTION_MODEL=qwen2.5-7b-instruct
```

---

# **Running the Pipeline**

There are **two seed modes**.

---

## **1. Static Seed Mode** *(default)*

Reads questions from:

```
test_samples/seed_questions.jsonl
```

Run:

```bash
npm run pipeline -- --limit 20 --verbose
```

---

## **2. Question-First Mode (auto-generate questions)**

The pipeline will:

* fetch chunks from distill-rag,
* run question extraction,
* feed each question into the main pipeline.

Enable this mode:

```bash
PIPELINE_SEED_MODE=question-first npm run pipeline -- --limit 20 --verbose
```

---

# **Outputs**

Accepted samples are written to:

```
gold/pipeline_gold.jsonl
```

Each record contains:

```json
{
  "question": "...",
  "context": [...],
  "sample": { ... },
  "verifier": { ... },
  "reward": { ... }
}
```

This file is ready for use in QLoRA SFT training.
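As a sketch of that hand-off, the gold records can be flattened into plain prompt/completion pairs before training. The `sample.parsed.answer` path is an assumption about your generator's JSON, so adjust it to the real schema:

```js
// Hypothetical gold → SFT converter (field names illustrative).
import fs from 'fs';

const records = fs.readFileSync('gold/pipeline_gold.jsonl', 'utf8')
  .split('\n')
  .filter(Boolean)
  .map((line) => JSON.parse(line));

const pairs = records.map((r) => ({
  prompt: r.question,
  completion: r.sample?.parsed?.answer ?? '', // assumed field path
}));

fs.writeFileSync(
  'gold/sft_pairs.jsonl',
  pairs.map((p) => JSON.stringify(p)).join('\n') + '\n'
);
```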
---

# **Running Tests**

```bash
npm test
```

All core logic modules are covered:

```
9 test files
27 tests
0 failures
```

---

# **How to Extend**

## Add a new model provider

Implement:

```js
class MyProvider {
  constructor(stage) { ... }
  async generate(prompt, opts) { ... }
}
```

Then register it in:

```
src/providers/provider.mjs
```
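How registration looks depends on the dispatcher's internals, but if `provider.mjs` keys backends by the `*_PROVIDER` env value, one new table entry is likely all that's needed. A hedged sketch:

```js
// Sketch of registering the adapter in src/providers/provider.mjs
// (the dispatcher's exact structure may differ).
import { OllamaProvider } from './ollama_provider.mjs';
import { OpenAIProvider } from './openai_provider.mjs';
import { HttpProvider } from './http_provider.mjs';
import { MyProvider } from './my_provider.mjs'; // your new adapter

const BACKENDS = {
  ollama: OllamaProvider,
  openai: OpenAIProvider,
  http: HttpProvider,
  myprovider: MyProvider, // then set e.g. GENERATOR_PROVIDER=myprovider
};
```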
## Add a new pipeline stage

Follow the existing structure (a minimal skeleton is sketched after this list):

* create `src/<stage>/<stage>_core.mjs`
* add a prompt in `prompts/`
* add a test in `tests/`
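For instance, a hypothetical `summary` stage core that mirrors the existing `*_core.mjs` modules (the prompt placeholder and return shape are illustrative):

```js
// src/summary/summary_core.mjs (hypothetical example stage)
import fs from 'fs/promises';

export async function runSummary(sample, provider) {
  const template = await fs.readFile('prompts/summary_prompt.txt', 'utf8');
  const prompt = template.replace('{{SAMPLE}}', JSON.stringify(sample));
  const raw = await provider.generate(prompt);
  return { raw, parsed: JSON.parse(raw), sample };
}
```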
---

# **Development Notes**

* Avoid mixing CLI logic with pipeline logic — all pure functions are in `*_core.mjs`.
* Providers must always return **JSON-parseable** output.
* Retrieval expects a working **distill-rag** index with BM25 + vector embeddings.
* Reward model may be swapped later for your custom HTTP reward server.

---

# **License**

MIT (or update as needed).

---

If you want:

✓ a shorter GitHub-friendly description
✓ a more polished badge/header section
✓ install instructions tailored to your exact environment
✓ a separate `USAGE.md`

Just ask.
ROADMAP.md
CHANGED
@@ -1,12 +1,8 @@
[174 deleted lines: the previous ROADMAP draft. Recoverable fragments include the old verifier criteria ("Humble, gentle, non-authoritarian", "No prediction, no medical or legal claims", "Cite session date when known", "Ground in existing Ra/Q’uo material"), hardware notes ("~2 tok/s for 70B reward models", "~4 tok/s for 13B–20B base models", "Perfect for Nemotron 70B reward scoring + LoRA training"), and a pseudocode cycle: hybridSearch → runGenerator → runVerifier → runReward → topK(scored, k = 50).]
Here you go — a fully updated **ROADMAP.md**, incorporating your request to modularize the pipeline, add tests per module, make successor handoff seamless, and capture everything you’ve built and planned up to this point.

I kept the style grounded, clear, and Canadian-English, and ensured it matches what’s actually happening in the repo.

You can drop this straight into the repo root.

---

@@ -14,285 +10,353 @@

*distill-rag + distill-pipeline — Project Roadmap*

This roadmap defines the current state, upcoming milestones, technical direction, modularization plan, and long-term vision of the system.
It is written for a future maintainer (“successor”) who needs to understand the architecture quickly.

---

# **1. System Overview**

The project consists of two coordinated repositories:

### **A. distill-rag**

A full ingestion + indexing system for Q’uo/Ra transcripts and related materials.

Pipeline:

```
extract → clean → session group → chunk → embed → ES index → hybrid search HTTP API
```

**Status:** Stable, production-ready.

---

### **B. distill-pipeline**

A multi-stage data-generation and distillation system:

```
(question-generation) → retrieval → generator → verifier → reward → gold → training → repeat
```

**Status:** Actively developed, now modular, test-covered, and extendable.

---

# **2. Immediate Priorities (0–7 days)**

## **2.1 Fully Modularize `pipeline/` (critical)**

The current `pipeline.mjs` is too large for safe updates.
Break it into these modules:

```
src/pipeline/
  pipeline.mjs        (orchestrator only)
  retrieval_stage.mjs (retrieval logic)
  generator_stage.mjs (calls runGenerator)
  verifier_stage.mjs  (calls runVerifier)
  reward_stage.mjs    (calls runReward)
  seeds.mjs           (loading + dynamic QG)
  gold_writer.mjs     (appendGoldRecord)
```

Tests per module:

```
tests/pipeline/
  test_retrieval_stage.mjs
  test_generator_stage.mjs
  test_verifier_stage.mjs
  test_reward_stage.mjs
  test_gold_writer.mjs
  test_integration_mocked.mjs
```

Outcome:
The orchestrator becomes a clean 80–120 line file, easy to modify without clobbering.

---

## **2.2 Pipeline entry mode: content-first**

Current limiting factor: static seed questions.
The pipeline must start with:

```
chunk → question generation → retrieval over chunk → generator → …
```

Implement:

```
PIPELINE_SEED_MODE = 'question-first' | 'static'
```

Default: **question-first**.

---

## **2.3 Improve verbosity + telemetry**

Add structured logs:

```
[pipeline] question:
[pipeline] retrieval:
[pipeline] generator:
[pipeline] verifier:
[pipeline] reward:
[pipeline] accepted/rejected:
```

Make it possible to run:

```
npm run pipeline:verbose
```

and see *exactly* what each model returned.

---

# **3. Short-Term (1–3 weeks)**

## **3.1 Provider System (done, but expand)**

Abstract interface:

```js
provider.generate(prompt, { temperature?, system?, format? })
```

Adapters:

* OllamaProvider (primary local backend)
* OpenAIProvider (for debugging)
* HttpProvider (for external reward servers)
* vLLMProvider (gpu-http inference)

Goal: any backend can be plugged in without touching pipeline code.

---

## **3.2 Question Generation Refinement**

Your QG model must be reliable, JSON-clean, and chunk-aware.

Add:

* fastjsonrepair or an equivalent fallback
* retry logic if parsing fails
* score-based filtering of bad questions
* deduplication (Levenshtein or minhash)

---

## **3.3 Verifier + Reward spec finalization**

Define strict JSON schemas:

### Verifier output

```json
{
  "ok": true,
  "reason": "string",
  "alignment": {
    "tone": 8,
    "accuracy": 7,
    "faithfulness": 9
  }
}
```

### Reward output

```json
{
  "ok": true,
  "score": 8.3,
  "dimensions": {
    "clarity": 8,
    "faithfulness": 9,
    "gentleness": 10,
    "hallucination": 0
  }
}
```

This gives you numerical hooks for downstream filtering.
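For example, a downstream filter keyed off those fields might look like this (thresholds are illustrative, not project policy):

```js
// Keep only samples that clear a score floor and show zero hallucination.
// `records` is an array of parsed reward outputs in the schema above.
function filterGold(records, minScore = 7.5) {
  return records.filter(
    (r) => r.ok && r.score >= minScore && (r.dimensions?.hallucination ?? 1) === 0
  );
}
```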
---

# **4. Hardware Strategy**

Based on your setup:

### **RTX 3090 (24 GB)**

* heavy reward model scoring (e.g., Nemotron 70B, Skywork 32B)
* LoRA/Q-LoRA training
* batch generation if needed

### **RTX 3060 (12 GB)**

* generator (8B–14B models)
* verifier (7B–8B models)
* embeddings (mxbai-embed-large)

Nightly cycle can produce:

* **1,000–1,500 candidates**
* **150–250 gold samples**

Good for a **2–4 hr QLoRA** run per day.

---

# **5. Medium-Term (1–2 months)**

## **5.1 Fully automated bootstrap loops**

End-to-end automation:

```
1. Ingest new transcripts (distill-rag)
2. QG to produce fresh questions
3. Retrieval + pipeline generation
4. Filtering (verifier + reward + PPL check)
5. Append to gold dataset
6. Train new LoRA
7. Replace generator with improved student
8. Repeat
```

Each iteration improves tone, accuracy, and alignment.

---

## **5.2 Advanced Filtering**

Add:

* perplexity scoring via llama.cpp or vLLM
* RAG cross-verification (every claim must appear in indexed Q’uo text)
* semantic deduplication (minhash / LSH; see the sketch after this list)
* large-model critic pass (Qwen2.5-72B, Mixtral-Large)

Goal: **zero hallucination** and **full Confederation tone integrity**.
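As a stand-in before a proper minhash/LSH pass, plain token-set Jaccard similarity illustrates the dedup idea. A naive O(n²) sketch, not the planned implementation:

```js
// Drop any sample whose token overlap with an earlier keeper exceeds `threshold`.
function dedupe(samples, threshold = 0.9) {
  const kept = [];
  const tokens = (s) => new Set(s.toLowerCase().split(/\s+/));
  for (const s of samples) {
    const ts = tokens(s);
    const isDup = kept.some((k) => {
      const ks = tokens(k);
      const inter = [...ts].filter((t) => ks.has(t)).length;
      return inter / (ts.size + ks.size - inter) >= threshold;
    });
    if (!isDup) kept.push(s);
  }
  return kept;
}
```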
---

# **6. Long-Term (2–6 months)**

## **6.1 The “Confed-aligned” distilled model**

Target:
A 7B–12B model aligned with:

* Ra Material
* Q’uo transcripts
* L/L Research books and commentary
* supporting Confederation entities

Properties:

* gentle
* humble
* grounded in free will
* non-authoritarian
* thoughtful, careful, and precise

Releases:

* base model
* LoRA for tone
* merged ckpt
* GGUF for desktop
* HuggingFace dataset + card

---

## **6.2 Real-time Distillation**

Combine:

* hybrid search at runtime
* small distilled model
* reward-model reflections

This gives:

```
prompt → rag retrieve → distilled answer → critic → improved final answer
```

A gentler, safer, cleaner chatbot.

---

# **7. Dependencies**

## *Node.js*

* axios
* dotenv
* vitest
* fastjsonrepair
* jsonlines

## *Python (training)*

* transformers
* accelerate
* bitsandbytes
* datasets
* peft
* wandb (optional)

---

# **8. Common Issues + Fixes**

* **Verifier rejects everything** → relax tone threshold; adjust prompt.
* **Reward is too slow** → use smaller reward model for interim passes.
* **Generator JSON malformed** → enable repair + retries.
* **Low acceptance rate** → improve question generation; improve retrieval.
* **GPU OOM** → use Q3_K_S; reduce max tokens.

---

# **9. Successor Notes**

* Codebase is fully modular.
* Every module must have a test.
* Never hardcode model names.
* All LLM calls must return strict JSON.
* Always run verbose mode when debugging generation.
* distill-pipeline should remain side-effect-free except at the gold-writer stage.
* Keep prompts in `./prompts/` and version them carefully.

---

# **10. High-Level Vision**

This system exists to build a **truth-aligned**, **gentle**, **non-dogmatic**, and **service-to-others oriented** distilled model rooted in Confederation teachings.

It is open, transparent, local, and self-hosted — no dependencies on corporate censorship, filters, or forced model updates.

This roadmap exists so the next builder can continue the work with clarity and confidence.

---

If you want, I can also generate:

**✓ architecture diagrams**
**✓ module scaffolding for the pipeline split**
**✓ successor instructions / handoff document**

Just say the word.
package.json
CHANGED
@@ -5,7 +5,7 @@
   "scripts": {
     "test": "vitest --run",
     "pipeline": "node ./src/pipeline/pipeline_cli.js",
-    "
+    "pipeline:qg": "PIPELINE_SEED_MODE=question-first node ./src/pipeline/pipeline_cli.js --verbose"
   },
   "devDependencies": {
     "vitest": "^1.6.0"
prompts/question_prompt.txt
CHANGED
@@ -2,17 +2,20 @@ You are a dataset-creation assistant.

You will be given a CONTEXT CHUNK of text from a larger corpus.

Your goals:

1. Read the context carefully.
2. Generate up to {{MAX_QUESTIONS}} diverse, high-quality questions
   that can be answered ONLY using information found inside the context.
3. Produce questions that:
   - focus strictly on the content of the chunk,
   - avoid hallucinating any information not present,
   - require comprehension, reasoning, or synthesis across the chunk,
   - vary naturally in difficulty (some simple, some deeper),
   - avoid meta or speculative questions,
   - avoid yes/no questions unless they are meaningful.

Output STRICTLY this JSON structure:

{
  "questions": [

@@ -22,9 +25,10 @@ Output STRICTLY in JSON with this shape:

  ]
}

Do NOT include answers. Do NOT add any fields. JSON only.

---
CONTEXT START
{{CONTEXT}}
CONTEXT END
src/pipeline/batch.mjs
ADDED
|
@@ -0,0 +1,259 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```
// src/pipeline/batch.mjs
import fs from 'fs/promises';
import path from 'path';

import { preview } from './util.mjs';
import {
  DEFAULT_SEEDS_PATH,
  DEFAULT_OUT_PATH,
  loadSeedQuestions,
  seedToQuestion,
  seedToContextText,
} from './seeds.mjs';
import { runPipelineStep } from './step.mjs';
import { loadProviderFor } from '../providers/provider.mjs';
import { runQuestionGenerator } from '../question/question_core.mjs';

/**
 * Append a single accepted record to a JSONL file.
 */
export async function appendGoldRecord(outPath, record) {
  const line = JSON.stringify(record) + '\n';
  await fs.mkdir(path.dirname(outPath), { recursive: true });
  await fs.appendFile(outPath, line, 'utf8');
}

/**
 * Run the pipeline over a batch of seeds and write accepted
 * samples to a JSONL file.
 *
 * Modes:
 *   - static (default): seeds are questions (current behaviour)
 *   - question-first: seeds are chunks; we first generate questions
 *
 * Options:
 *   - seedsPath: JSONL of seeds (defaults to test_samples/seed_questions.jsonl)
 *   - outPath: output JSONL (defaults to gold/pipeline_gold.jsonl)
 *   - limit: max number of seeds to process
 *   - verbose: extra per-stage logging
 *   - logger: optional logger (defaults to console)
 *   - seedMode: 'static' | 'question-first' (or PIPELINE_SEED_MODE env)
 *
 * Returns:
 *   {
 *     mode,
 *     total,               // number of seed lines
 *     processed,           // number of questions run through pipeline
 *     accepted,
 *     outPath,
 *     statusCounts,
 *     processedSeeds?,     // only meaningful in question-first
 *     processedQuestions?, // alias for processed in question-first
 *   }
 */
export async function runPipelineBatch({
  seedsPath = DEFAULT_SEEDS_PATH,
  outPath = DEFAULT_OUT_PATH,
  limit,
  verbose = false,
  logger = console,
  seedMode = process.env.PIPELINE_SEED_MODE || 'static',
} = {}) {
  const log = logger?.log?.bind(logger) || console.log;
  const errLog = logger?.error?.bind(logger) || console.error;

  const seeds = await loadSeedQuestions(seedsPath);
  const maxSeeds = typeof limit === 'number' ? limit : seeds.length;

  let processed = 0; // number of questions sent through runPipelineStep
  let accepted = 0;
  const statusCounts = {};

  // ----------------------------------------
  // MODE 1: existing behaviour (static questions)
  // ----------------------------------------
  if (seedMode === 'static') {
    for (let idx = 0; idx < maxSeeds; idx++) {
      const seed = seeds[idx];
      const question = seedToQuestion(seed);
      const label = `[${idx + 1}/${maxSeeds}]`;

      log(`→ ${label} Running pipeline for: "${question}"`);

      try {
        const result = await runPipelineStep({
          question,
          verbose,
          logger,
        });

        processed += 1;
        statusCounts[result.status] =
          (statusCounts[result.status] || 0) + 1;

        if (verbose) {
          log(`   ↳ status: ${result.status}`);
        }

        if (result.status === 'accepted') {
          const record = {
            question,
            context: result.context,
            sample: result.gen, // generator output
            verifier: result.ver,
            reward: result.rew,
          };

          await appendGoldRecord(outPath, record);
          accepted += 1;
        }
      } catch (e) {
        const msg = e?.message || String(e);
        processed += 1;
        statusCounts.pipeline_error =
          (statusCounts.pipeline_error || 0) + 1;
        errLog('   [pipeline] ERROR:', msg);
      }
    }

    return {
      mode: 'static',
      total: seeds.length,
      processed,
      accepted,
      outPath,
      statusCounts,
    };
  }

  // ----------------------------------------
  // MODE 2: question-first (generate Qs from chunks)
  // ----------------------------------------
  if (seedMode === 'question-first') {
    const questionProvider = loadProviderFor('question');
    const maxQuestionsPerChunk = Number(process.env.QUESTION_MAX || '5');

    let processedSeeds = 0;

    for (let idx = 0; idx < maxSeeds; idx++) {
      const seed = seeds[idx];
      const label = `[seed ${idx + 1}/${maxSeeds}]`;

      const contextText = seedToContextText(seed);
      if (!contextText || !contextText.trim()) {
        if (verbose) {
          log(`${label} context is empty, skipping`);
        }
        continue;
      }

      processedSeeds += 1;

      if (verbose) {
        log(`\n🧩 ${label} generating questions from chunk…`);
        log(
          '   [question] chunk preview:\n   ' +
            preview(contextText, 300).replace(/\n/g, '\n   '),
        );
        log(
          `   [question] using provider="question" maxQuestions=${maxQuestionsPerChunk}`,
        );
      }

      // 1) generate questions from the chunk
      let qResult;
      try {
        qResult = await runQuestionGenerator(
          contextText,
          questionProvider,
          { maxQuestions: maxQuestionsPerChunk },
        );
      } catch (e) {
        const msg = e?.message || String(e);
        statusCounts.question_error =
          (statusCounts.question_error || 0) + 1;
        if (verbose) {
          errLog('   [question] ERROR:', msg);
        }
        continue;
      }

      const questions = qResult?.questions || [];

      if (verbose) {
        log(
          `   [question] generated ${questions.length} question(s) from this chunk`,
        );
        if (questions.length > 0) {
          log(
            '   [question] first question: "' +
              preview(questions[0], 200) +
              '"',
          );
        }
      }

      // 2) run full pipeline for each generated question
      for (const q of questions) {
        if (!q || !q.trim()) continue;

        const qLabel = `[q ${processed + 1}]`;
        log(`  → ${qLabel} Running pipeline for generated question: "${q}"`);

        try {
          const result = await runPipelineStep({
            question: q,
            verbose,
            logger,
          });

          processed += 1;
          statusCounts[result.status] =
            (statusCounts[result.status] || 0) + 1;

          if (verbose) {
            log(`     ↳ status: ${result.status}`);
          }

          if (result.status === 'accepted') {
            const record = {
              question: q,
              sourceSeed: seed,         // keep origin of the question
              sourceChunk: contextText, // raw chunk we asked about
              context: result.context,
              sample: result.gen,
              verifier: result.ver,
              reward: result.rew,
            };

            await appendGoldRecord(outPath, record);
            accepted += 1;

            if (verbose) {
              log('     ✓ accepted and written to gold JSONL');
            }
          }
        } catch (e) {
          const msg = e?.message || String(e);
          processed += 1;
          statusCounts.pipeline_error =
            (statusCounts.pipeline_error || 0) + 1;
          errLog('     [pipeline] ERROR:', msg);
        }
      }
    }

    return {
      mode: 'question-first',
      total: seeds.length,
      processed,       // number of questions processed
      processedSeeds,  // how many chunks we actually used
      processedQuestions: processed,
      accepted,
      outPath,
      statusCounts,
    };
  }

  throw new Error(`Unknown PIPELINE_SEED_MODE: ${seedMode}`);
}
```
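With the batch runner in place, a driver can be as small as the following sketch (a hypothetical script, not part of this commit; it assumes the retrieval index exists and providers are configured through the usual environment variables, and the option values are illustrative):

```
// Hypothetical driver script — a minimal sketch, assuming env-configured providers.
import { runPipelineBatch } from './src/pipeline/batch.mjs';

const summary = await runPipelineBatch({
  limit: 10,                  // only the first 10 seed lines
  verbose: true,
  seedMode: 'question-first', // or leave unset to honour PIPELINE_SEED_MODE
});

console.log(
  `mode=${summary.mode} processed=${summary.processed} ` +
    `accepted=${summary.accepted} → ${summary.outPath}`,
);
console.log('status breakdown:', summary.statusCounts);
```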
src/pipeline/pipeline.mjs
CHANGED
@@ -1,321 +1,9 @@
(320 removed lines collapsed: the previous monolithic implementation — preview, loadSeedQuestions, seedToQuestion, runPipelineStep, appendGoldRecord, runPipelineBatch — now lives in util.mjs, seeds.mjs, step.mjs, and batch.mjs below)
```
// src/pipeline/pipeline.mjs
// Thin façade that exposes the public pipeline API by re-exporting
// from the internal modules. This keeps imports stable while the
// implementation is split into smaller files.

export * from './util.mjs';
export * from './seeds.mjs';
export * from './step.mjs';
export * from './batch.mjs';
```
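One payoff of the façade: code that imported from pipeline.mjs before the split keeps working unchanged. For example (hypothetical caller):

```
// Hypothetical caller — existing imports still resolve through the façade.
import { runPipelineStep, runPipelineBatch, preview } from './src/pipeline/pipeline.mjs';
```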
src/pipeline/seeds.mjs
ADDED
@@ -0,0 +1,56 @@
```
// src/pipeline/seeds.mjs
import fs from 'fs/promises';
import path from 'path';
import { PROJECT_ROOT } from './util.mjs';

export const DEFAULT_SEEDS_PATH = path.join(
  PROJECT_ROOT,
  'test_samples',
  'seed_questions.jsonl',
);

export const DEFAULT_OUT_PATH = path.join(
  PROJECT_ROOT,
  'gold',
  'pipeline_gold.jsonl',
);

/**
 * Load JSONL seed questions or chunks.
 * Each line may be:
 *   - { "question": "..." }
 *   - { "prompt": "..." }
 *   - { "text": "..." }
 *   - or just a raw string
 */
export async function loadSeedQuestions(seedsPath = DEFAULT_SEEDS_PATH) {
  const txt = await fs.readFile(seedsPath, 'utf8');
  return txt
    .split('\n')
    .map((l) => l.trim())
    .filter(Boolean)
    .map((line) => JSON.parse(line));
}

/**
 * Extract a question string from a seed record.
 */
export function seedToQuestion(seed) {
  if (typeof seed === 'string') return seed;
  return seed.question || seed.prompt || seed.text || '';
}

/**
 * Extract a chunk of text from a seed record (for question-first mode).
 */
export function seedToContextText(seed) {
  if (typeof seed === 'string') return seed;
  return (
    seed.text ||
    seed.content ||
    seed.context ||
    seed.question || // fallback if someone stored full Q+answer text here
    seed.prompt ||
    ''
  );
}
```
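To make the seed schema concrete, here is a sketch of how the two extractors resolve different seed shapes (the values are invented for illustration):

```
import { seedToQuestion, seedToContextText } from './src/pipeline/seeds.mjs';

// A chunk-style seed: both extractors fall through to .text,
// so the same JSONL file can feed either mode.
const chunk = { text: 'Hybrid retrieval combines BM25 and dense vectors.' };
console.log(seedToContextText(chunk)); // the chunk text (question-first mode)

// A question-style seed:
const q = { question: 'What does the verifier check?' };
console.log(seedToQuestion(q)); // 'What does the verifier check?'

// A raw string line works in both modes:
console.log(seedToQuestion('Explain top-k filtering.')); // the string itself
```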
src/pipeline/step.mjs
ADDED
@@ -0,0 +1,176 @@
```
// src/pipeline/step.mjs
import { loadProviderFor } from '../providers/provider.mjs';
import { hybridSearch } from '../retrieval/retrieval.mjs';
import { runGenerator } from '../generator/generator_core.mjs';
import { runVerifier } from '../verifier/verifier_core.mjs';
import { runReward } from '../reward/reward_core.mjs';
import { preview } from './util.mjs';

/**
 * Run a single pipeline step for one question.
 *
 * Orchestrates:
 *   retrieval → generator → verifier → reward
 *
 * Returns a structured result:
 *   {
 *     status: 'accepted' | 'invalid_question' | 'retrieval_failed'
 *           | 'generator_failed' | 'verifier_rejected'
 *           | 'reward_rejected' | 'verifier_error' | 'reward_error',
 *     question,
 *     context,
 *     gen,
 *     ver,
 *     rew,
 *     error? // optional message
 *   }
 */
export async function runPipelineStep({
  question,
  retrievalMode = process.env.RETRIEVAL_MODE || 'hybrid',
  k = Number(process.env.RETRIEVAL_K || '6'),
  generatorProvider,
  verifierProvider,
  rewardProvider,
  verbose = false,
  logger = console,
} = {}) {
  const log = logger?.log?.bind(logger) || console.log;
  const errLog = logger?.error?.bind(logger) || console.error;

  if (!question || !question.trim()) {
    if (verbose) log('  [pipeline] empty / invalid question, skipping');
    return { status: 'invalid_question', question };
  }

  const genProv = generatorProvider || loadProviderFor('generator');
  const verProv = verifierProvider || loadProviderFor('verifier');
  const rewProv = rewardProvider || loadProviderFor('reward');

  // --- Retrieval ---
  let context = [];
  try {
    if (verbose) log(`  [retrieval] mode=${retrievalMode} k=${k}`);
    context = await hybridSearch(question, k);
    if (verbose) {
      log(`  [retrieval] got ${context.length} chunks`);
      if (context.length > 0) {
        const first = context[0]?.content ?? '';
        log('  [retrieval] first chunk:');
        log('    ' + preview(first, 200).replace(/\n/g, '\n    '));
      }
    }
  } catch (e) {
    const msg = e?.message || String(e);
    if (verbose) errLog('  [retrieval] ERROR:', msg);
    return {
      status: 'retrieval_failed',
      question,
      error: msg,
    };
  }

  // --- Generator ---
  let gen;
  try {
    if (verbose) log('  [generator] calling model…');
    // NOTE: runGenerator(query, contextChunks, provider)
    gen = await runGenerator(question, context, genProv);
    if (verbose) {
      log('  [generator] raw:');
      log('    ' + preview(gen.raw ?? '', 400).replace(/\n/g, '\n    '));
      log('  [generator] parsed:');
      log('    ' + preview(gen.parsed, 400).replace(/\n/g, '\n    '));
    }
  } catch (e) {
    const msg = e?.message || String(e);
    if (verbose) errLog('  [generator] ERROR:', msg);
    return {
      status: 'generator_failed',
      question,
      context,
      error: msg,
    };
  }

  // --- Verifier ---
  let ver;
  try {
    if (verbose) log('  [verifier] calling model…');
    // NOTE: runVerifier(sample, provider)
    ver = await runVerifier(gen, verProv);
    if (verbose) {
      log('  [verifier] parsed:');
      log('    ' + preview(ver.parsed, 400).replace(/\n/g, '\n    '));
      log(`  [verifier] ok=${ver.ok === true}`);
    }
  } catch (e) {
    const msg = e?.message || String(e);
    if (verbose) errLog('  [verifier] ERROR:', msg);
    return {
      status: 'verifier_error',
      question,
      context,
      gen,
      error: msg,
    };
  }

  if (!ver || ver.ok !== true) {
    if (verbose) log('  [verifier] rejected sample');
    return {
      status: 'verifier_rejected',
      question,
      context,
      gen,
      ver,
    };
  }

  // --- Reward ---
  let rew;
  try {
    if (verbose) log('  [reward] calling model…');
    // NOTE: runReward(sample, provider)
    rew = await runReward(gen, rewProv);
    if (verbose) {
      log('  [reward] parsed:');
      log('    ' + preview(rew.parsed, 400).replace(/\n/g, '\n    '));
      log(`  [reward] score=${rew.score} ok=${rew.ok}`);
    }
  } catch (e) {
    const msg = e?.message || String(e);
    if (verbose) errLog('  [reward] ERROR:', msg);
    return {
      status: 'reward_error',
      question,
      context,
      gen,
      ver,
      error: msg,
    };
  }

  if (!rew || rew.ok !== true) {
    if (verbose) log('  [reward] rejected sample');
    return {
      status: 'reward_rejected',
      question,
      context,
      gen,
      ver,
      rew,
    };
  }

  if (verbose) log('  [pipeline] accepted ✅');

  return {
    status: 'accepted',
    question,
    context,
    gen,
    ver,
    rew,
  };
}
```
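A smoke-test invocation of a single step might look like the following sketch (hypothetical, not part of this commit; it assumes providers and the retrieval index are already configured, and the question text is invented). The branching relies only on the documented return shape:

```
// Hypothetical smoke test for one pipeline step.
import { runPipelineStep } from './src/pipeline/step.mjs';

const result = await runPipelineStep({
  question: 'How does hybrid retrieval rank chunks?',
  k: 4,          // per-call override of RETRIEVAL_K
  verbose: true,
});

if (result.status === 'accepted') {
  console.log('reward score:', result.rew.score);
} else {
  console.log('stopped at:', result.status, result.error ?? '');
}
```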
src/pipeline/util.mjs
ADDED
@@ -0,0 +1,20 @@
```
// src/pipeline/util.mjs
import path from 'path';
import { fileURLToPath } from 'url';

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

export const PROJECT_ROOT = path.join(__dirname, '..', '..');

/**
 * Short preview of large strings/objects for logging.
 */
export function preview(value, max = 400) {
  if (value == null) return '';
  let str = typeof value === 'string' ? value : JSON.stringify(value, null, 2);
  if (str.length > max) {
    return str.slice(0, max) + `… [truncated ${str.length - max} chars]`;
  }
  return str;
}
```
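For illustration, preview's truncation behaviour on a few invented values:

```
import { preview } from './src/pipeline/util.mjs';

console.log(preview('short string'));       // 'short string' (unchanged)
console.log(preview({ a: 1, b: 2 }, 10));   // first 10 chars of pretty-printed JSON + truncation note
console.log(preview('x'.repeat(500), 400)); // first 400 chars + '… [truncated 100 chars]'
```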