#!/usr/bin/env bash
set -euo pipefail

# Continuous instruct-only pipeline runner.
# - Uses a separate cache/output to avoid mixing with the thinking pipeline
# - Random-walk over chunks
# - No limit: processes all available chunks/questions; the loop restarts
#   10 seconds after each completed run
#
# Required environment:
#   INSTRUCT_GENERATOR_MODEL     instruct model to use (may come from .env)
# Optional environment:
#   INSTRUCT_GENERATOR_PROVIDER  falls back to GENERATOR_PROVIDER, then "ollama"
#   PIPELINE_CACHE_DIR           default: <root>/data/cache_instruct
#   QUESTION_MAX_PER_CHUNK       default: 5
#   INSTRUCT_OUT                 default: <root>/gold/pipeline_gold_instruct.jsonl
#
# Stop with Ctrl+C. Because of `set -e`, a pipeline run that exits non-zero
# also terminates this script instead of looping again.

# Repository root: the parent of the directory that contains this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

# Load .env if present. `set -a` exports every variable the file assigns so
# the npm child process can see them; `set +a` restores normal behaviour.
if [[ -f "$ROOT_DIR/.env" ]]; then
  set -a
  # shellcheck source=/dev/null
  source "$ROOT_DIR/.env"
  set +a
fi

# Checked after .env is loaded, so the model may be defined there.
if [[ -z "${INSTRUCT_GENERATOR_MODEL:-}" ]]; then
  echo "❌ Please set INSTRUCT_GENERATOR_MODEL to your instruct model." >&2
  exit 1
fi

while true; do
  # The VAR=value prefixes apply only to this npm invocation.
  # --prefix pins npm to the repository root so the script can be launched
  # from any working directory (assumes package.json lives in ROOT_DIR).
  INSTRUCT_PIPELINE=1 \
    INSTRUCT_GENERATOR_MODEL="$INSTRUCT_GENERATOR_MODEL" \
    INSTRUCT_GENERATOR_PROVIDER="${INSTRUCT_GENERATOR_PROVIDER:-${GENERATOR_PROVIDER:-ollama}}" \
    PIPELINE_CACHE_DIR="${PIPELINE_CACHE_DIR:-$ROOT_DIR/data/cache_instruct}" \
    PIPELINE_SEED_MODE=question-first \
    PIPELINE_RANDOM_WALK=1 \
    QUESTION_MAX_PER_CHUNK="${QUESTION_MAX_PER_CHUNK:-5}" \
    npm --prefix "$ROOT_DIR" run pipeline -- \
    --out "${INSTRUCT_OUT:-$ROOT_DIR/gold/pipeline_gold_instruct.jsonl}" \
    --verbose

  echo "Instruct run finished at $(date). Sleeping 10s before next loop..."
  sleep 10
done