#!/usr/bin/env bash
set -euo pipefail

# Continuous instruct-only pipeline runner.
# - Uses separate cache/output to avoid mixing with thinking pipeline
# - Random-walk over chunks
# - No limit: processes all available chunks/questions; loop restarts after completion
#
# Required: set INSTRUCT_GENERATOR_MODEL (and optionally INSTRUCT_GENERATOR_PROVIDER).
#
# Optional environment overrides:
#   INSTRUCT_GENERATOR_PROVIDER  falls back to GENERATOR_PROVIDER, then "ollama"
#   PIPELINE_CACHE_DIR           default: $ROOT_DIR/data/cache_instruct
#   QUESTION_MAX_PER_CHUNK       default: 5
#   INSTRUCT_OUT                 default: $ROOT_DIR/gold/pipeline_gold_instruct.jsonl
#
# Stop with Ctrl+C.
#
# NOTE: because of `set -e`, a non-zero exit status from `npm run pipeline`
# terminates this script; the loop only restarts after a successful run.

# Project root = parent of the directory containing this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

# Load .env if present. `set -a` auto-exports every variable the file assigns
# so the values reach the npm child process.
if [[ -f "$ROOT_DIR/.env" ]]; then
  set -a
  # shellcheck source=/dev/null
  source "$ROOT_DIR/.env"
  set +a
fi

# Checked after .env is loaded so the model may be configured there.
if [[ -z "${INSTRUCT_GENERATOR_MODEL:-}" ]]; then
  echo "❌ Please set INSTRUCT_GENERATOR_MODEL to your instruct model." >&2
  exit 1
fi

# npm locates package.json starting from the current working directory, so run
# from the project root to make the script work no matter where it is launched.
# Assumes package.json lives in ROOT_DIR -- TODO confirm for this repository.
cd "$ROOT_DIR" || {
  echo "❌ Cannot change directory to $ROOT_DIR" >&2
  exit 1
}

while true; do
  # The variable assignments prefix the npm command, so they apply to that
  # single invocation only and do not persist in this shell.
  INSTRUCT_PIPELINE=1 \
  INSTRUCT_GENERATOR_MODEL="$INSTRUCT_GENERATOR_MODEL" \
  INSTRUCT_GENERATOR_PROVIDER="${INSTRUCT_GENERATOR_PROVIDER:-${GENERATOR_PROVIDER:-ollama}}" \
  PIPELINE_CACHE_DIR="${PIPELINE_CACHE_DIR:-$ROOT_DIR/data/cache_instruct}" \
  PIPELINE_SEED_MODE=question-first \
  PIPELINE_RANDOM_WALK=1 \
  QUESTION_MAX_PER_CHUNK="${QUESTION_MAX_PER_CHUNK:-5}" \
  npm run pipeline -- \
    --out "${INSTRUCT_OUT:-$ROOT_DIR/gold/pipeline_gold_instruct.jsonl}" \
    --verbose

  echo "Instruct run finished at $(date). Sleeping 10s before next loop..."
  sleep 10
done