File size: 1,284 Bytes
2739b3a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#!/usr/bin/env bash
# Strict mode: exit on an unhandled command failure (-e), treat expansion of an
# unset variable as an error (-u), and make a pipeline fail if any stage fails
# (pipefail).
set -euo pipefail

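# Continuous instruct-only pipeline runner.
# - Uses a separate cache directory and output file to avoid mixing with the
#   thinking pipeline
# - Random-walks over chunks (PIPELINE_RANDOM_WALK=1)
# - No overall limit: each run processes all available chunks/questions, and the
#   loop starts a new run 10 seconds after the previous one completes
#
# Required: set INSTRUCT_GENERATOR_MODEL (and optionally INSTRUCT_GENERATOR_PROVIDER).
# Optional overrides: PIPELINE_CACHE_DIR, QUESTION_MAX_PER_CHUNK (default 5), INSTRUCT_OUT.
# Stop with Ctrl+C.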

# ROOT_DIR: absolute path of the directory one level above the directory that
# contains this script.
script_dir="$(dirname "${BASH_SOURCE[0]}")"
ROOT_DIR="$(cd "$script_dir/.." && pwd)"

# Optional environment file: when ROOT_DIR/.env exists it is sourced with
# allexport enabled, so every variable it assigns is exported to child
# processes. allexport is switched off again right afterwards.
env_file="$ROOT_DIR/.env"
if [[ -f "$env_file" ]]; then
  set -o allexport
  # shellcheck disable=SC1090,SC1091 -- path is only known at runtime
  . "$env_file"
  set +o allexport
fi

# INSTRUCT_GENERATOR_MODEL is mandatory: when it is unset or empty, print the
# hint to stderr and exit with status 1.
[[ -n "${INSTRUCT_GENERATOR_MODEL:-}" ]] || {
  echo "❌ Please set INSTRUCT_GENERATOR_MODEL to your instruct model." >&2
  exit 1
}

# Main loop: run the instruct pipeline over and over, pausing 10 seconds
# between runs, until the script is interrupted.
#
# The effective settings do not change between iterations, so the defaults are
# resolved once up front:
#   - provider:   INSTRUCT_GENERATOR_PROVIDER, else GENERATOR_PROVIDER, else "ollama"
#   - cache dir:  PIPELINE_CACHE_DIR, else ROOT_DIR/data/cache_instruct
#   - output:     INSTRUCT_OUT, else ROOT_DIR/gold/pipeline_gold_instruct.jsonl
#   - QUESTION_MAX_PER_CHUNK: caller's value, else 5
instruct_provider="${INSTRUCT_GENERATOR_PROVIDER:-${GENERATOR_PROVIDER:-ollama}}"
instruct_cache_dir="${PIPELINE_CACHE_DIR:-$ROOT_DIR/data/cache_instruct}"
instruct_max_questions="${QUESTION_MAX_PER_CHUNK:-5}"
instruct_out="${INSTRUCT_OUT:-$ROOT_DIR/gold/pipeline_gold_instruct.jsonl}"

while true; do
  # The settings are passed as per-command environment assignments, so they
  # apply to this npm invocation only.
  #
  # --prefix points npm at ROOT_DIR so the run no longer depends on the
  # caller's working directory, matching how every other path in this script
  # is anchored to ROOT_DIR.
  # NOTE(review): assumes the package.json that defines the "pipeline" script
  # lives in ROOT_DIR -- confirm.
  #
  # If `set -e` is active (as set at the top of this file), a non-zero exit
  # from npm terminates the script instead of starting another iteration.
  INSTRUCT_PIPELINE=1 \
  INSTRUCT_GENERATOR_MODEL="$INSTRUCT_GENERATOR_MODEL" \
  INSTRUCT_GENERATOR_PROVIDER="$instruct_provider" \
  PIPELINE_CACHE_DIR="$instruct_cache_dir" \
  PIPELINE_SEED_MODE=question-first \
  PIPELINE_RANDOM_WALK=1 \
  QUESTION_MAX_PER_CHUNK="$instruct_max_questions" \
  npm --prefix "$ROOT_DIR" run pipeline -- \
    --out "$instruct_out" \
    --verbose

  echo "Instruct run finished at $(date). Sleeping 10s before next loop..."
  sleep 10
done