#!/usr/bin/env bash
# Set up the conda env + install the Flow-GRPO codebase + OCR reward.
# The repo pins python 3.10.16 / torch 2.6.0, so we use a DEDICATED env
# (do NOT pollute the existing 'copygen' env which is py3.12 / torch2.8).
# Strict mode: abort on a failing command (-e), on expansion of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Repo root = parent of the directory that holds this script (portable; was
# /workspace/Research/UMM on dev box).
#   * `CDPATH=''` keeps `cd` from consulting CDPATH. With CDPATH set and the
#     script invoked through a relative path (e.g. `scripts/setup_env.sh`),
#     `cd` may resolve the target via CDPATH and print the directory it
#     entered; that text would be captured into ROOT together with `pwd`.
#   * `--` stops a script path starting with '-' from being read as options.
ROOT="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"

# PREFIX env under the repo root. On the dev box the repo lives INSIDE
# /workspace, so the env persists across sessions (conda envs under
# /opt/conda are wiped between turns; only /workspace survives).
ENV_PREFIX="$ROOT/envs/flow_grpo"
# Interpreter inside the prefix env; its presence is used as the
# "env already exists" marker further down.
PYBIN="$ENV_PREFIX/bin/python"

# Default conda location, given as an absolute path because conda is not on
# PATH in non-interactive/tmux shells on the dev box.
CONDA=/opt/conda/bin/conda

# Create the prefix env only when its interpreter is missing, so re-running
# the script against an existing env goes straight to the install steps.
if [[ ! -x "$PYBIN" ]]; then
  # conda is needed only on this path, so it is resolved here: a machine whose
  # env already exists does not need a working conda at all.
  if [[ ! -x "$CONDA" ]]; then
    # Fall back to whatever conda is on PATH. `type -P` performs a PATH search
    # for an executable file and ignores shell functions/aliases named conda.
    if ! CONDA="$(type -P conda)"; then
      echo "ERROR: conda not found at /opt/conda/bin/conda or on PATH; cannot create ${ENV_PREFIX}" >&2
      exit 1
    fi
  fi
  echo ">>> creating prefix env at ${ENV_PREFIX} (python 3.10.16)"
  "$CONDA" create -y -p "${ENV_PREFIX}" python=3.10.16
fi

# Install the vendored Flow-GRPO checkout in editable mode using the env's own
# pip, so the package and its dependencies land in the prefix env.
printf '%s\n' ">>> installing flow_grpo (editable) + pinned deps from setup.py"
"${ENV_PREFIX}/bin/pip" install --editable "${ROOT}/third_party/flow_grpo"

# PaddleOCR-based OCR reward: OPTIONAL, and its version pins are fragile
# (segfaults here). Block A uses the paddle-free PickScore reward, so this
# step is skipped unless explicitly requested.
# Opt in with: SETUP_OCR=1 bash scripts/setup_env.sh
case "${SETUP_OCR:-0}" in
  1)
    echo ">>> (optional) installing PaddleOCR — non-fatal"
    ocr_pkgs=(paddlepaddle-gpu==2.6.2 paddleocr==2.9.1 python-Levenshtein)
    # Probe run after the install: import PaddleOCR and construct one instance.
    ocr_probe="from paddleocr import PaddleOCR; PaddleOCR(use_angle_cls=False, lang='en', use_gpu=False, show_log=False); print('PaddleOCR ready')"
    # Best-effort: a failed install or a failed probe only produces a warning;
    # running inside an `if` condition keeps `set -e` from aborting the script.
    if ! { "${ENV_PREFIX}/bin/pip" install "${ocr_pkgs[@]}" \
      && "${PYBIN}" -c "${ocr_probe}"; }; then
      echo "WARN: PaddleOCR setup failed — core env still usable; use PickScore reward"
    fi
    ;;
esac

# Closing summary: a blank separator line, where the env lives and how to
# invoke its interpreter, plus a reminder about HF_HOME for training runs.
cat <<EOF

DONE. Env at: $ENV_PREFIX  (run with: "$PYBIN" ...)
Remember to export HF_HOME=$ROOT/.hf_cache when training.
EOF