#!/usr/bin/env bash
#
# Set up the conda env + install the Flow-GRPO codebase (+ optional OCR reward).
#
# The repo pins python 3.10.16 / torch 2.6.0, so we use a DEDICATED env
# (do NOT pollute the existing 'copygen' env which is py3.12 / torch2.8).
#
# Usage:
#   bash scripts/setup_env.sh               # core env only
#   SETUP_OCR=1 bash scripts/setup_env.sh   # additionally try to install PaddleOCR
#
# Environment:
#   SETUP_OCR  - set to "1" to attempt the optional PaddleOCR install (default: 0).
#   CONDA_EXE  - consulted as a fallback conda binary when the pinned default
#                path below is not executable.
set -euo pipefail

# Repo root, derived from this script's location
# (portable; was /workspace/Research/UMM on dev box).
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

# Prefix env lives INSIDE the repo tree so it persists across sessions (on the
# dev box, conda envs under /opt/conda are wiped between turns; only /workspace
# survives).
ENV_PREFIX="$ROOT/envs/flow_grpo"
PYBIN="$ENV_PREFIX/bin/python"

# Absolute path first: conda is not on PATH in non-interactive/tmux shells.
DEFAULT_CONDA=/opt/conda/bin/conda

# Print an error message to stderr and abort the script.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# Print the path of a usable conda executable on stdout.
# Tries, in order: the pinned default path, $CONDA_EXE, then PATH.
# Returns non-zero (printing nothing) when none of them is available.
resolve_conda() {
  local candidate
  for candidate in "$DEFAULT_CONDA" "${CONDA_EXE:-}"; do
    if [[ -n "$candidate" && -f "$candidate" && -x "$candidate" ]]; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done
  # type -P only reports on-disk executables (never shell functions/aliases).
  type -P conda
}

# Create the env only when its interpreter is missing, so re-runs are cheap.
if [[ ! -x "$PYBIN" ]]; then
  CONDA="$(resolve_conda)" \
    || die "conda not found (tried ${DEFAULT_CONDA}, \$CONDA_EXE and PATH)"
  echo ">>> creating prefix env at ${ENV_PREFIX} (python 3.10.16)"
  "$CONDA" create -y -p "${ENV_PREFIX}" python=3.10.16
fi

echo ">>> installing flow_grpo (editable) + pinned deps from setup.py"
# `python -m pip` instead of the bin/pip wrapper: the wrapper's shebang embeds
# the env prefix, which is fragile when that path is long.
"$PYBIN" -m pip install -e "$ROOT/third_party/flow_grpo"

# OCR reward (PaddleOCR) is OPTIONAL and its pins are fragile (segfaults here).
# Block A uses the paddle-free PickScore reward, so skip OCR by default.
# Enable with: SETUP_OCR=1 bash scripts/setup_env.sh
if [[ "${SETUP_OCR:-0}" == "1" ]]; then
  echo ">>> (optional) installing PaddleOCR — non-fatal"
  # Both steps sit in the `if` condition, so a failure of either one (including
  # a crash of the import smoke test) only produces the warning below and does
  # not trip `set -e`.
  if "$PYBIN" -m pip install paddlepaddle-gpu==2.6.2 paddleocr==2.9.1 python-Levenshtein \
    && "$PYBIN" -c "from paddleocr import PaddleOCR; PaddleOCR(use_angle_cls=False, lang='en', use_gpu=False, show_log=False); print('PaddleOCR ready')"; then
    :
  else
    echo "WARN: PaddleOCR setup failed — core env still usable; use PickScore reward" >&2
  fi
fi

echo ""
echo "DONE. Env at: $ENV_PREFIX (run with: \"$PYBIN\" ...)"
echo "Remember to export HF_HOME=$ROOT/.hf_cache when training."