#!/usr/bin/env bash
# Set up the conda env + install the Flow-GRPO codebase + (optionally) the OCR reward.
#
# The repo pins python 3.10.16 / torch 2.6.0, so we use a DEDICATED env
# (do NOT pollute the existing 'copygen' env, which is py3.12 / torch 2.8).
#
# Usage:
#   bash scripts/setup_env.sh               # core env only
#   SETUP_OCR=1 bash scripts/setup_env.sh   # also attempt the PaddleOCR install
#
# Environment:
#   SETUP_OCR  set to 1 to attempt the optional PaddleOCR install (default: 0)
set -euo pipefail

# Print an error message to stderr and abort the script.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# Repo root = parent of the directory holding this script
# (portable; was /workspace/Research/UMM on the dev box).
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
readonly ROOT

# The env is a PREFIX env inside the repo tree so it persists across sessions
# (on the dev box, conda envs under /opt/conda are wiped between turns; only
# /workspace survives).
readonly ENV_PREFIX="$ROOT/envs/flow_grpo"
readonly PYBIN="$ENV_PREFIX/bin/python"
readonly FLOW_GRPO_DIR="$ROOT/third_party/flow_grpo"

# Fail early, before the slow env creation, if the codebase to install is absent.
[[ -d "$FLOW_GRPO_DIR" ]] || die "Flow-GRPO checkout not found at $FLOW_GRPO_DIR"

if [[ ! -x "$PYBIN" ]]; then
  # Prefer the absolute path (conda is not on PATH in non-interactive/tmux
  # shells on the dev box); fall back to PATH so the script works elsewhere.
  CONDA=/opt/conda/bin/conda
  if [[ ! -x "$CONDA" ]]; then
    CONDA="$(command -v conda)" \
      || die "conda not found at /opt/conda/bin/conda or on PATH"
  fi
  echo ">>> creating prefix env at ${ENV_PREFIX} (python 3.10.16)"
  "$CONDA" create -y -p "${ENV_PREFIX}" python=3.10.16
fi

echo ">>> installing flow_grpo (editable) + pinned deps from setup.py"
# 'python -m pip' guarantees pip runs under the env's own interpreter.
"$PYBIN" -m pip install -e "$FLOW_GRPO_DIR"

# Install PaddleOCR and smoke-test that it can be constructed on CPU.
# Returns non-zero if either step fails. The steps are chained with '&&'
# explicitly because 'set -e' is not in effect inside a function that is
# called from an 'if' condition.
install_ocr() {
  "$PYBIN" -m pip install paddlepaddle-gpu==2.6.2 paddleocr==2.9.1 python-Levenshtein \
    && "$PYBIN" -c "from paddleocr import PaddleOCR; PaddleOCR(use_angle_cls=False, lang='en', use_gpu=False, show_log=False); print('PaddleOCR ready')"
}

# OCR reward (PaddleOCR) is OPTIONAL and its pins are fragile (segfaults here).
# Block A uses the paddle-free PickScore reward, so skip OCR by default.
# Enable with: SETUP_OCR=1 bash scripts/setup_env.sh
if [[ "${SETUP_OCR:-0}" == "1" ]]; then
  echo ">>> (optional) installing PaddleOCR — non-fatal"
  if ! install_ocr; then
    # Deliberately non-fatal: the core env is complete without OCR.
    echo "WARN: PaddleOCR setup failed — core env still usable; use PickScore reward" >&2
  fi
fi

echo ""
echo "DONE. Env at: $ENV_PREFIX (run with: \"$PYBIN\" ...)"
echo "Remember to export HF_HOME=$ROOT/.hf_cache when training."