#!/usr/bin/env bash
#
# Launcher for evaluate_dataset_replay-parallel.py.
#
# Starts one background Python process per env id, records each PID and log
# path in <run_dir>/pids.tsv, and remembers the most recent run directory in
# an "active_run" pointer file so later invocations can monitor, inspect or
# stop that run.
#
# Usage: <this script> {start|monitor|status|stop|restart} [--env_ids A,B,C]

# Treat unset variables as errors and make a pipeline fail when any stage
# fails. `set -e` is not enabled: functions report failure through explicit
# return codes that the callers check.
set -u
set -o pipefail

# Absolute directory of this script; the Python entry point sits next to it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" || {
  echo "Error: cannot resolve script directory." >&2
  exit 1
}
PYTHON_SCRIPT="${SCRIPT_DIR}/evaluate_dataset_replay-parallel.py"

# Environment prefix whose bin/python runs the workers.
MICROMAMBA_ENV="/data/hongzefu/maniskillenv1114"
PYTHON_BIN="${MICROMAMBA_ENV}/bin/python"

# Env ids used when --env_ids is not given on the command line.
DEFAULT_ENV_IDS_CSV="PickXtimes,StopCube,SwingXtimes,BinFill,VideoUnmaskSwap,VideoUnmask,ButtonUnmaskSwap,ButtonUnmask,VideoRepick,VideoPlaceButton,VideoPlaceOrder,PickHighlight,InsertPeg,MoveCube,PatternLock,RouteStick"

# Each run gets its own sub-directory of LOG_ROOT; ACTIVE_RUN_FILE holds the
# path of the most recently started run directory.
LOG_ROOT="${SCRIPT_DIR}/logs/evaluate_dataset_replay_parallel"
ACTIVE_RUN_FILE="${LOG_ROOT}/active_run"

mkdir -p "${LOG_ROOT}" || {
  echo "Error: cannot create log directory: ${LOG_ROOT}" >&2
  exit 1
}

# Print the command synopsis and the per-command help text to stdout.
show_usage() {
  printf '%s\n' \
    "Usage: $0 {start|monitor|status|stop|restart} [--env_ids A,B,C]" \
    "" \
    "Commands:" \
    " start [--env_ids A,B,C] Start one process per envid and attach monitor." \
    " If omitted, defaults to all 16 env ids." \
    " monitor Monitor logs of the current active run." \
    " status Show status of the current active run." \
    " stop Stop all processes in the current active run." \
    " restart [--env_ids A,B,C] Stop current active run, then start a new run." \
    " If omitted, defaults to all 16 env ids."
}

# Print $1 with leading and trailing whitespace removed (no trailing newline
# is added).
# Arguments: $1 - string to trim
trim_whitespace() {
  local value="$1"
  # ${value%%[![:space:]]*} is the leading run of whitespace; strip it.
  value="${value#"${value%%[![:space:]]*}"}"
  # ${value##*[![:space:]]} is the trailing run of whitespace; strip it.
  value="${value%"${value##*[![:space:]]}"}"
  printf '%s' "${value}"
}

# Succeed when `ps` can find a process with the given PID.
# Arguments: $1 - PID to look up
# Returns:   exit status of `ps -p` (0 when the process exists)
is_pid_alive() {
  local pid="$1"
  ps -p "${pid}" > /dev/null 2>&1
}

# Succeed when the given PID is alive or when at least one process still
# lists it as its parent.
# Arguments: $1 - root PID (an empty value is reported as not alive)
# Returns:   0 if the PID or any direct child exists, non-zero otherwise
is_process_tree_alive() {
  local pid="$1"
  [ -z "${pid}" ] && return 1
  if is_pid_alive "${pid}"; then
    return 0
  fi

  # Capture the child list instead of piping into `grep -q`: with pipefail
  # enabled, grep exiting on the first match can leave ps with SIGPIPE and
  # turn a positive result into a failure.
  local children
  children="$(ps -o pid= --ppid "${pid}" 2>/dev/null)"
  [ -n "${children//[[:space:]]/}" ]
}

# Send a signal to a process and, recursively, to every descendant found via
# `ps --ppid`. Descendants are signalled before their parent.
# Arguments: $1 - root PID (nothing happens when empty)
#            $2 - signal number or name without the dash (default: 15)
# Returns:   always 0; failures of `kill` are deliberately ignored because
#            the target may already have exited.
kill_process_tree() {
  local pid="$1"
  local signal="${2:-15}"
  [ -z "${pid}" ] && return

  # `child` must be local: this function recurses, and a global loop
  # variable would leak into the caller's scope.
  local children child
  children=$(ps -o pid= --ppid "${pid}" 2>/dev/null)
  # Unquoted on purpose: ps prints one whitespace-padded PID per line.
  for child in ${children}; do
    kill_process_tree "${child}" "${signal}"
  done

  kill "-${signal}" "${pid}" 2>/dev/null || true
}

# Print the run directory recorded in ACTIVE_RUN_FILE.
# Globals:   ACTIVE_RUN_FILE (read)
# Outputs:   the run directory path on stdout
# Returns:   0 on success; 1 when the pointer file is missing, empty, or
#            names a directory that does not exist
get_active_run_dir() {
  [ -f "${ACTIVE_RUN_FILE}" ] || return 1

  local run_dir
  run_dir="$(< "${ACTIVE_RUN_FILE}")"
  if [ -z "${run_dir}" ] || [ ! -d "${run_dir}" ]; then
    return 1
  fi
  printf '%s\n' "${run_dir}"
}

# Split a comma-separated list into the global ENV_IDS array, trimming
# whitespace around each entry and dropping empty entries.
# Globals:   ENV_IDS (written; reset on every call)
# Arguments: $1 - comma-separated env ids
# Returns:   0 when at least one id remains, 1 otherwise
parse_env_ids_csv() {
  local csv="$1"
  # Scratch array kept local so it does not leak into the global scope.
  local -a raw_env_ids=()
  local env_id

  IFS=',' read -r -a raw_env_ids <<< "${csv}"
  ENV_IDS=()
  # The ${arr[@]+...} form keeps `set -u` happy on bash versions that treat
  # an empty array expansion as an unbound variable.
  for env_id in ${raw_env_ids[@]+"${raw_env_ids[@]}"}; do
    env_id="$(trim_whitespace "${env_id}")"
    if [ -n "${env_id}" ]; then
      ENV_IDS+=("${env_id}")
    fi
  done

  [ "${#ENV_IDS[@]}" -gt 0 ]
}

# Parse the optional "--env_ids CSV" argument and print the CSV to use.
# Callers capture stdout with $(...), so stdout carries ONLY the resulting
# CSV; every diagnostic goes to stderr so it reaches the user instead of
# being swallowed into the captured value.
# Globals:   DEFAULT_ENV_IDS_CSV (read; used when --env_ids is absent)
# Arguments: the command-line arguments that follow the sub-command
# Outputs:   the selected CSV on stdout; messages on stderr
# Returns:   0 on success; 1 on an unknown argument or a missing value
extract_env_ids_arg() {
  local env_ids_csv=""
  while [ "$#" -gt 0 ]; do
    case "$1" in
      --env_ids)
        if [ -z "${2:-}" ]; then
          echo "Error: --env_ids requires a value." >&2
          return 1
        fi
        env_ids_csv="$2"
        shift 2
        ;;
      *)
        echo "Error: unknown argument '$1'." >&2
        return 1
        ;;
    esac
  done

  if [ -z "${env_ids_csv}" ]; then
    env_ids_csv="${DEFAULT_ENV_IDS_CSV}"
    echo "Info: --env_ids not provided, using default 16 env ids." >&2
  fi

  printf '%s\n' "${env_ids_csv}"
}

# Check that everything needed to launch a worker is present.
# Globals:   PYTHON_SCRIPT, MICROMAMBA_ENV, PYTHON_BIN (read)
# Outputs:   one error line on stderr for the first failing check
# Returns:   0 when the script file exists, the env directory exists and
#            the python binary is executable; 1 otherwise
validate_runtime() {
  if [ ! -f "${PYTHON_SCRIPT}" ]; then
    echo "Error: script not found: ${PYTHON_SCRIPT}" >&2
    return 1
  fi
  if [ ! -d "${MICROMAMBA_ENV}" ]; then
    echo "Error: micromamba env not found: ${MICROMAMBA_ENV}" >&2
    return 1
  fi
  if [ ! -x "${PYTHON_BIN}" ]; then
    echo "Error: python binary not executable: ${PYTHON_BIN}" >&2
    return 1
  fi
}

# Succeed when any process tree recorded in <run_dir>/pids.tsv is still alive.
# Each pids.tsv row is "<env_id>\t<pid>\t<log_file>"; only the pid is used.
# Arguments: $1 - run directory
# Returns:   0 if at least one recorded process tree is alive; 1 if none
#            are, or if pids.tsv does not exist
run_has_alive_process() {
  local run_dir="$1"
  local pids_file="${run_dir}/pids.tsv"
  [ -f "${pids_file}" ] || return 1

  local pid
  # `|| [ -n "${pid}" ]` also processes a final row that lacks a newline.
  while IFS=$'\t' read -r _ pid _ || [ -n "${pid}" ]; do
    [ -z "${pid}" ] && continue
    if is_process_tree_alive "${pid}"; then
      return 0
    fi
  done < "${pids_file}"
  return 1
}

# Follow the log files of a run with `tail -F` until interrupted.
# Arguments: $1 - run directory (optional; defaults to the active run)
# Outputs:   status messages and the followed log output on stdout
# Returns:   1 when no run can be resolved, pids.tsv is missing or empty, or
#            no log file is registered; otherwise the exit status of tail
monitor_run() {
  local run_dir="${1:-}"
  if [ -z "${run_dir}" ]; then
    if ! run_dir="$(get_active_run_dir)"; then
      echo "No active run found."
      return 1
    fi
  fi

  local pids_file="${run_dir}/pids.tsv"
  # -s: the file must exist AND be non-empty.
  if [ ! -s "${pids_file}" ]; then
    echo "No pids.tsv found for run: ${run_dir}"
    return 1
  fi

  # Third column of each row is the log file path.
  local log_files=()
  local log_file
  while IFS=$'\t' read -r _ _ log_file || [ -n "${log_file}" ]; do
    [ -z "${log_file}" ] && continue
    log_files+=("${log_file}")
  done < "${pids_file}"

  if [ "${#log_files[@]}" -eq 0 ]; then
    echo "No log files registered in ${pids_file}"
    return 1
  fi

  echo "Monitoring run: ${run_dir}"
  echo "Press Ctrl+C to exit monitor. Processes keep running."
  # -n 0: show only lines written from now on; -F: keep retrying by name.
  tail -n 0 -F "${log_files[@]}"
}

# Print one status line per process recorded for the active run, followed by
# an alive/total summary.
# Outputs:   the report on stdout
# Returns:   0 normally (including when there is no active run); 1 when the
#            active run directory has no pids.tsv
status_run() {
  local run_dir
  if ! run_dir="$(get_active_run_dir)"; then
    echo "Status: no active run."
    return 0
  fi

  local pids_file="${run_dir}/pids.tsv"
  if [ ! -f "${pids_file}" ]; then
    echo "Status: active_run points to ${run_dir}, but pids.tsv is missing."
    return 1
  fi

  local total=0
  local alive=0
  local env_id pid log_file state
  echo "Active run: ${run_dir}"
  # `|| [ -n "${env_id}" ]` also reports a final row that lacks a newline.
  while IFS=$'\t' read -r env_id pid log_file || [ -n "${env_id}" ]; do
    [ -z "${pid}" ] && continue
    total=$((total + 1))
    if is_process_tree_alive "${pid}"; then
      state="RUNNING"
      alive=$((alive + 1))
    else
      state="EXITED"
    fi
    printf ' [%s] pid=%s state=%s log=%s\n' \
      "${env_id}" "${pid}" "${state}" "${log_file}"
  done < "${pids_file}"

  echo "Summary: alive=${alive}/${total}"
}

# Stop every process tree recorded for the active run: send SIGTERM, wait up
# to 15 seconds for the trees to disappear, then SIGKILL whatever is left.
# The active-run pointer is removed in every path that found an active run.
# Globals:   ACTIVE_RUN_FILE (removed)
# Outputs:   progress messages on stdout
# Returns:   0 when nothing was running or everything exited; 1 when some
#            process tree is still alive after SIGKILL
stop_run() {
  local run_dir
  if ! run_dir="$(get_active_run_dir)"; then
    echo "No active run to stop."
    return 0
  fi

  local pids_file="${run_dir}/pids.tsv"
  if [ ! -f "${pids_file}" ]; then
    echo "pids.tsv missing for run ${run_dir}. Clearing active run pointer."
    rm -f "${ACTIVE_RUN_FILE}"
    return 0
  fi

  # Second column of each pids.tsv row is the PID.
  local pids=()
  local pid
  while IFS=$'\t' read -r _ pid _ || [ -n "${pid}" ]; do
    [ -z "${pid}" ] && continue
    pids+=("${pid}")
  done < "${pids_file}"

  if [ "${#pids[@]}" -eq 0 ]; then
    echo "No PIDs recorded for run ${run_dir}."
    rm -f "${ACTIVE_RUN_FILE}"
    return 0
  fi

  echo "Stopping run: ${run_dir}"
  local p
  for p in "${pids[@]}"; do
    kill_process_tree "${p}" 15
  done

  # Grace period: poll once per second, at most 15 times, and leave as soon
  # as no recorded tree is alive.
  local has_alive
  for _ in {1..15}; do
    has_alive=0
    for p in "${pids[@]}"; do
      if is_process_tree_alive "${p}"; then
        has_alive=1
        break
      fi
    done
    [ "${has_alive}" -eq 0 ] && break
    sleep 1
  done

  # Escalate to SIGKILL for anything that ignored SIGTERM.
  local forced=0
  for p in "${pids[@]}"; do
    if is_process_tree_alive "${p}"; then
      kill_process_tree "${p}" 9
      forced=1
    fi
  done
  # Give SIGKILL a moment to take effect, but only if it was actually sent.
  if [ "${forced}" -eq 1 ]; then
    sleep 1
  fi

  local remaining=0
  for p in "${pids[@]}"; do
    if is_process_tree_alive "${p}"; then
      remaining=$((remaining + 1))
    fi
  done

  rm -f "${ACTIVE_RUN_FILE}"
  if [ "${remaining}" -eq 0 ]; then
    echo "Stop complete: all processes from active run have exited."
  else
    echo "Stop complete with warnings: ${remaining} process trees still alive."
    return 1
  fi
}

# Launch one detached worker per env id, record them, and attach the monitor.
# For every env id a background `nohup env ... python -u PYTHON_SCRIPT
# --envid <id>` is started with stdout/stderr redirected to
# <run_dir>/<id>.log, and a "<id>\t<pid>\t<log>" row is appended to
# <run_dir>/pids.tsv. The run directory is then stored in ACTIVE_RUN_FILE.
# Globals:   LOG_ROOT, ACTIVE_RUN_FILE, MICROMAMBA_ENV, PYTHON_BIN,
#            PYTHON_SCRIPT (read); ENV_IDS (written by parse_env_ids_csv)
# Arguments: $1 - comma-separated env ids
# Returns:   1 when validation or parsing fails, when the active run still
#            has live processes, or when the run directory cannot be set up;
#            otherwise the exit status of monitor_run
start_run() {
  local env_ids_csv="$1"

  if ! validate_runtime; then
    return 1
  fi
  if ! parse_env_ids_csv "${env_ids_csv}"; then
    echo "Error: --env_ids is empty after parsing."
    return 1
  fi

  # Refuse to start while the previously recorded run is still running.
  local current_run
  if current_run="$(get_active_run_dir 2>/dev/null)"; then
    if run_has_alive_process "${current_run}"; then
      echo "Error: active run is still alive: ${current_run}"
      echo "Use: $0 stop"
      return 1
    fi
  fi

  # Run directories are named after the start time (second resolution).
  local run_id run_dir pids_file
  run_id="$(date +%Y%m%d_%H%M%S)"
  run_dir="${LOG_ROOT}/${run_id}"
  pids_file="${run_dir}/pids.tsv"
  if ! mkdir -p "${run_dir}"; then
    echo "Error: cannot create run directory: ${run_dir}" >&2
    return 1
  fi
  if ! : > "${pids_file}"; then
    echo "Error: cannot create pids file: ${pids_file}" >&2
    return 1
  fi

  # Build the launch command once. stdbuf (when installed) requests
  # line-buffered stdout/stderr for the worker.
  local -a launcher=(
    nohup env
    "PATH=${MICROMAMBA_ENV}/bin:${PATH}"
    PYTHONUNBUFFERED=1
    PYTHONIOENCODING=utf-8
  )
  if command -v stdbuf > /dev/null 2>&1; then
    launcher+=(stdbuf -oL -eL)
  fi
  launcher+=("${PYTHON_BIN}" -u "${PYTHON_SCRIPT}")

  echo "Starting run: ${run_dir}"
  local env_id safe_env log_file pid
  for env_id in "${ENV_IDS[@]}"; do
    # Make the env id safe as a file name: '/' and ' ' become '_'.
    safe_env="$(printf '%s' "${env_id}" | tr '/ ' '__')"
    log_file="${run_dir}/${safe_env}.log"

    "${launcher[@]}" --envid "${env_id}" > "${log_file}" 2>&1 &
    pid=$!

    printf '%s\t%s\t%s\n' "${env_id}" "${pid}" "${log_file}" >> "${pids_file}"
    echo " started envid=${env_id} pid=${pid} log=${log_file}"
  done

  printf '%s\n' "${run_dir}" > "${ACTIVE_RUN_FILE}"
  echo "Run is detached with nohup. active_run=${run_dir}"
  monitor_run "${run_dir}"
}

# Stop the active run (best effort) and start a new one.
# Arguments: $1 - comma-separated env ids for the new run
# Returns:   the exit status of start_run; a failing stop_run is ignored on
#            purpose so the new run is still attempted
restart_run() {
  local env_ids_csv="$1"
  stop_run || true
  start_run "${env_ids_csv}"
}

# Entry point: the first argument selects the sub-command, the rest are its
# arguments. The script exits with the status of the selected function, or 1
# after printing usage when the command line is invalid.
COMMAND="${1:-}"
case "${COMMAND}" in
  start | restart)
    shift
    if ! ENV_IDS_CSV="$(extract_env_ids_arg "$@")"; then
      # On failure, anything the parser wrote to its captured stdout is a
      # diagnostic rather than a CSV; pass it on so it is not lost.
      if [ -n "${ENV_IDS_CSV}" ]; then
        printf '%s\n' "${ENV_IDS_CSV}" >&2
      fi
      show_usage
      exit 1
    fi
    if [ "${COMMAND}" = "start" ]; then
      start_run "${ENV_IDS_CSV}"
    else
      restart_run "${ENV_IDS_CSV}"
    fi
    ;;
  monitor | status | stop)
    shift
    # These sub-commands operate on the active run and accept no arguments.
    if [ "$#" -ne 0 ]; then
      echo "Error: ${COMMAND} takes no extra arguments."
      show_usage
      exit 1
    fi
    case "${COMMAND}" in
      monitor) monitor_run ;;
      status) status_run ;;
      stop) stop_run ;;
    esac
    ;;
  *)
    show_usage
    exit 1
    ;;
esac
