#!/bin/bash
#
# AIPF flexible evaluation script -- lets the caller choose the number of
# comparison rounds and whether to run an embedding warm-start first.
# See README_eval_flex.sh for details.
#
# Options (all take one value):
#   --rounds N            number of ranking rounds written into the runtime
#                         pipeline.yaml (default: 8)
#   --warmstart MODE      none | top5 | top100 (default: none)
#   --scenario NAME       yss | nsa | all (default: yss)
#   --limit N             forwarded as --limit to the embedding match script
#                         (only used when warm-start is enabled)
#   --emb-batch-size N    forwarded as --batch-size to the embedding match
#                         script (default: 4)
#
# Environment overrides: DATE (default: today, %Y%m%d),
#                        RUN_ID (default: now, %Y%m%d_%H%M%S).
#
set -euo pipefail

# Print an error message to stderr (diagnostics must not pollute stdout).
err() {
  printf '[ERROR] %s\n' "$*" >&2
}

# Associative arrays (declare -A) are used below and need bash 4 or newer.
if ((BASH_VERSINFO[0] < 4)); then
  err "需要 bash >= 4 (当前: ${BASH_VERSION})"
  exit 1
fi

SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"

# ========== Argument parsing ==========
ROUNDS=8
WARMSTART="none"
SCENARIO="yss"
LIMIT=""
EMB_BATCH_SIZE=4

while [[ $# -gt 0 ]]; do
  case "$1" in
    --rounds | --warmstart | --scenario | --limit | --emb-batch-size)
      # Every option takes exactly one value; fail clearly instead of
      # tripping `set -u` on an unbound $2.
      if [[ $# -lt 2 ]]; then
        err "参数 $1 缺少取值"
        exit 1
      fi
      case "$1" in
        --rounds) ROUNDS="$2" ;;
        --warmstart) WARMSTART="$2" ;;
        --scenario) SCENARIO="$2" ;;
        --limit) LIMIT="$2" ;;
        --emb-batch-size) EMB_BATCH_SIZE="$2" ;;
      esac
      shift 2
      ;;
    *)
      err "未知参数: $1"
      exit 1
      ;;
  esac
done

# ROUNDS ends up as an integer in the generated YAML; reject anything else
# up front so it can never be interpreted as code or produce a broken config.
if [[ ! "${ROUNDS}" =~ ^[0-9]+$ ]]; then
  err "--rounds 必须是非负整数: ${ROUNDS}"
  exit 1
fi

# Validate the warm-start mode before any directory is created or the value
# is baked into TAG.
case "${WARMSTART}" in
  none | top5 | top100) ;;
  *)
    err "未知 warmstart 模式: ${WARMSTART} (可选: top5 / top100 / none)"
    exit 1
    ;;
esac

DATE="${DATE:-$(date +%Y%m%d)}"
RUN_ID="${RUN_ID:-$(date +%Y%m%d_%H%M%S)}"

export PYTHONPATH="${SCRIPT_DIR}:${SCRIPT_DIR}/vendor/ranking_moderation/src:${PYTHONPATH:-}"

# ========== Scenario definitions ==========
# Key:   short scenario name
# Value: "example sub-directory|cg name|golden csv file name"
declare -A SCENARIO_MAP
SCENARIO_MAP[yss]="yss_ruler_eval|youth_sexual_and_physical_abuse|aipf_golden_set.csv"
SCENARIO_MAP[nsa]="nsa_ruler_eval|ansa|nsa_golden_set.csv"

# ========== Run tag ==========
TAG="heuristic_r${ROUNDS}"
if [[ "${WARMSTART}" != "none" ]]; then
  TAG="${TAG}_warm_${WARMSTART}"
fi

echo "======================================================"
echo " 配置:"
echo " rounds = ${ROUNDS}"
echo " warmstart = ${WARMSTART}"
echo " scenario = ${SCENARIO}"
echo " date = ${DATE}"
echo " run_id = ${RUN_ID}"
echo " tag = ${TAG}"
echo "======================================================"

# ========== Main flow ==========
#######################################
# Run the full evaluation flow for one scenario: optional embedding
# warm-start, runtime pipeline.yaml generation, eval-data preparation,
# find_positions config generation, find_positions run, and evaluation.
# Globals (read): SCRIPT_DIR, DATE, RUN_ID, TAG, ROUNDS, WARMSTART, LIMIT,
#                 EMB_BATCH_SIZE
# Arguments:
#   $1 - short scenario name (used for display only)
#   $2 - sub-directory under ${SCRIPT_DIR}/aipf_example
#   $3 - cg name passed to the pipeline scripts and used in output names
#   $4 - golden-set csv file name inside <example dir>/data
# Outputs:  progress messages on stdout, errors on stderr; files are written
#           under <example dir>/runs/${DATE}/${RUN_ID}_${TAG}
# Returns:  1 when a required input file is missing or the warm-start mode
#           is unknown; any failing python step aborts the script via set -e.
#######################################
run_scenario() {
  local short_name="$1"
  local eval_dir="$2"
  local cg_name="$3"
  local golden_csv="$4"

  local example_root="${SCRIPT_DIR}/aipf_example/${eval_dir}"
  local base_pipeline_yaml="${example_root}/pipeline.yaml"
  local input_csv="${example_root}/data/${golden_csv}"
  local workspace="${example_root}/runs/${DATE}/${RUN_ID}_${TAG}"

  # Paths that are shared by several steps below.
  local eval_input_jsonl="${workspace}/intermediate/evr_${DATE}_local_eval_input.jsonl"
  local pos_config_yaml="${workspace}/configs/pos_config/find_positions_${cg_name}_${DATE}.yaml"
  local cases_jsonl="${workspace}/outputs/${cg_name}_case_results_${DATE}.jsonl"
  local metrics_json="${workspace}/outputs/${cg_name}_metrics_${DATE}.json"

  echo ""
  echo "======================================================"
  echo " 场景: ${short_name} (${cg_name})"
  echo " Pipeline(base): ${base_pipeline_yaml}"
  echo " Golden set: ${input_csv}"
  echo " Workspace: ${workspace}"
  echo "======================================================"

  if [[ ! -f "${base_pipeline_yaml}" ]]; then
    err "pipeline.yaml 不存在: ${base_pipeline_yaml}"
    return 1
  fi
  if [[ ! -f "${input_csv}" ]]; then
    err "golden set 不存在: ${input_csv}"
    return 1
  fi

  mkdir -p "${workspace}/intermediate" "${workspace}/configs/pos_config" "${workspace}/outputs"

  # ---------- Step 0: embedding warm-start (optional) ----------
  local actual_csv="${input_csv}"
  if [[ "${WARMSTART}" != "none" ]]; then
    echo ""
    echo "[Step 0] Embedding warm-start (${WARMSTART}) ..."

    local topk emb_script emb_output
    case "${WARMSTART}" in
      top5)
        topk=5
        emb_script="${SCRIPT_DIR}/batch_top5_match.py"
        emb_output="${workspace}/intermediate/emb_top5.jsonl"
        ;;
      top100)
        topk=100
        emb_script="${SCRIPT_DIR}/batch_top100_match.py"
        emb_output="${workspace}/intermediate/emb_top100.jsonl"
        ;;
      *)
        # Already rejected at start-up; kept so the function is safe on
        # its own.
        err "未知 warmstart 模式: ${WARMSTART} (可选: top5 / top100 / none)"
        return 1
        ;;
    esac

    # Build optional arguments as an array so the value is never word-split.
    local -a limit_args=()
    if [[ -n "${LIMIT}" ]]; then
      limit_args=(--limit "${LIMIT}")
    fi

    # 0a) Run the embedding match.
    # The ${arr[@]+...} form keeps an empty array safe under `set -u` on
    # bash versions older than 4.4.
    python3 "${emb_script}" \
      --csv "${input_csv}" \
      --output "${emb_output}" \
      --top-k "${topk}" \
      --batch-size "${EMB_BATCH_SIZE}" \
      --cache-dir "${SCRIPT_DIR}/cache_emb" \
      ${limit_args[@]+"${limit_args[@]}"}

    # 0b) Write estimated_position into a fresh copy of the csv so the
    #     original golden set is never modified.
    actual_csv="${workspace}/intermediate/golden_with_warmstart.csv"
    cp -- "${input_csv}" "${actual_csv}"
    python3 "${SCRIPT_DIR}/add_estimated_position.py" \
      --csv "${actual_csv}" \
      --jsonl "${emb_output}" \
      --output "${actual_csv}" \
      --k "${topk}"

    echo "[Step 0] done. warm-start csv: ${actual_csv}"
  fi

  # ---------- Step 1: generate a runtime pipeline.yaml (override rounds) ----------
  local runtime_pipeline_yaml="${workspace}/configs/pipeline_runtime.yaml"
  # Values are handed over via argv and the heredoc delimiter is quoted, so
  # neither paths nor ROUNDS are ever spliced into the Python source.
  python3 - "${base_pipeline_yaml}" "${runtime_pipeline_yaml}" "${ROUNDS}" <<'PY'
import sys

import yaml

base_path, runtime_path = sys.argv[1], sys.argv[2]
rounds = int(sys.argv[3])

with open(base_path, 'r', encoding='utf-8') as f:
    cfg = yaml.safe_load(f)
cfg['find_positions']['ranking']['num_rounds'] = rounds
cfg['find_positions']['ranking']['search_method'] = 'heuristic_search'
with open(runtime_path, 'w', encoding='utf-8') as f:
    yaml.safe_dump(cfg, f, allow_unicode=True, sort_keys=False)
print(f' num_rounds = {rounds}, search_method = heuristic_search')
PY
  echo "[Step 1] 生成运行时 pipeline.yaml: ${runtime_pipeline_yaml}"

  # ---------- Step 2: prepare evaluation data ----------
  echo ""
  echo "[Step 2] 准备本地评估数据 ..."
  python3 "${SCRIPT_DIR}/pipeline/prepare_local_eval_data.py" \
    --input_csv "${actual_csv}" \
    --output_jsonl "${eval_input_jsonl}"

  # ---------- Step 3: generate the find_positions config ----------
  echo ""
  echo "[Step 3] 生成 find_positions 配置 ..."
  python3 "${SCRIPT_DIR}/pipeline/gen_find_positions_cfg.py" \
    --date "${DATE}" \
    --run_root "${workspace}" \
    --cg "${cg_name}" \
    --pipeline_yaml "${runtime_pipeline_yaml}" \
    --input_jsonl "${eval_input_jsonl}" \
    --output_yaml "${pos_config_yaml}"

  # ---------- Step 4: run find_positions ----------
  echo ""
  echo "[Step 4] 运行 find_positions (heuristic_search, ${ROUNDS} rounds) ..."
  python3 "${SCRIPT_DIR}/vendor/ranking_moderation/scripts/find_positions.py" \
    --config "${pos_config_yaml}"

  # ---------- Step 5: evaluate the results ----------
  echo ""
  echo "[Step 5] 评估结果 ..."
  python3 "${SCRIPT_DIR}/pipeline/evaluate_local_ruler_results.py" \
    --input_jsonl "${eval_input_jsonl}" \
    --results_dir "${workspace}/outputs/find_positions/${cg_name}_${DATE}" \
    --pipeline_yaml "${runtime_pipeline_yaml}" \
    --cg "${cg_name}" \
    --output_cases_jsonl "${cases_jsonl}" \
    --output_metrics_json "${metrics_json}"

  echo ""
  echo "[DONE] ${short_name} (${TAG}) 完成!"
  echo " 结果: ${cases_jsonl}"
  echo " 指标: ${metrics_json}"

  if [[ -f "${metrics_json}" ]]; then
    echo ""
    echo "--- Metrics ---"
    cat -- "${metrics_json}"
    echo ""
  fi
}

# ========== Execution ==========
if [[ "${SCENARIO}" == "all" ]]; then
  scenarios_to_run=(yss nsa)
else
  scenarios_to_run=("${SCENARIO}")
fi

for s in "${scenarios_to_run[@]}"; do
  # The +x expansion tests key existence without tripping `set -u`.
  if [[ -z "${SCENARIO_MAP[$s]+x}" ]]; then
    err "未知场景: ${s} (可选: yss / nsa / all)"
    exit 1
  fi
  IFS='|' read -r eval_dir cg_name golden_csv <<< "${SCENARIO_MAP[$s]}"
  run_scenario "${s}" "${eval_dir}" "${cg_name}" "${golden_csv}"
done

echo ""
echo "========== 全部完成 (${TAG}) =========="