| | |
| | |
| |
|
| | import math |
| | import os |
| | import argparse |
| | import warnings |
| | from typing import List, Tuple |
| | from vllm import LLM |
| | import pandas as pd |
| | import numpy as np |
| | import wandb |
| |
|
def build_pair_prompts(chosen_prompt: str,
                       chosen: str,
                       reject: str,
                       joiner: str = "\n") -> Tuple[str, str]:
    """Build the two scoring inputs for one preference sample.

    Each input is the shared prompt joined with one completion:
    ``prompt + joiner + chosen`` and ``prompt + joiner + reject``.
    ``None`` values are treated as empty strings, and trailing
    whitespace is stripped from every piece before joining.

    Returns:
        ``(prompt_chosen, prompt_reject)`` as a tuple of strings.
    """
    prompt_text, chosen_text, reject_text = (
        ("" if piece is None else piece).rstrip()
        for piece in (chosen_prompt, chosen, reject)
    )
    return (
        joiner.join((prompt_text, chosen_text)),
        joiner.join((prompt_text, reject_text)),
    )
| |
|
# Path to the evaluation parquet; each row holds one preference pair.
# NOTE(review): path is hard-coded — consider wiring it through argparse.
data_path="/home/data/raw/test/1159-L6_format_full_label_v5.0safe.parquet"
if not os.path.exists(data_path):
    raise FileNotFoundError(f"数据文件不存在:{data_path}")

# Load the dataset and fail fast when the expected schema is missing.
df = pd.read_parquet(data_path)
required_cols = ["chosen_prompt", "chosen", "reject"]
for c in required_cols:
    if c not in df.columns:
        raise ValueError(
            f"数据缺少必要列 `{c}`,实际列为:{list(df.columns)}"
        )
num_samples = len(df)
print(f"Loaded {num_samples} samples from {data_path}")
# Reward model served via vLLM's pooling runner, converted to emit
# reward scores (exposes `llm.reward(...)` used below).
llm = LLM(model="/home/rm5.0_9e-6", runner="pooling", convert="reward")
| |
|
# Running tally of pairwise comparisons: `correct_cnt` counts pairs where
# the chosen completion out-scored the rejected one.
# (Removed dead `results = []` — it was never read or appended anywhere.)
correct_cnt = 0
total_cnt = 0
batch_size = 16

# Number of batches needed to cover all samples (last batch may be short).
num_batches = math.ceil(num_samples / batch_size)
| |
|
# Score every chosen/reject pair with the reward model, batch by batch,
# printing a running pairwise accuracy as results come in.
print("\nStart pairwise reward evaluation...\n" + "-" * 70)
for b in range(num_batches):
    start = b * batch_size
    end = min((b + 1) * batch_size, num_samples)
    batch = df.iloc[start:end]

    # Build two prompts per row (chosen first, reject second) so scores
    # come back interleaved: [chosen_0, reject_0, chosen_1, reject_1, ...].
    pair_prompts = []
    indices = []
    for idx, row in batch.iterrows():
        prompt_chosen, prompt_reject = build_pair_prompts(
            row["chosen_prompt"], row["chosen"], row["reject"], joiner=""
        )

        pair_prompts.append(prompt_chosen)
        pair_prompts.append(prompt_reject)
        indices.append(idx)

    # Best-effort scoring: a failed batch is warned about and skipped so
    # the remaining batches can still be evaluated.
    try:
        outputs = llm.reward(pair_prompts)
    except Exception as e:
        warnings.warn(f"llm.reward 执行失败(batch={b}):{e}")

        continue

    # Extract one scalar reward per prompt from the pooling outputs.
    # NOTE(review): assumes the last element of `out.outputs.data` is the
    # sequence-level reward — confirm against the vLLM reward API.
    scalar_scores = []
    for out in outputs:

        score = out.outputs.data[-1]

        # Map NaN scores to a huge negative value so they never win.
        if np.isnan(score):
            score = -1e30
        scalar_scores.append(score)

    # Pairwise comparison: correct iff the chosen completion's score
    # strictly beats the rejected one's.
    for i, idx in enumerate(indices):
        chosen_score = scalar_scores[2 * i]
        reject_score = scalar_scores[2 * i + 1]
        total_cnt += 1
        is_correct = chosen_score > reject_score
        correct_cnt += int(is_correct)
        running_acc = correct_cnt / total_cnt

        print(
            f"[{total_cnt:6d}] "
            f"Chosen={chosen_score:.6f} | Reject={reject_score:.6f} | "
            f"Correct={is_correct} | RunningAcc={running_acc*100:.2f}%"
        )
| |
|
| | |
| | |
| | |
# Final pairwise accuracy over every pair that was actually scored;
# defaults to 0.0 when no batch succeeded.
if total_cnt > 0:
    final_acc = correct_cnt / total_cnt
else:
    final_acc = 0.0
print("\n" + "-" * 70)
print(
    f"Finished. Total={total_cnt}, Correct={correct_cnt}, "
    f"FinalAcc={final_acc*100:.2f}%"
)
| |
|
| |
|
| |
|
| |
|
| |
|