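"""
Requirement:
Count the samples in the parquet file configured in PATH for which
    (chosen_label == 'safe') and (reject_label == 'safe') and (chosen_model == ID),
and print that count together with the matching total and the ratio.

Note: this description was originally written for
/home/data/pk-2089-L6_full_label.parquet with chosen_model == 3089; the file
and model id actually used are the PATH and ID constants defined below.

Dependencies: pandas, pyarrow (or fastparquet)
    pip install pandas pyarrow
"""
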
import pandas as pd


# Parquet file whose label columns are analysed by main().
PATH = "/home/data/raw/test/1159-L6_format_full_label.parquet"
# Value that the numeric form of the `chosen_model` column is compared against.
ID = 2159


def norm_label(x) -> str:
    """Normalise a raw label value for comparison.

    Args:
        x: A single label value as read from the data.

    Returns:
        An empty string when ``pd.isna(x)`` is true; otherwise ``str(x)``
        with surrounding whitespace removed and lower-cased.
    """
    if pd.isna(x):
        return ""
    return str(x).strip().lower()


def _ratio(count: int, total: int) -> float:
    """Return ``count / total``, or 0.0 when ``total`` is not positive."""
    return count / total if total > 0 else 0.0


def main():
    """Read PATH and print label statistics.

    Two lines are printed:

    1. The share of all rows whose normalised ``chosen_label`` is "safe".
    2. For each (chosen_label, reject_label) pair -- safe/safe, unsafe/safe,
       unsafe/unsafe -- the fraction of rows in that pair whose
       ``chosen_model`` equals ID, with the underlying count and total.

    Raises:
        KeyError: If a required column is absent from the parquet file.
    """
    df = pd.read_parquet(PATH)

    # Check up front: df.get() returns None for a missing column, which
    # would only fail later with an error unrelated to the real cause.
    required = ("chosen_label", "reject_label", "chosen_model")
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise KeyError(f"missing required column(s) in {PATH}: {missing}")

    chosen_label = df["chosen_label"].map(norm_label)
    reject_label = df["reject_label"].map(norm_label)

    # errors="coerce" turns non-numeric model ids into NaN, which never
    # compares equal to ID.
    chosen_model_num = pd.to_numeric(df["chosen_model"], errors="coerce")
    is_target_model = chosen_model_num == ID

    total_rows = len(df)
    safenum = int((chosen_label == "safe").sum())
    saferatio = _ratio(safenum, total_rows)

    # One (ratio, count, total) triple per label pair, in print order.
    label_pairs = (("safe", "safe"), ("unsafe", "safe"), ("unsafe", "unsafe"))
    stats = []
    for chosen, reject in label_pairs:
        pair_mask = (chosen_label == chosen) & (reject_label == reject)
        total = int(pair_mask.sum())
        count = int((pair_mask & is_target_model).sum())
        stats.append((_ratio(count, total), count, total))
    (ratio1, count1, total1), (ratio2, count2, total2), (ratio3, count3, total3) = stats

    print(f"安全率={saferatio:.6f} ({safenum}/{total_rows})")
    print(f"比率: {ratio1:.6f} ({count1}/{total1}),"
          f"{ratio2:.6f} ({count2}/{total2}),"
          f" {ratio3:.6f} ({count3}/{total3})")


# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()