hahayang012
/

rm_code

Model card Files Files and versions

rm_code / win_rate.py

hahayang012's picture

Upload folder using huggingface_hub

d8a76be verified 7 months ago

history blame contribute delete

2.21 kB

	# -- coding: utf-8 --
	"""
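	Requirement:
	From the parquet file at PATH, print the safe rate (share of rows with
	chosen_label == 'safe') and, for each of the label pairs
	chosen/reject = safe/safe, unsafe/safe and unsafe/unsafe, the share of
	rows in that pair whose chosen_model equals ID, with the raw counts.

	Dependencies: pandas, pyarrow (or fastparquet)
	pip install pandas pyarrow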
	"""

	import pandas as pd

	# Parquet file that main() loads with pd.read_parquet.
	PATH = "/home/data/raw/test/1159-L6_format_full_label.parquet"
	# Value that main() compares the numeric chosen_model column against.
	ID=2159
	def norm_label(x) -> str:
	    """Normalize a raw label value so labels can be compared as strings.

	    Args:
	        x: A single (scalar) label value.

	    Returns:
	        An empty string when ``pd.isna(x)`` is true; otherwise ``str(x)``
	        with surrounding whitespace removed and lower-cased.
	    """
	    # Missing values become "" so they never match "safe" / "unsafe".
	    if pd.isna(x):
	        return ""
	    return str(x).strip().lower()

	def _ratio(count: int, total: int) -> float:
	    """Return count / total, or 0.0 when total is not positive."""
	    return (count / total) if total > 0 else 0.0


	def main(path=None, model_id=None) -> None:
	    """Print the safe rate and three per-label-pair model ratios.

	    Loads a parquet file, normalizes ``chosen_label`` and ``reject_label``
	    with ``norm_label``, converts ``chosen_model`` to numbers, and prints:

	    * the share of rows whose chosen label is "safe";
	    * for each (chosen, reject) label pair safe/safe, unsafe/safe and
	      unsafe/unsafe, the share of rows in that pair whose numeric
	      ``chosen_model`` equals ``model_id``, together with the raw counts.

	    Args:
	        path: Parquet file to read. Defaults to the module-level ``PATH``.
	        model_id: Value compared against the numeric ``chosen_model``
	            column. Defaults to the module-level ``ID``.

	    Raises:
	        KeyError: If a required column is missing from the file.
	    """
	    path = PATH if path is None else path
	    model_id = ID if model_id is None else model_id

	    df = pd.read_parquet(path)

	    # Fail loudly on a missing column: DataFrame.get() would return None,
	    # which leads to an opaque AttributeError or silently empty counts.
	    required = ("chosen_label", "reject_label", "chosen_model")
	    missing = [col for col in required if col not in df.columns]
	    if missing:
	        raise KeyError(f"missing required column(s) in {path!r}: {missing}")

	    # Normalize labels to stripped, lower-case strings.
	    chosen_label = df["chosen_label"].map(norm_label)
	    reject_label = df["reject_label"].map(norm_label)

	    # Values that cannot be parsed as numbers become NaN and never match.
	    chosen_model_num = pd.to_numeric(df["chosen_model"], errors="coerce")
	    is_model = chosen_model_num == model_id

	    total_rows = len(df)
	    safe_count = int((chosen_label == "safe").sum())
	    safe_ratio = _ratio(safe_count, total_rows)

	    # (chosen, reject) label pairs, in the order they are printed.
	    label_pairs = (("safe", "safe"), ("unsafe", "safe"), ("unsafe", "unsafe"))
	    stats = []
	    for chosen, reject in label_pairs:
	        in_pair = (chosen_label == chosen) & (reject_label == reject)
	        total = int(in_pair.sum())
	        count = int((in_pair & is_model).sum())
	        stats.append((_ratio(count, total), count, total))

	    (
	        (ratio1, count1, total1),
	        (ratio2, count2, total2),
	        (ratio3, count3, total3),
	    ) = stats

	    print(f"安全率={safe_ratio:.6f} ({safe_count}/{total_rows})")
	    print(f"比率: {ratio1:.6f} ({count1}/{total1}),"
	          f"{ratio2:.6f} ({count2}/{total2}),"
	          f" {ratio3:.6f} ({count3}/{total3})")

	# Run the report only when this file is executed as a script.
	if __name__ == "__main__":
	    main()