"""Participant-level quality control from repeated ratings of one video.

Reads ``results.csv``, keeps only the rows for the QC video (``qc_video``),
and for every participant who rated it at least twice summarises how much
their ``action_consistency`` and ``physical_plausibility`` ratings spread
across the repeats.  Participants whose spread exceeds the thresholds get
``low_quality_flag`` set.  The summary is written to
``qc_outputs/participant_qc_single_duplicate.csv`` and printed.
"""
from pathlib import Path

import numpy as np
import pandas as pd

# ID of the video that is duplicated for QC purposes.
qc_video = "Wan2.2/Wan2.2_BodyWeightSquats_03_g09_c02.mp4"

# Rating column -> prefix used for its summary columns in the QC table.
_RATING_PREFIXES = {
    "action_consistency": "ac",
    "physical_plausibility": "pp",
}
_STAT_NAMES = ("mean", "std", "range", "mae")

# Fixed schema so the QC table has its columns even when it has no rows.
_QC_COLUMNS = ["participant_id", "n_repeat"] + [
    f"{prefix}_{stat}"
    for prefix in _RATING_PREFIXES.values()
    for stat in _STAT_NAMES
]


def _spread_stats(values, prefix):
    """Return mean/std/range/mean-absolute-deviation of *values* as a dict.

    Keys are ``f"{prefix}_mean"``, ``..._std``, ``..._range`` and ``..._mae``.
    An empty *values* array (every rating missing or unparseable) yields NaN
    for all four statistics instead of raising from ``np.max``/``np.min``.
    A single valid value yields a std and range of 0.
    """
    if values.size == 0:
        return {f"{prefix}_{stat}": np.nan for stat in _STAT_NAMES}
    center = np.mean(values)
    return {
        f"{prefix}_mean": center,
        # Population std (ddof=0): the repeats are the whole set of interest.
        f"{prefix}_std": np.std(values, ddof=0),
        f"{prefix}_range": np.max(values) - np.min(values),
        f"{prefix}_mae": np.mean(np.abs(values - center)),
    }


def compute_participant_qc(df, video_id=qc_video, std_threshold=2.0,
                           range_threshold=4.0):
    """Summarise per-participant rating spread on the duplicated QC video.

    Args:
        df: Frame with ``participant_id``, ``video_id``,
            ``action_consistency`` and ``physical_plausibility`` columns.
            It is not modified.
        video_id: ``video_id`` value identifying the QC video.
        std_threshold: A std strictly above this flags the participant.
        range_threshold: A range strictly above this flags the participant.

    Returns:
        DataFrame with one row per participant who has at least two rows
        for the QC video, holding ``n_repeat``, the ``ac_*`` / ``pp_*``
        statistics, and a boolean ``low_quality_flag``.  The columns are
        present even when no participant qualifies.
    """
    # Work on a copy so the numeric coercion never touches the caller's data.
    dup_df = df.loc[df["video_id"] == video_id].copy()
    for col in _RATING_PREFIXES:
        # Unparseable ratings become NaN and are dropped per metric below.
        dup_df[col] = pd.to_numeric(dup_df[col], errors="coerce")

    qc_rows = []
    for pid, group in dup_df.groupby("participant_id"):
        if len(group) < 2:
            continue  # participant answered only once; nothing to compare
        row = {"participant_id": pid, "n_repeat": len(group)}
        for col, prefix in _RATING_PREFIXES.items():
            row.update(_spread_stats(group[col].dropna().to_numpy(), prefix))
        qc_rows.append(row)

    qc = pd.DataFrame(qc_rows, columns=_QC_COLUMNS)

    # Quality verdict: any metric whose std or range exceeds its threshold.
    # NaN statistics compare as False, so they never raise the flag.
    std_cols = [f"{prefix}_std" for prefix in _RATING_PREFIXES.values()]
    range_cols = [f"{prefix}_range" for prefix in _RATING_PREFIXES.values()]
    qc["low_quality_flag"] = (
        (qc[std_cols].astype(float) > std_threshold).any(axis=1)
        | (qc[range_cols].astype(float) > range_threshold).any(axis=1)
    )
    return qc


def main(csv_path="results.csv", out_dir="qc_outputs"):
    """Load the results CSV, compute the QC table, save it and print it.

    Returns:
        The QC DataFrame that was written to disk.
    """
    df = pd.read_csv(csv_path)
    qc = compute_participant_qc(df)

    # Save.
    out_dir = Path(out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    qc_path = out_dir / "participant_qc_single_duplicate.csv"
    qc.to_csv(qc_path, index=False)
    # as_posix() keeps the message identical across operating systems.
    print(f"Saved to {qc_path.as_posix()}")

    # Preview, flagged participants first.
    print(qc.sort_values("low_quality_flag", ascending=False))
    return qc


if __name__ == "__main__":
    qc = main()