| |
| |
| |
| |
| |
| import pandas as pd |
| import numpy as np |
| from itertools import combinations, chain |
|
|
|
|
| |
def calibrate_direct(series: pd.Series, full_in: float,
                     crossover: float, full_out: float) -> pd.Series:
    """Calibrate raw scores into fuzzy-set membership (Ragin's direct method).

    Each value's deviation from ``crossover`` is rescaled so that the
    ``full_in`` anchor corresponds to log-odds +3 and the ``full_out`` anchor
    to log-odds -3; the log-odds are then mapped through the logistic
    function.  Consequently a value equal to ``crossover`` receives exactly
    0.5, the anchors receive roughly 0.953 / 0.047, and the result is finally
    clipped to the closed interval [0.01, 0.99].

    Args:
        series: Raw values; converted to float.  NaN stays NaN.
        full_in: Threshold for full membership.
        crossover: Point of maximum ambiguity (membership 0.5).
        full_out: Threshold for full non-membership.
            Anchors are expected to satisfy full_out < crossover < full_in.

    Returns:
        A float Series of membership scores sharing ``series``' index.
    """
    anchor_log_odds = 3.0  # log-odds assigned to the two outer anchors
    tiny = 1e-9            # stand-in span when two anchors coincide

    values = series.astype(float)
    deviation = values - crossover

    # A constant column (or identical quantiles) makes a span zero; swap in a
    # tiny span so the division below stays finite.
    upper_span = full_in - crossover
    lower_span = crossover - full_out
    if upper_span == 0:
        upper_span = tiny
    if lower_span == 0:
        lower_span = tiny

    # Values above the crossover are scaled against the upper span, values
    # below it against the lower span.
    scale = np.where(deviation >= 0,
                     anchor_log_odds / upper_span,
                     anchor_log_odds / lower_span)

    # Bound the log-odds so np.exp cannot overflow on extreme outliers; the
    # final clip makes the bound invisible in the result.
    log_odds = (deviation * scale).clip(-50.0, 50.0)
    membership = 1.0 / (1.0 + np.exp(-log_odds))
    return membership.clip(0.01, 0.99)
|
|
|
|
| |
def necessary_conditions(df_fs: pd.DataFrame, outcome: str,
                         conditions: list, threshold: float = 0.9):
    """Assess each condition as a necessary condition for the outcome.

    For a condition X and outcome Y (both fuzzy membership columns) the
    overlap is the fuzzy intersection ``sum(min(X, Y))``.  Necessity
    consistency is that overlap divided by ``sum(Y)`` (how far Y is a subset
    of X); necessity coverage is the overlap divided by ``sum(X)``.

    Args:
        df_fs: Frame of calibrated membership scores.
        outcome: Column name of the outcome set.
        conditions: Column names of the candidate conditions.
        threshold: Minimum consistency for the condition to be flagged.

    Returns:
        A DataFrame with one row per condition holding the condition name,
        consistency and coverage (rounded to 3 decimals) and a flag marker.
    """
    eps = 1e-9  # keeps the ratios finite when a membership column sums to 0
    y = df_fs[outcome]
    y_total = y.sum()

    rows = []
    for cond in conditions:
        x = df_fs[cond]
        overlap = np.minimum(x, y).sum()
        cons = float(overlap / (y_total + eps))
        cov = float(overlap / (x.sum() + eps))
        # NOTE(review): the column labels below look mis-encoded; they are
        # kept verbatim because callers may select columns by these strings.
        # NOTE(review): both flag literals are identical as they appear in
        # this file, so the flag column cannot tell the two outcomes apart —
        # restore the intended pair of symbols.
        rows.append({
            "쑰건": cond,
            "μΌκ΄μ±(Consistency)": round(cons, 3),
            "ν¬ν¨λ(Coverage)": round(cov, 3),
            "νμ쑰건": "β" if cons >= threshold else "β"
        })
    return pd.DataFrame(rows)
|
|
|
|
| |
def build_truth_table(df_fs: pd.DataFrame, outcome: str,
                      conditions: list, freq_threshold: int = 1,
                      cons_threshold: float = 0.75):
    """Build a fuzzy-set truth table over every presence/absence configuration.

    For each of the ``2 ** len(conditions)`` configurations, a case's
    membership is the fuzzy AND (minimum) of its membership in each condition,
    using ``1 - membership`` for conditions that are absent in the
    configuration.  The row frequency is the number of cases whose
    configuration membership is at least 0.5; rows below ``freq_threshold``
    are dropped.  Consistency is ``sum(min(M, Y)) / sum(M)`` and coverage is
    ``sum(min(M, Y)) / sum(Y)``, both taken over all cases.

    Args:
        df_fs: Frame of calibrated membership scores.
        outcome: Column name of the outcome set.
        conditions: Column names of the condition sets.
        freq_threshold: Minimum number of member cases for a row to be kept.
        cons_threshold: Minimum consistency for a row to be coded 1.

    Returns:
        A DataFrame with one row per retained configuration: a 0/1 column per
        condition, then frequency, consistency, coverage (rounded to 3
        decimals) and the 0/1 outcome code.  An empty DataFrame when no
        configuration reaches ``freq_threshold``.
    """
    eps = 1e-9  # keeps the ratios finite when a membership sum is 0
    n_conds = len(conditions)
    y = df_fs[outcome]
    y_total = y.sum()
    rows = []

    for combo in range(2 ** n_conds):
        # Bits of ``combo``, most significant first, give presence (1) or
        # absence (0) of each condition in order.
        config = [(combo >> i) & 1 for i in range(n_conds - 1, -1, -1)]

        membership = pd.Series(1.0, index=df_fs.index)
        for cond, present in zip(conditions, config):
            degree = df_fs[cond] if present else 1 - df_fs[cond]
            membership = np.minimum(membership, degree)

        freq = int((membership >= 0.5).sum())
        if freq < freq_threshold:
            continue

        overlap = np.minimum(membership, y).sum()
        cons = float(overlap / (membership.sum() + eps))
        cov = float(overlap / (y_total + eps))

        row = dict(zip(conditions, config))
        row["λΉλ(N)"] = freq
        row["μΌκ΄μ±(Consistency)"] = round(cons, 3)
        row["ν¬ν¨λ(Coverage)"] = round(cov, 3)
        row["κ²°κ³Ό(1=ν¬ν¨)"] = 1 if cons >= cons_threshold else 0
        rows.append(row)

    return pd.DataFrame(rows) if rows else pd.DataFrame()
|
|
|
|
| |
def sufficient_conditions(truth_table: pd.DataFrame, outcome: str,
                          conditions: list, cons_threshold: float = 0.75):
    """Pick the truth-table rows coded 1 and render them as condition patterns.

    Each selected row becomes one record whose pattern joins the condition
    names with " * ", prefixing a name with "~" when that condition's value in
    the row equals 0.  The row's consistency, coverage and frequency are
    carried over unchanged.  ``outcome`` and ``cons_threshold`` are accepted
    but not read here.

    Returns:
        A DataFrame with one record per selected row, or an empty DataFrame
        when the truth table is empty or has no row coded 1.
    """
    if truth_table.empty:
        return pd.DataFrame()

    is_sufficient = truth_table["κ²°κ³Ό(1=ν¬ν¨)"] == 1
    passing = truth_table[is_sufficient]
    if passing.empty:
        return pd.DataFrame()

    # iterrows is used deliberately: it yields each row as a single Series,
    # so the values copied below keep the dtype they have in that Series.
    records = [
        {
            "μΆ©λΆμ‘°κ±΄ μ‘°ν©": " * ".join(
                f"~{name}" if record[name] == 0 else f"{name}"
                for name in conditions
            ),
            "μΌκ΄μ±": record["μΌκ΄μ±(Consistency)"],
            "ν¬ν¨λ": record["ν¬ν¨λ(Coverage)"],
            "λΉλ": record["λΉλ(N)"]
        }
        for _, record in passing.iterrows()
    ]
    return pd.DataFrame(records)
|
|
|
|
| |
def run_fsqca(df: pd.DataFrame, outcome_col: str, condition_cols: list,
              calibration_params: dict,
              freq_threshold: int = 1,
              cons_threshold: float = 0.75,
              nec_threshold: float = 0.9):
    """Run the fsQCA stages in sequence and collect each stage's table.

    Stages: calibration of the outcome and every condition, descriptive
    statistics of the calibrated scores, necessity analysis, truth-table
    construction, extraction of sufficient configurations, and a one-row
    solution summary.

    Args:
        df: Raw data containing ``outcome_col`` and all ``condition_cols``.
        outcome_col: Name of the outcome column.
        condition_cols: Names of the condition columns.
        calibration_params: Maps a column name to its
            ``(full_in, crossover, full_out)`` anchors.  Columns not listed
            fall back to their 95th / 50th / 5th percentiles.
        freq_threshold: Passed to ``build_truth_table``.
        cons_threshold: Passed to ``build_truth_table`` and
            ``sufficient_conditions``; also echoed in the summary.
        nec_threshold: Passed to ``necessary_conditions``.

    Returns:
        dict with keys = analysis-stage name, values = DataFrame.
    """
    # Stage 1 - calibrate every variable and remember the anchors that were used.
    df_fs = pd.DataFrame(index=df.index)
    anchor_records = []
    for name in [outcome_col] + condition_cols:
        if name not in calibration_params:
            # No explicit anchors: derive them from the column's percentiles.
            # The recorded anchors are rounded; the calibration uses them raw.
            pct = df[name].quantile([0.05, 0.5, 0.95])
            df_fs[name] = calibrate_direct(df[name], pct[0.95], pct[0.5], pct[0.05])
            anchors = {
                "μμ ν¬ν¨(1)": round(pct[0.95], 2),
                "κ΅μ°¨μ (.5)": round(pct[0.5], 2),
                "μμ λ°°μ (0)": round(pct[0.05], 2),
            }
        else:
            full_in, crossover, full_out = calibration_params[name]
            df_fs[name] = calibrate_direct(df[name], full_in, crossover, full_out)
            anchors = {
                "μμ ν¬ν¨(1)": full_in,
                "κ΅μ°¨μ (.5)": crossover,
                "μμ λ°°μ (0)": full_out,
            }
        anchor_records.append({"λ³μ": name, **anchors})

    calib_df = pd.DataFrame(anchor_records)

    # Stage 2 - descriptive statistics of the calibrated scores.
    stats = df_fs.describe().T
    desc_fs = stats[["mean", "std", "min", "max"]].round(3)
    desc_fs.columns = ["νκ· ","νμ€νΈμ°¨","μ΅μκ°","μ΅λκ°"]
    desc_fs = desc_fs.reset_index()
    desc_fs = desc_fs.rename(columns={"index":"λ³μ"})

    # Stage 3 - necessity analysis.
    nec_df = necessary_conditions(df_fs, outcome_col, condition_cols, nec_threshold)

    # Stage 4 - truth table.
    tt = build_truth_table(df_fs, outcome_col, condition_cols,
                           freq_threshold, cons_threshold)

    # Stage 5 - sufficient configurations.
    suf_df = sufficient_conditions(tt, outcome_col, condition_cols, cons_threshold)

    # Stage 6 - solution summary (means over the sufficient rows), or a notice
    # row when nothing met the consistency cut-off.
    if suf_df.empty:
        sol_summary = pd.DataFrame([{"μλ΄": "μΌκ΄μ± κΈ°μ€μ μΆ©μ‘±νλ μΆ©λΆμ‘°κ±΄ μ‘°ν©μ΄ μμ΅λλ€."}])
    else:
        summary_row = {
            "ν΄ μ(μΆ©λΆμ‘°κ±΄ μ‘°ν©)": len(suf_df),
            "νκ· μΌκ΄μ±": round(suf_df["μΌκ΄μ±"].mean(), 3),
            "νκ· ν¬ν¨λ": round(suf_df["ν¬ν¨λ"].mean(), 3),
            "λΆμ κΈ°μ€(μΌκ΄μ± μκ³κ°)": cons_threshold
        }
        sol_summary = pd.DataFrame([summary_row])

    results = {
        "보μ κΈ°μ€": calib_df,
        "보μ νκΈ°μ ν΅κ³": desc_fs,
        "νμ쑰건λΆμ": nec_df,
        "μ§λ¦¬ν": tt,
        "μΆ©λΆμ‘°κ±΄λΆμ": suf_df,
        "ν΄μμ½": sol_summary
    }
    return results
|
|