Spaces:
Sleeping
Sleeping
# =============================================================================
# fsQCA (Fuzzy-Set Qualitative Comparative Analysis) module
# Direct implementation: calibration -> truth table -> Boolean minimization
# Reference: Ragin (2008), Redesigning Social Inquiry
# =============================================================================
| import pandas as pd | |
| import numpy as np | |
| from itertools import combinations, chain | |
# ── 1. Calibration ───────────────────────────────────────────────────────────
def calibrate_direct(series: pd.Series, full_in: float,
                     crossover: float, full_out: float) -> pd.Series:
    """Calibrate raw values into fuzzy-set memberships using three anchors.

    Direct calibration method (Ragin's three anchors): each value's deviation
    from ``crossover`` is rescaled to log-odds and passed through the logistic
    function, so that ``crossover`` maps to 0.5, ``full_in`` to 0.99 and
    ``full_out`` to 0.01. Values at or beyond an anchor receive that anchor's
    membership.

    Args:
        series: Raw values; converted to float. NaN entries stay NaN.
        full_in: Anchor for full membership (result 0.99).
        crossover: Point of maximum ambiguity (result 0.5). Expected to lie
            between ``full_out`` and ``full_in``.
        full_out: Anchor for full non-membership (result 0.01).
            ``full_in < full_out`` is accepted and treated as a reverse-coded
            scale (small raw values mean high membership).

    Returns:
        Float Series on the index of ``series`` with values in [0.01, 0.99].
    """
    lower_bound, upper_bound = 0.01, 0.99
    # NOTE: Ragin's published procedure places the anchors at log-odds of +/-3
    # (about 0.953 / 0.047). +/-ln(99) is used here so the anchors keep the
    # 0.99 / 0.01 memberships this function has always returned.
    anchor_log_odds = np.log(upper_bound / lower_bound)

    # Flip the sign for reverse-coded scales so the maths below can always
    # assume full_out <= full_in.
    direction = 1.0 if full_in >= full_out else -1.0
    raw = series.astype(float).to_numpy() * direction
    top = float(full_in) * direction
    mid = float(crossover) * direction
    bottom = float(full_out) * direction

    deviation = raw - mid
    # Degenerate spans (crossover equal to an anchor) only produce inf/NaN for
    # values that the anchor overrides below replace, so silence the warnings.
    with np.errstate(divide="ignore", invalid="ignore", over="ignore"):
        scaled = np.where(
            deviation >= 0,
            deviation / (top - mid),
            deviation / (mid - bottom),
        )
        membership = 1.0 / (1.0 + np.exp(-scaled * anchor_log_odds))

    # full_in is applied last so it wins when both anchor tests match, which
    # mirrors the original if/elif order.
    membership = np.where(raw <= bottom, lower_bound, membership)
    membership = np.where(raw >= top, upper_bound, membership)

    result = pd.Series(membership, index=series.index, dtype=float)
    return result.clip(lower_bound, upper_bound)
# ── 2. Necessary-condition analysis ──────────────────────────────────────────
def necessary_conditions(df_fs: pd.DataFrame, outcome: str,
                         conditions: list, threshold: float = 0.9):
    """Score each condition as a necessary condition for the outcome.

    For a condition X and outcome Y (both fuzzy memberships):

    * consistency of necessity = sum(min(X, Y)) / sum(Y)
    * coverage of necessity    = sum(min(X, Y)) / sum(X)

    Args:
        df_fs: Calibrated data holding ``outcome`` and every name in
            ``conditions`` as columns.
        outcome: Name of the outcome column.
        conditions: Names of the condition columns to evaluate.
        threshold: Minimum consistency for a condition to be flagged.

    Returns:
        DataFrame with one row per condition: its name, consistency and
        coverage (rounded to 3 decimals), and a pass/fail marker.
    """
    y = df_fs[outcome]
    y_total = y.sum()
    rows = []
    for cond in conditions:
        x = df_fs[cond]
        # Fuzzy intersection is the element-wise minimum, not the product.
        overlap = np.minimum(x, y).sum()
        # The 1e-9 term guards against division by zero on all-zero columns.
        cons = float(overlap / (y_total + 1e-9))
        cov = float(overlap / (x.sum() + 1e-9))
        rows.append({
            "쑰건": cond,
            "μΌκ΄μ±(Consistency)": round(cons, 3),
            "ν¬ν¨λ(Coverage)": round(cov, 3),
            # NOTE(review): both markers were the same garbled character in
            # the source, making pass and fail indistinguishable; confirm the
            # intended glyphs.
            "νμ쑰건": "✓" if cons >= threshold else "✗"
        })
    return pd.DataFrame(rows)
# ── 3. Truth-table construction ──────────────────────────────────────────────
def build_truth_table(df_fs: pd.DataFrame, outcome: str,
                      conditions: list, freq_threshold: int = 1,
                      cons_threshold: float = 0.75):
    """Build the fuzzy-set truth table over all 2**k condition configurations.

    For every combination of present (1) / absent (0) conditions, a case's
    membership in that configuration is the fuzzy AND, i.e. the minimum over
    the conditions of ``x`` (present) or ``1 - x`` (absent). Then:

    * frequency   = number of cases with membership > 0.5
    * consistency = sum(min(membership, Y)) / sum(membership), over all cases
    * coverage    = sum(min(membership, Y)) / sum(Y), over all cases

    Args:
        df_fs: Calibrated data holding ``outcome`` and every name in
            ``conditions`` as columns.
        outcome: Name of the outcome column.
        conditions: Names of the condition columns; their order fixes the
            column order and the bit order of the configurations.
        freq_threshold: Configurations with fewer cases than this are dropped.
        cons_threshold: Minimum consistency for a row to be coded 1.

    Returns:
        DataFrame with one row per retained configuration: one 0/1 column per
        condition, then frequency, consistency, coverage (rounded to 3
        decimals) and the 0/1 outcome code. Empty DataFrame if no
        configuration meets ``freq_threshold``.
    """
    n_conds = len(conditions)
    y = df_fs[outcome].to_numpy(dtype=float)
    y_total = np.nansum(y)
    cond_values = [df_fs[cond].to_numpy(dtype=float) for cond in conditions]

    rows = []
    for combo in range(2 ** n_conds):
        # Most-significant bit corresponds to the first condition.
        config = [(combo >> shift) & 1 for shift in range(n_conds - 1, -1, -1)]

        # Fuzzy AND across conditions: element-wise minimum, not the product.
        membership = np.ones(len(df_fs), dtype=float)
        for values, present in zip(cond_values, config):
            membership = np.minimum(membership,
                                    values if present else 1.0 - values)

        # Strictly greater than 0.5: with min-based membership a case exactly
        # at 0.5 would otherwise be counted in more than one configuration.
        freq = int((membership > 0.5).sum())
        if freq < freq_threshold:
            continue

        overlap = np.nansum(np.minimum(membership, y))
        # The 1e-9 term guards against division by zero.
        cons = float(overlap / (np.nansum(membership) + 1e-9))
        cov = float(overlap / (y_total + 1e-9))

        row = dict(zip(conditions, config))
        row["λΉλ(N)"] = freq
        row["μΌκ΄μ±(Consistency)"] = round(cons, 3)
        row["ν¬ν¨λ(Coverage)"] = round(cov, 3)
        row["κ²°κ³Ό(1=ν¬ν¨)"] = 1 if cons >= cons_threshold else 0
        rows.append(row)
    return pd.DataFrame(rows) if rows else pd.DataFrame()
# ── 4. Sufficient-condition analysis (simple version) ────────────────────────
def sufficient_conditions(truth_table: pd.DataFrame, outcome: str,
                          conditions: list, cons_threshold: float = 0.75):
    """Extract truth-table rows coded 1 and render them as condition patterns.

    Each retained row becomes a string such as ``A * ~B``, where ``~`` marks a
    condition whose value in that row is 0. ``outcome`` and ``cons_threshold``
    are accepted for interface compatibility but are not read here; the
    selection relies on the outcome code already stored in ``truth_table``.

    Returns:
        DataFrame with one row per retained configuration (pattern string,
        consistency, coverage, frequency), or an empty DataFrame when the
        table is empty or no row is coded 1.
    """
    if truth_table.empty:
        return pd.DataFrame()

    passing = truth_table.loc[truth_table["κ²°κ³Ό(1=ν¬ν¨)"] == 1]
    if passing.empty:
        return pd.DataFrame()

    records = []
    for _, record in passing.iterrows():
        # Prefix absent conditions (value 0) with "~".
        terms = [f"{'' if record[cond] != 0 else '~'}{cond}"
                 for cond in conditions]
        records.append({
            "μΆ©λΆμ‘°κ±΄ μ‘°ν©": " * ".join(terms),
            "μΌκ΄μ±": record["μΌκ΄μ±(Consistency)"],
            "ν¬ν¨λ": record["ν¬ν¨λ(Coverage)"],
            "λΉλ": record["λΉλ(N)"]
        })
    return pd.DataFrame(records)
# ── 5. Run the full fsQCA pipeline ───────────────────────────────────────────
def run_fsqca(df: pd.DataFrame, outcome_col: str, condition_cols: list,
              calibration_params: dict,  # {col: (full_in, crossover, full_out)}
              freq_threshold: int = 1,
              cons_threshold: float = 0.75,
              nec_threshold: float = 0.9):
    """Run calibration, necessity analysis, truth table and sufficiency steps.

    Columns listed in ``calibration_params`` are calibrated with the supplied
    (full_in, crossover, full_out) anchors; all other columns fall back to
    their 95th / 50th / 5th percentiles.

    Returns:
        dict whose keys are analysis-stage names and whose values are
        DataFrames (calibration anchors, post-calibration descriptives,
        necessity analysis, truth table, sufficiency analysis, summary).
    """
    # Calibration
    df_fs = pd.DataFrame(index=df.index)
    anchor_records = []
    for col in [outcome_col] + condition_cols:
        if col in calibration_params:
            full_in, crossover, full_out = calibration_params[col]
            shown = (full_in, crossover, full_out)
        else:
            # Automatic calibration from the 5% / 50% / 95% quantiles.
            pct = df[col].quantile([0.05, 0.5, 0.95])
            full_in, crossover, full_out = pct[0.95], pct[0.5], pct[0.05]
            shown = (round(full_in, 2), round(crossover, 2),
                     round(full_out, 2))
        df_fs[col] = calibrate_direct(df[col], full_in, crossover, full_out)
        anchor_records.append({
            "λ³μ": col,
            "μμ ν¬ν¨(1)": shown[0],
            "κ΅μ°¨μ (.5)": shown[1],
            "μμ λ°°μ (0)": shown[2],
        })
    calib_df = pd.DataFrame(anchor_records)

    # Descriptive statistics (after calibration)
    stats = df_fs.describe().T
    desc_fs = stats[["mean", "std", "min", "max"]].round(3)
    desc_fs.columns = ["νκ· ", "νμ€νΈμ°¨", "μ΅μκ°", "μ΅λκ°"]
    desc_fs = desc_fs.reset_index()
    desc_fs = desc_fs.rename(columns={"index": "λ³μ"})

    # Necessary conditions
    nec_df = necessary_conditions(df_fs, outcome_col, condition_cols,
                                  nec_threshold)

    # Truth table
    tt = build_truth_table(df_fs, outcome_col, condition_cols,
                           freq_threshold, cons_threshold)

    # Sufficient conditions
    suf_df = sufficient_conditions(tt, outcome_col, condition_cols,
                                   cons_threshold)

    # Overall solution statistics: plain means over the retained rows.
    if suf_df.empty:
        sol_summary = pd.DataFrame([{"μλ΄": "μΌκ΄μ± κΈ°μ€μ μΆ©μ‘±νλ μΆ©λΆμ‘°κ±΄ μ‘°ν©μ΄ μμ΅λλ€."}])
    else:
        mean_cons = suf_df["μΌκ΄μ±"].mean()
        mean_cov = suf_df["ν¬ν¨λ"].mean()
        sol_summary = pd.DataFrame([{
            "ν΄ μ(μΆ©λΆμ‘°κ±΄ μ‘°ν©)": len(suf_df),
            "νκ· μΌκ΄μ±": round(mean_cons, 3),
            "νκ· ν¬ν¨λ": round(mean_cov, 3),
            "λΆμ κΈ°μ€(μΌκ΄μ± μκ³κ°)": cons_threshold
        }])

    results = {}
    results["보μ κΈ°μ€"] = calib_df
    results["보μ νκΈ°μ ν΅κ³"] = desc_fs
    results["νμ쑰건λΆμ"] = nec_df
    results["μ§λ¦¬ν"] = tt
    results["μΆ©λΆμ‘°κ±΄λΆμ"] = suf_df
    results["ν΄μμ½"] = sol_summary
    return results