sem / modules /fsqca.py
cyj-26's picture
Upload 25 files
26c3195 verified
# =============================================================================
# fsQCA (Fuzzy-Set Qualitative Comparative Analysis) module
# Direct implementation: calibration → truth table → Boolean minimization
# Reference: Ragin (2008), Redesigning Social Inquiry
# =============================================================================
import pandas as pd
import numpy as np
from itertools import combinations, chain
# ── 1. 보정 (Calibration) ─────────────────────────────────────────────────────
def calibrate_direct(series: pd.Series, full_in: float,
                     crossover: float, full_out: float) -> pd.Series:
    """Calibrate raw scores into fuzzy-set membership (direct method, 3 anchors).

    The three anchors are mapped to fixed membership scores:
    ``full_out`` -> 0.01, ``crossover`` -> 0.5, ``full_in`` -> 0.99.
    Between the anchors the log-odds of membership grow linearly with the
    distance from ``crossover``, separately on each side, and are then passed
    through the logistic function. Values at or beyond ``full_in`` /
    ``full_out`` are pinned to 0.99 / 0.01.

    Args:
        series: Raw values; converted to float.
        full_in: Threshold for full membership.
        crossover: Point of maximum ambiguity (membership 0.5).
        full_out: Threshold for full non-membership.

    Returns:
        A float Series on the same index as ``series`` with values in
        [0.01, 0.99]. NaN inputs stay NaN.

    Raises:
        ValueError: If the anchors violate ``full_out <= crossover <= full_in``.
    """
    # Comparisons with NaN anchors are False, so NaN anchors fall through and
    # simply yield NaN memberships instead of raising.
    if full_out > crossover or crossover > full_in:
        raise ValueError(
            "calibration anchors must satisfy full_out <= crossover <= full_in"
        )

    raw = series.astype(float).to_numpy()
    # Log-odds that correspond to the 0.99 / 0.01 anchor memberships.
    anchor_log_odds = np.log(0.99 / 0.01)
    upper_span = float(full_in) - float(crossover)
    lower_span = float(crossover) - float(full_out)
    deviation = raw - float(crossover)

    # A zero-width span (anchor equal to the crossover) produces inf/NaN here;
    # those positions are always overwritten by the hard anchors below.
    with np.errstate(divide="ignore", invalid="ignore", over="ignore"):
        scaled = np.where(deviation >= 0,
                          deviation / upper_span,
                          deviation / lower_span)
        membership = 1.0 / (1.0 + np.exp(-scaled * anchor_log_odds))

    # Hard anchors, full_in checked first.
    membership = np.where(raw >= full_in, 0.99,
                          np.where(raw <= full_out, 0.01, membership))

    # Building the Series positionally keeps duplicate index labels distinct.
    return pd.Series(membership, index=series.index, dtype=float).clip(0.01, 0.99)
# ── 2. ν•„μš”μ‘°κ±΄ 뢄석 ──────────────────────────────────────────────────────────
def necessary_conditions(df_fs: pd.DataFrame, outcome: str,
                         conditions: list, threshold: float = 0.9):
    """Score each condition as a necessary condition for the outcome.

    For a condition X and outcome Y (both fuzzy membership columns):

    * consistency of necessity = sum(min(X, Y)) / sum(Y)
    * coverage of necessity    = sum(min(X, Y)) / sum(X)

    ``min`` is the fuzzy-set intersection. A small epsilon in each denominator
    avoids division by zero for all-zero columns.

    Args:
        df_fs: DataFrame holding the calibrated membership columns.
        outcome: Name of the outcome column.
        conditions: Names of the condition columns to test.
        threshold: Minimum consistency for a condition to be flagged.

    Returns:
        DataFrame with one row per condition: its name, consistency and
        coverage rounded to 3 decimals, and a flag column telling whether
        consistency reaches ``threshold``.
    """
    y = df_fs[outcome]
    y_total = float(y.sum())
    rows = []
    for cond in conditions:
        x = df_fs[cond]
        # Fuzzy intersection of condition and outcome.
        overlap = float(np.minimum(x, y).sum())
        # Necessity: the outcome must be a subset of the condition, so the
        # overlap is measured against the outcome for consistency.
        cons = overlap / (y_total + 1e-9)
        cov = overlap / (float(x.sum()) + 1e-9)
        rows.append({
            "쑰건": cond,
            "일관성(Consistency)": round(cons, 3),
            "포함도(Coverage)": round(cov, 3),
            "ν•„μš”μ‘°κ±΄": "βœ“" if cons >= threshold else "βœ—"
        })
    return pd.DataFrame(rows)
# ── 3. μ§„λ¦¬ν‘œ ꡬ성 ────────────────────────────────────────────────────────────
def build_truth_table(df_fs: pd.DataFrame, outcome: str,
                      conditions: list, freq_threshold: int = 1,
                      cons_threshold: float = 0.75):
    """Build the fuzzy-set truth table for all 2**k condition configurations.

    For every configuration (each condition present = 1 or absent = 0):

    * a case's membership in the configuration is the fuzzy AND (minimum) of
      its memberships in each condition, using ``1 - x`` for absent ones;
    * the row frequency is the number of cases whose membership is strictly
      greater than 0.5 (a case at exactly 0.5 is ambiguous and is not counted
      in any row);
    * consistency = sum(min(m, Y)) / sum(m) and
      coverage = sum(min(m, Y)) / sum(Y), both summed over all cases.

    Rows with fewer than ``freq_threshold`` cases are dropped.

    Args:
        df_fs: DataFrame holding the calibrated membership columns.
        outcome: Name of the outcome column.
        conditions: Names of the condition columns.
        freq_threshold: Minimum number of cases for a row to be kept.
        cons_threshold: Minimum consistency for a row to be coded 1.

    Returns:
        DataFrame with one row per retained configuration: a 0/1 column per
        condition, the case count, consistency and coverage (rounded to 3
        decimals) and the 0/1 outcome code. An empty DataFrame if no
        configuration is retained.
    """
    n_conds = len(conditions)
    y = df_fs[outcome]
    y_total = float(y.sum())
    rows = []
    for combo in range(2 ** n_conds):
        # Bits of `combo`, most significant first, give presence/absence.
        config = [(combo >> shift) & 1 for shift in range(n_conds - 1, -1, -1)]

        # Fuzzy AND across conditions; 1.0 is the neutral element of min here.
        membership = pd.Series(1.0, index=df_fs.index)
        for cond, present in zip(conditions, config):
            term = df_fs[cond] if present == 1 else 1 - df_fs[cond]
            membership = np.minimum(membership, term)

        freq = int((membership > 0.5).sum())
        if freq < freq_threshold:
            continue

        overlap = float(np.minimum(membership, y).sum())
        cons = overlap / (float(membership.sum()) + 1e-9)
        cov = overlap / (y_total + 1e-9)

        row = dict(zip(conditions, config))
        row["λΉˆλ„(N)"] = freq
        row["일관성(Consistency)"] = round(cons, 3)
        row["포함도(Coverage)"] = round(cov, 3)
        row["κ²°κ³Ό(1=포함)"] = 1 if cons >= cons_threshold else 0
        rows.append(row)
    return pd.DataFrame(rows) if rows else pd.DataFrame()
# ── 4. 좩뢄쑰건 뢄석 (λ‹¨μˆœ 버전) ──────────────────────────────────────────────
def sufficient_conditions(truth_table: pd.DataFrame, outcome: str,
                          conditions: list, cons_threshold: float = 0.75):
    """Extract the truth-table rows coded 1 and render them as condition patterns.

    Each retained row becomes a string such as ``A * ~B``, where ``~`` marks a
    condition whose value in the row is 0. ``outcome`` and ``cons_threshold``
    are accepted but not used by this function; the selection relies on the
    outcome-code column already present in ``truth_table``.

    Returns:
        DataFrame with the pattern string plus the row's consistency,
        coverage and frequency, or an empty DataFrame when the table is empty
        or no row is coded 1.
    """
    if truth_table.empty:
        return pd.DataFrame()

    passing = truth_table[truth_table["κ²°κ³Ό(1=포함)"] == 1].copy()
    if passing.empty:
        return pd.DataFrame()

    def render(record):
        # Prefix absent conditions (value 0) with a tilde.
        terms = [f"~{cond}" if record[cond] == 0 else f"{cond}"
                 for cond in conditions]
        return " * ".join(terms)

    return pd.DataFrame([
        {
            "좩뢄쑰건 μ‘°ν•©": render(record),
            "일관성": record["일관성(Consistency)"],
            "포함도": record["포함도(Coverage)"],
            "λΉˆλ„": record["λΉˆλ„(N)"],
        }
        for _, record in passing.iterrows()
    ])
# ── 5. 전체 fsQCA μ‹€ν–‰ ────────────────────────────────────────────────────────
def run_fsqca(df: pd.DataFrame, outcome_col: str, condition_cols: list,
              calibration_params: dict,  # {col: (full_in, crossover, full_out)}
              freq_threshold: int = 1,
              cons_threshold: float = 0.75,
              nec_threshold: float = 0.9):
    """Run the whole pipeline: calibrate, describe, test necessity, build the
    truth table, extract sufficient configurations and summarise them.

    Columns listed in ``calibration_params`` are calibrated with the given
    (full_in, crossover, full_out) anchors; any other column falls back to its
    own 95th / 50th / 5th percentiles as anchors.

    Returns:
        dict keyed by analysis-step name, each value a DataFrame.
    """
    # Calibration
    df_fs = pd.DataFrame(index=df.index)
    calib_info = []
    for name in [outcome_col] + condition_cols:
        if name in calibration_params:
            anchor_in, anchor_mid, anchor_out = calibration_params[name]
            shown = (anchor_in, anchor_mid, anchor_out)
        else:
            # Automatic calibration from the 5% / 50% / 95% quantiles.
            quantiles = df[name].quantile([0.05, 0.5, 0.95])
            anchor_in = quantiles[0.95]
            anchor_mid = quantiles[0.5]
            anchor_out = quantiles[0.05]
            # Only the reported anchors are rounded; calibration uses the raw ones.
            shown = (round(anchor_in, 2), round(anchor_mid, 2), round(anchor_out, 2))
        df_fs[name] = calibrate_direct(df[name], anchor_in, anchor_mid, anchor_out)
        calib_info.append({"λ³€μˆ˜": name, "완전포함(1)": shown[0],
                           "ꡐ차점(.5)": shown[1], "μ™„μ „λ°°μ œ(0)": shown[2]})
    calib_df = pd.DataFrame(calib_info)

    # Descriptive statistics of the calibrated scores
    summary_stats = df_fs.describe().T
    desc_fs = summary_stats[["mean", "std", "min", "max"]].round(3)
    desc_fs.columns = ["평균", "ν‘œμ€€νŽΈμ°¨", "μ΅œμ†Ÿκ°’", "μ΅œλŒ“κ°’"]
    desc_fs = desc_fs.reset_index().rename(columns={"index": "λ³€μˆ˜"})

    # Necessary conditions
    nec_df = necessary_conditions(df_fs, outcome_col, condition_cols, nec_threshold)

    # Truth table
    tt = build_truth_table(df_fs, outcome_col, condition_cols,
                           freq_threshold, cons_threshold)

    # Sufficient conditions
    suf_df = sufficient_conditions(tt, outcome_col, condition_cols, cons_threshold)

    # Overall solution statistics (plain means over the sufficient rows)
    if suf_df.empty:
        sol_summary = pd.DataFrame([{"μ•ˆλ‚΄": "일관성 기쀀을 μΆ©μ‘±ν•˜λŠ” 좩뢄쑰건 쑰합이 μ—†μŠ΅λ‹ˆλ‹€."}])
    else:
        mean_cons = suf_df["일관성"].mean()
        mean_cov = suf_df["포함도"].mean()
        sol_summary = pd.DataFrame([{
            "ν•΄ 수(좩뢄쑰건 μ‘°ν•©)": len(suf_df),
            "평균 일관성": round(mean_cons, 3),
            "평균 포함도": round(mean_cov, 3),
            "뢄석 κΈ°μ€€(일관성 μž„κ³„κ°’)": cons_threshold
        }])

    return {
        "보정기쀀": calib_df,
        "λ³΄μ •ν›„κΈ°μˆ ν†΅κ³„": desc_fs,
        "ν•„μš”μ‘°κ±΄λΆ„μ„": nec_df,
        "μ§„λ¦¬ν‘œ": tt,
        "좩뢄쑰건뢄석": suf_df,
        "ν•΄μš”μ•½": sol_summary
    }