# sem/modules/association.py
# (hosting-page residue) cyj-26's picture
# (hosting-page residue) Upload 25 files
# (hosting-page residue) 26c3195 verified
import pandas as pd
import numpy as np
def run_association(df, item_cols, min_support=0.05,
                    min_confidence=0.3, min_lift=1.0, max_len=3):
    """Mine frequent itemsets and association rules from selected columns.

    The selected columns are turned into a boolean "basket" matrix:

    * if every non-missing value is 0/1/True/False, the columns are used
      directly as item indicators and missing values count as "absent";
    * otherwise each column is one-hot encoded into ``<column>_<value>``
      items, and missing values produce no item at all.

    Args:
        df: Input ``pandas.DataFrame``.
        item_cols: Columns of ``df`` to use as items.
        min_support: Minimum support passed to ``apriori``.
        min_confidence: Minimum confidence passed to ``association_rules``.
        min_lift: Rules with a lift below this value are dropped.
        max_len: Maximum itemset length passed to ``apriori``.

    Returns:
        A ``(freq_out, rules_out, error)`` tuple.

        * Success: two DataFrames sorted in descending order (itemsets by
          support, rules by lift), values rounded to 4 decimals, and
          ``error`` set to ``None``.
        * No frequent itemsets: two empty DataFrames and a message.
        * No rule reaches ``min_confidence``: the itemset table, an empty
          DataFrame and a message.
        * mlxtend missing or any other exception: ``(None, None, message)``.
    """
    try:
        from mlxtend.frequent_patterns import apriori, association_rules
    except ImportError:
        return None, None, "mlxtend ํŒจํ‚ค์ง€๊ฐ€ ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค: pip install mlxtend"

    # Output column labels (runtime text, part of the return contract).
    # NOTE(review): the non-ASCII literals in this function look mis-encoded
    # in this copy of the file; they are kept byte-for-byte because callers
    # may depend on them -- confirm against the original source.
    itemset_col = "ํ•ญ๋ชฉ์ง‘ํ•ฉ"
    support_col = "์ง€์ง€๋„"
    confidence_col = "์‹ ๋ขฐ๋„"
    lift_col = "ํ–ฅ์ƒ๋„"
    if_col = "์กฐ๊ฑด๋ถ€(IF)"
    then_col = "๊ฒฐ๊ณผ(THEN)"

    def _join_items(items):
        # frozenset of item names -> deterministic, human-readable label
        return ", ".join(sorted(items))

    try:
        data = df[item_cols].copy()

        # Binary encoding
        flat = data.values.flatten()
        unique_vals = set(flat[~pd.isna(flat)])
        # True/False/0.0/1.0 hash and compare equal to 1/0, so {0, 1}
        # covers every already-binary representation.
        if unique_vals <= {0, 1}:
            basket = data.fillna(0).astype(bool)
        else:
            # Stringify so numeric categories are one-hot encoded too, then
            # mask the cells that were missing: astype(str) would otherwise
            # turn them into literal "nan"/"None" items, whereas
            # get_dummies skips real missing values.
            labels = data.astype(str).where(data.notna())
            basket = pd.get_dummies(labels).astype(bool)

        # Apriori
        freq_items = apriori(basket, min_support=min_support,
                             use_colnames=True, max_len=max_len)
        if freq_items.empty:
            return pd.DataFrame(), pd.DataFrame(), \
                f"์ง€์ง€๋„ {min_support} ์ด์ƒ์ธ ๋นˆ๋ฐœํ•ญ๋ชฉ์ด ์—†์Šต๋‹ˆ๋‹ค. ์ž„๊ณ„๊ฐ’์„ ๋‚ฎ์ถฐ๋ณด์„ธ์š”."

        freq_items[itemset_col] = freq_items["itemsets"].apply(_join_items)
        freq_out = freq_items[[itemset_col, "support"]].copy()
        freq_out.columns = [itemset_col, support_col]
        freq_out = freq_out.sort_values(
            support_col, ascending=False).reset_index(drop=True)
        freq_out[support_col] = freq_out[support_col].round(4)

        # Association rules
        try:
            # num_itemsets is the number of transactions in the input data,
            # i.e. rows of the basket, not the number of frequent itemsets.
            rules = association_rules(freq_items, metric="confidence",
                                      min_threshold=min_confidence,
                                      num_itemsets=len(basket))
        except TypeError:
            # mlxtend releases without the num_itemsets keyword reject the
            # call above; retry with the older signature.
            rules = association_rules(freq_items, metric="confidence",
                                      min_threshold=min_confidence)
        if rules.empty:
            return freq_out, pd.DataFrame(), \
                f"์‹ ๋ขฐ๋„ {min_confidence} ์ด์ƒ์ธ ๊ทœ์น™์ด ์—†์Šต๋‹ˆ๋‹ค."

        rules = rules[rules["lift"] >= min_lift].copy()
        rules[if_col] = rules["antecedents"].apply(_join_items)
        rules[then_col] = rules["consequents"].apply(_join_items)
        rules_out = rules[[if_col, then_col,
                           "support", "confidence", "lift"]].copy()
        rules_out.columns = [if_col, then_col,
                             support_col, confidence_col, lift_col]
        rules_out = rules_out.sort_values(
            lift_col, ascending=False).reset_index(drop=True)
        for col in (support_col, confidence_col, lift_col):
            rules_out[col] = rules_out[col].round(4)
        return freq_out, rules_out, None
    except Exception as e:
        # Boundary handler: callers receive a message instead of an exception.
        return None, None, f"์—ฐ๊ด€๋ถ„์„ ์˜ค๋ฅ˜: {str(e)}"