| import pandas as pd |
| import numpy as np |
|
|
|
|
def _join_itemset(items):
    """Render an itemset as a sorted, comma-separated string of its labels."""
    # Labels are stringified first so non-string column names (e.g. ints)
    # can be both sorted and joined.
    return ", ".join(sorted(map(str, items)))


def run_association(df: pd.DataFrame, item_cols, min_support: float = 0.05,
                    min_confidence: float = 0.3, min_lift: float = 1.0,
                    max_len: int = 3):
    """Mine frequent itemsets and association rules from ``df[item_cols]``.

    The selected columns are converted to a boolean basket matrix:

    * if every non-missing value is 0/1 (or a bool), the columns are used
      directly, with missing values counted as "item absent";
    * otherwise each column is one-hot encoded via ``pandas.get_dummies``,
      and missing values produce no item at all.

    The basket is passed to mlxtend's ``apriori`` and ``association_rules``.
    Rules are generated with the confidence metric and then filtered by lift.

    Args:
        df: Source data frame.
        item_cols: Column labels of ``df`` to analyse.
        min_support: Minimum support passed to ``apriori``.
        min_confidence: Minimum confidence passed to ``association_rules``.
        min_lift: Rules with a lift below this value are dropped.
        max_len: Maximum itemset length passed to ``apriori``.

    Returns:
        A ``(freq_out, rules_out, error)`` tuple:

        * mlxtend is not installed -> ``(None, None, message)``
        * no frequent itemsets -> ``(empty DataFrame, empty DataFrame, message)``
        * no rule reaches ``min_confidence`` ->
          ``(freq_out, empty DataFrame, message)``
        * success -> ``(freq_out, rules_out, None)``; ``freq_out`` holds the
          itemset label and its support, ``rules_out`` holds antecedent,
          consequent, support, confidence and lift, both sorted descending
          (by support and by lift respectively) and rounded to 4 decimals
        * any other exception -> ``(None, None, message)`` containing the
          exception text; the function itself does not raise.
    """
    try:
        from mlxtend.frequent_patterns import apriori, association_rules
    except ImportError:
        return None, None, "mlxtend ํจํค์ง๊ฐ ํ์ํฉ๋๋ค: pip install mlxtend"

    import inspect

    # Output column labels (user-facing text, kept verbatim).
    col_itemset = "ํญ๋ชฉ์งํฉ"
    col_support = "์ง์ง๋"
    col_confidence = "์ ๋ขฐ๋"
    col_lift = "ํฅ์๋"
    col_if = "์กฐ๊ฑด๋ถ(IF)"
    col_then = "๊ฒฐ๊ณผ(THEN)"

    try:
        data = df[item_cols].copy()

        # Decide between "already binary" and "needs one-hot encoding" by
        # looking at the distinct non-missing values across all columns.
        flat = data.values.flatten()
        unique_vals = set(flat[~pd.isna(flat)])

        # True/False/1.0/0.0 compare and hash equal to 1/0, so {0, 1} is enough.
        if unique_vals <= {0, 1}:
            basket = data.fillna(0).astype(bool)
        else:
            # Stringify only the values that are present: a plain astype(str)
            # would turn missing cells into the literal "nan"/"None" and
            # create spurious "<col>_nan" items. get_dummies skips NaN.
            as_text = data.astype(str).where(data.notna())
            basket = pd.get_dummies(as_text).astype(bool)

        freq_items = apriori(basket, min_support=min_support,
                             use_colnames=True, max_len=max_len)

        if freq_items.empty:
            return pd.DataFrame(), pd.DataFrame(), \
                f"์ง์ง๋ {min_support} ์ด์์ธ ๋น๋ฐํญ๋ชฉ์ด ์์ต๋๋ค. ์๊ณ๊ฐ์ ๋ฎ์ถฐ๋ณด์ธ์."

        # Build the display table separately so the frame handed to
        # association_rules keeps only what apriori produced.
        freq_out = pd.DataFrame({
            col_itemset: freq_items["itemsets"].map(_join_itemset),
            col_support: freq_items["support"],
        })
        freq_out = freq_out.sort_values(
            col_support, ascending=False).reset_index(drop=True)
        freq_out[col_support] = freq_out[col_support].round(4)

        rule_kwargs = {"metric": "confidence", "min_threshold": min_confidence}
        # `num_itemsets` exists only in newer mlxtend releases, where it is
        # documented as the number of transactions in the original data
        # (i.e. basket rows, not the number of frequent itemsets).
        if "num_itemsets" in inspect.signature(association_rules).parameters:
            rule_kwargs["num_itemsets"] = len(basket)
        rules = association_rules(freq_items, **rule_kwargs)

        if rules.empty:
            return freq_out, pd.DataFrame(), \
                f"์ ๋ขฐ๋ {min_confidence} ์ด์์ธ ๊ท์น์ด ์์ต๋๋ค."

        rules = rules[rules["lift"] >= min_lift]
        rules_out = pd.DataFrame({
            col_if: rules["antecedents"].map(_join_itemset),
            col_then: rules["consequents"].map(_join_itemset),
            col_support: rules["support"],
            col_confidence: rules["confidence"],
            col_lift: rules["lift"],
        })
        rules_out = rules_out.sort_values(
            col_lift, ascending=False).reset_index(drop=True)
        metric_cols = [col_support, col_confidence, col_lift]
        rules_out[metric_cols] = rules_out[metric_cols].round(4)

        return freq_out, rules_out, None

    except Exception as e:
        # Boundary handler: callers expect an error message, not an exception.
        return None, None, f"์ฐ๊ด๋ถ์ ์ค๋ฅ: {e}"
|
|