Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import os | |
| from tqdm import tqdm | |
| from collections import defaultdict | |
| from mlxtend.preprocessing import TransactionEncoder | |
| from mlxtend.frequent_patterns import apriori | |
| dataPath = "data/static" | |
| itemSetList = [] | |
| def loadDataSet(): | |
| with open(os.path.join(dataPath, "aprioriData.csv"), 'r') as f: | |
| for line in f.readlines(): | |
| line = line.replace('\n', '') | |
| cates = line.split(' ') | |
| itemSetList.append(list(map(int, cates))) | |
| def myApriori(): | |
| te = TransactionEncoder() | |
| te_ary = te.fit(itemSetList).transform(itemSetList) | |
| df = pd.DataFrame(te_ary, columns=te.columns_) | |
| return df | |
| def dataInit(): | |
| if os.path.exists(os.path.join(dataPath, "aprioriData.csv")): | |
| return | |
| df = pd.read_csv("data/static/static.csv") | |
| user_category = defaultdict(set) | |
| for idx, row in tqdm(df.iterrows(), total=df.shape[0], desc="category data generate"): | |
| user_category[row['USER_ID']].add(row['CATEGORY_ID']) | |
| with open(os.path.join(dataPath, "aprioriData.csv"), 'w+') as f: | |
| for k, v in tqdm(user_category.items()): | |
| f.write(' '.join(sorted(list(map(str, v))))+'\n') | |
| if __name__ == '__main__': | |
| dataInit() | |
| loadDataSet() | |
| df = myApriori() | |
| frequent_itemsets = apriori(df, min_support=0.0035, use_colnames=True) | |
| frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(lambda x: len(x)) | |
| print(frequent_itemsets[(frequent_itemsets['length'] >= 2)]) | |