Spaces:
Sleeping
Sleeping
File size: 1,483 Bytes
eeef81e | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 | import pandas as pd
import os
from tqdm import tqdm
from collections import defaultdict
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori
dataPath = "data/static"
itemSetList = []
def loadDataSet():
with open(os.path.join(dataPath, "aprioriData.csv"), 'r') as f:
for line in f.readlines():
line = line.replace('\n', '')
cates = line.split(' ')
itemSetList.append(list(map(int, cates)))
def myApriori():
te = TransactionEncoder()
te_ary = te.fit(itemSetList).transform(itemSetList)
df = pd.DataFrame(te_ary, columns=te.columns_)
return df
def dataInit():
if os.path.exists(os.path.join(dataPath, "aprioriData.csv")):
return
df = pd.read_csv("data/static/static.csv")
user_category = defaultdict(set)
for idx, row in tqdm(df.iterrows(), total=df.shape[0], desc="category data generate"):
user_category[row['USER_ID']].add(row['CATEGORY_ID'])
with open(os.path.join(dataPath, "aprioriData.csv"), 'w+') as f:
for k, v in tqdm(user_category.items()):
f.write(' '.join(sorted(list(map(str, v))))+'\n')
if __name__ == '__main__':
dataInit()
loadDataSet()
df = myApriori()
frequent_itemsets = apriori(df, min_support=0.0035, use_colnames=True)
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(lambda x: len(x))
print(frequent_itemsets[(frequent_itemsets['length'] >= 2)])
|