Spaces:
Runtime error
Runtime error
| import gzip | |
| import hickle | |
| import _pickle as cPickle | |
| import itertools | |
| import time | |
def get_num_neighbor(G, etype):
    """Debug helper: print the edges of ``G`` for one edge type.

    Calls ``G.edges(etype=etype)`` and prints the returned object, then
    prints each element obtained by iterating over a second call's result.
    Nothing is returned.

    Args:
        G: Object exposing ``edges(etype=...)`` (presumably a DGL
            heterograph -- confirm against the caller).
        etype: Edge type forwarded to ``G.edges``.
    """
    print(G.edges(etype=etype))
    for edge_part in G.edges(etype=etype):
        print(edge_part)
def neighbormap(df, dic, user_dic, new_item_dic, col_user='user_id', col_item='item_id'):
    """Append each user's mapped items to that user's neighbor list.

    For every row of ``df``, the item is looked up in ``new_item_dic``; rows
    whose item is not a key there are skipped. Otherwise the mapped item is
    appended to ``dic[user_dic[user]]``.

    Rows are read positionally, so the frame does not need a default
    ``0..n-1`` index (the previous ``df.at[i, ...]`` lookup raised
    ``KeyError`` on filtered or re-indexed frames).

    Args:
        df: DataFrame containing the ``col_user`` and ``col_item`` columns.
        dic: Mapping from mapped user id to an appendable list; mutated
            in place. Every ``user_dic[user]`` reached must already be a key.
        user_dic: Mapping from raw user value to the key used in ``dic``.
        new_item_dic: Mapping from raw item value to the value to append.
        col_user: Name of the user column.
        col_item: Name of the item column.

    Returns:
        The same ``dic`` object, after mutation.

    Raises:
        KeyError: If a user with a known item is missing from ``user_dic``,
            or its mapped id is missing from ``dic``.
    """
    t = time.time()
    print('Start time')
    # Iterate the two columns in lockstep instead of label-based .at lookups:
    # independent of the index and avoids per-cell indexing overhead.
    for user, item in zip(df[col_user], df[col_item]):
        if item in new_item_dic:
            dic[user_dic[user]].append(new_item_dic[item])
    print('End time', time.time() - t)
    return dic
def split_char(str):
    """Split text into tokens: alphanumeric runs and single other characters.

    ASCII letters and digits are accumulated into a buffer. Characters in the
    skip set (space, parentheses, ``*``, ``【】``, ``/``, ``-``, ``.``) are
    dropped *without* ending the current run, so ``'a b'`` yields ``['ab']``.
    Any other character (e.g. a Chinese character) flushes the buffer as one
    token and is then emitted as its own token.

    Args:
        str: Text to split. The parameter name shadows the builtin but is
            kept for backward compatibility with keyword callers. A
            non-iterable value (e.g. ``None`` or a float NaN) is tolerated.

    Returns:
        List of token strings. If the input cannot be processed as text, the
        input is printed and whatever was collected so far is returned
        (an empty list for a non-iterable input).
    """
    english = 'abcdefghijklmnopqrstuvwxyz0123456789'
    english_upper = english.upper()  # hoisted: was recomputed per character
    output = []
    buffer = ''
    try:
        for s in str:
            if s in english or s in english_upper:  # English or numeric
                buffer += s
            elif s in ' ()*()【】/-.':  # Special symbol such as a space: skip it
                continue
            else:  # Any other character, e.g. Chinese
                if buffer:
                    output.append(buffer)
                    buffer = ''
                output.append(s)
        if buffer:
            output.append(buffer)
    except TypeError:
        # Best-effort: non-iterable or non-string input is reported, not
        # raised. Narrowed from a bare ``except`` so that KeyboardInterrupt
        # and unrelated errors are no longer silently swallowed.
        print(str)
    return output
def filter_sample(threshold, dic):
    """Keep entries with enough distinct neighbors, truncated to ``threshold``.

    An entry is kept when its value has at least ``threshold`` distinct
    elements; the kept sample is the first ``threshold`` elements of the
    value as given (so it may still contain duplicates). Keys of all other
    entries are reported separately.

    Args:
        threshold: Minimum number of distinct elements, and the sample length.
        dic: Mapping from key to a sliceable sequence of hashable neighbors.

    Returns:
        Tuple ``(samples, rejected_keys)``: the truncated sequences of the
        kept entries and the keys of the rejected ones, both in the
        iteration order of ``dic``.
    """
    samples = []
    rejected_keys = []
    for key, neighbors in dic.items():
        if len(set(neighbors)) >= threshold:
            samples.append(neighbors[:threshold])
            continue
        rejected_keys.append(key)
    return samples, rejected_keys
def combination(df, users, col_user='user_id', col_item='item_id'):
    """Build de-duplicated user-user pairs from users sharing an item.

    Steps:
      1. Keep only rows whose user is in ``users``.
      2. Keep only items that still have at least 10 rows.
      3. For each remaining item, take the first ``max(10, n)`` 2-combinations
         of its user list (``n`` = number of rows for that item), in row
         order.
      4. Remove duplicate pairs and transpose into two parallel tuples.

    Pairs are ordered; ``(a, b)`` and ``(b, a)`` are treated as different,
    and a user occurring twice under one item can produce ``(a, a)``.

    Args:
        df: DataFrame containing ``col_user`` and ``col_item``. Not modified.
        users: Iterable of hashable user values that are allowed.
        col_user: Name of the user column.
        col_item: Name of the item column.

    Returns:
        ``[sources, targets]`` as a list of two equal-length tuples, in
        first-seen pair order. An empty list when no pair is produced
        (previously this case raised ``IndexError``).
    """
    allowed_users = set(users)  # built once; was rebuilt for every element

    # Filter to allowed users. Assigning the result (instead of an in-place
    # reset on a slice) avoids pandas' chained-assignment warning.
    df = df[df[col_user].isin(users)].reset_index(drop=True)

    # Keep items clicked by at least 10 of the remaining rows.
    item_counts = df[col_item].value_counts()
    popular_items = item_counts[item_counts >= 10].index
    df = df[df[col_item].isin(popular_items)].reset_index(drop=True)

    grouped = df.groupby(col_item)
    print(df.shape, grouped.ngroups)

    out = []
    for _, group in grouped:
        members = [u for u in group[col_user].tolist() if u in allowed_users]
        # Same cap as the original slice, but taken lazily so the full
        # O(n^2) list of combinations is never materialised.
        limit = max(10, len(members))
        out.extend(itertools.islice(itertools.combinations(members, 2), limit))

    # dict.fromkeys de-duplicates while keeping a deterministic order.
    unique_pairs = list(dict.fromkeys(out))
    print('Number of sides after de-duplication:', len(unique_pairs))
    return list(zip(*unique_pairs))