VJyzCELERY committed on
Commit
65f7e0a
·
1 Parent(s): 70cbebe

Final push of the day

Browse files
Files changed (1) hide show
  1. app.py +36 -37
app.py CHANGED
@@ -111,6 +111,34 @@ price_ranges_labels = [
111
  "$40 - $49.99",
112
  "$50+"
113
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
 
115
  def recommend_game(description=None, app_name=None, price_range=None, year_release=None,
116
  excpected_playtime=None, game_score=None, dlc_count=None,
@@ -340,11 +368,11 @@ df_games_raw['Developers'] = df_games_raw['Developers'].fillna('')
340
  df_games_raw['Publishers'] = df_games_raw['Publishers'].fillna('')
341
  df_games_raw.to_csv('Cleaned_games.csv',index=False)
342
  """)
343
- h2('Games Data Cleaned')
344
- gr.Dataframe(df_games.head(20))
345
 
346
  h2('2.2. Review Preprocessing')
347
- Dataset(df_review_raw,'Review Data Raw',REVIEWS_DATAPATH)
348
  code_cell("""
349
  from nltk.tokenize import word_tokenize
350
  from nltk.corpus import stopwords
@@ -869,7 +897,7 @@ df = col_to_list(df,'Genres')
869
  df = col_to_list(df,'Categories')
870
  df = apply_price_range_labels(df,price_labels,price_bins)
871
  """)
872
- Dataset(df_games,"The game dataset",GAMES_DATAPATH)
873
 
874
  code_cell("""
875
  def extract_year(date_str):
@@ -885,36 +913,7 @@ df['Game score'] = np.where(
885
  0,
886
  (df['Positive'] / (df['Positive'] + df['Negative'])) * 100
887
  )""")
888
- def game_df_create():
889
- df_games_temp = df_games
890
- df_games_temp = col_to_list(df_games_temp,'Genres')
891
- df_games_temp = col_to_list(df_games_temp,'Categories')
892
- df_games_temp = apply_price_range_labels(df_games_temp,price_ranges_labels,price_bins)
893
- df_games_temp['Year_Release'] = df_games_temp['Release date'].apply(extract_year)
894
- df_games_temp['Game score'] = np.where(
895
- (df_games_temp['Positive'] + df_games_temp['Negative']) == 0,
896
- 0,
897
- (df_games_temp['Positive'] / (df_games_temp['Positive'] + df_games_temp['Negative'])) * 100
898
- )
899
- genre_mlb = MultiLabelBinarizer()
900
- genre_mlb = genre_mlb.fit(df_games_temp['Genres'])
901
- categories_mlb = MultiLabelBinarizer()
902
- categories_mlb = categories_mlb.fit(df_games_temp['Categories'])
903
- price_range_le = model.game_content_recommeder.price_range_encoder
904
- scaler = MinMaxScaler()
905
- scaler = scaler.fit(df_games_temp[['Year_Release','Average playtime forever','Game score','DLC count']].values)
906
- app_id_le = LabelEncoder()
907
- app_id_le = app_id_le.fit(df_games_temp['app_id'])
908
- numerical_col =['Year_Release','Average playtime forever','Game score','DLC count']
909
-
910
- genre_matrix = genre_mlb.transform(df_games_temp['Genres'])
911
- genre_df = pd.DataFrame(genre_matrix, columns=genre_mlb.classes_, index=df_games_temp.index)
912
- categories_matrix = categories_mlb.transform(df_games_temp['Categories'])
913
- categories_df = pd.DataFrame(categories_matrix,columns=categories_mlb.classes_,index=df_games_temp.index)
914
- game_df = pd.concat([df_games_temp[['app_id','Price_range']+numerical_col],genre_df,categories_df],axis=1)
915
- game_df['Price_range'] = price_range_le.transform(game_df['Price_range'])
916
- game_df[numerical_col] = scaler.transform(game_df[numerical_col].values)
917
- return game_df.head(10)
918
  code_cell("""
919
  from sklearn.preprocessing import MultiLabelBinarizer,LabelEncoder,MinMaxScaler
920
  genre_mlb = MultiLabelBinarizer()
@@ -936,9 +935,9 @@ categories_matrix = categories_mlb.transform(df['Categories'])
936
  categories_df = pd.DataFrame(categories_matrix,columns=categories_mlb.classes_,index=df.index)
937
  game_df = pd.concat([df[['app_id','Price_range']+numerical_col],genre_df,categories_df],axis=1)""")
938
 
939
- btn = gr.Button("Run game_df preprocess")
940
- output_game_df = gr.Dataframe()
941
- btn.click(fn=game_df_create, inputs=None, outputs=output_game_df)
942
  code_cell("""
943
  from sklearn.neighbors import KNeighborsClassifier
944
  X = game_df.loc[:,['Year_Release','Average playtime forever','Game score','DLC count','Price_range']+ list(genre_mlb.classes_) + list(categories_mlb.classes_)]
 
111
  "$40 - $49.99",
112
  "$50+"
113
  ]
114
+ df_games_temp = df_games
115
+ df_games_temp = col_to_list(df_games_temp,'Genres')
116
+ df_games_temp = col_to_list(df_games_temp,'Categories')
117
+ df_games_temp = apply_price_range_labels(df_games_temp,price_ranges_labels,price_bins)
118
+ df_games_temp['Year_Release'] = df_games_temp['Release date'].apply(extract_year)
119
+ df_games_temp['Game score'] = np.where(
120
+ (df_games_temp['Positive'] + df_games_temp['Negative']) == 0,
121
+ 0,
122
+ (df_games_temp['Positive'] / (df_games_temp['Positive'] + df_games_temp['Negative'])) * 100
123
+ )
124
+ genre_mlb = MultiLabelBinarizer()
125
+ genre_mlb = genre_mlb.fit(df_games_temp['Genres'])
126
+ categories_mlb = MultiLabelBinarizer()
127
+ categories_mlb = categories_mlb.fit(df_games_temp['Categories'])
128
+ price_range_le = model.game_content_recommeder.price_range_encoder
129
+ scaler = MinMaxScaler()
130
+ scaler = scaler.fit(df_games_temp[['Year_Release','Average playtime forever','Game score','DLC count']].values)
131
+ app_id_le = LabelEncoder()
132
+ app_id_le = app_id_le.fit(df_games_temp['app_id'])
133
+ numerical_col =['Year_Release','Average playtime forever','Game score','DLC count']
134
+
135
+ genre_matrix = genre_mlb.transform(df_games_temp['Genres'])
136
+ genre_df = pd.DataFrame(genre_matrix, columns=genre_mlb.classes_, index=df_games_temp.index)
137
+ categories_matrix = categories_mlb.transform(df_games_temp['Categories'])
138
+ categories_df = pd.DataFrame(categories_matrix,columns=categories_mlb.classes_,index=df_games_temp.index)
139
+ game_df = pd.concat([df_games_temp[['app_id','Price_range']+numerical_col],genre_df,categories_df],axis=1)
140
+ game_df['Price_range'] = price_range_le.transform(game_df['Price_range'])
141
+ game_df[numerical_col] = scaler.transform(game_df[numerical_col].values)
142
 
143
  def recommend_game(description=None, app_name=None, price_range=None, year_release=None,
144
  excpected_playtime=None, game_score=None, dlc_count=None,
 
368
  df_games_raw['Publishers'] = df_games_raw['Publishers'].fillna('')
369
  df_games_raw.to_csv('Cleaned_games.csv',index=False)
370
  """)
371
+ # h2('Games Data Cleaned')
372
+ # gr.Dataframe(df_games.head(20))
373
 
374
  h2('2.2. Review Preprocessing')
375
+ # Dataset(df_review_raw,'Review Data Raw',REVIEWS_DATAPATH)
376
  code_cell("""
377
  from nltk.tokenize import word_tokenize
378
  from nltk.corpus import stopwords
 
897
  df = col_to_list(df,'Categories')
898
  df = apply_price_range_labels(df,price_labels,price_bins)
899
  """)
900
+ # Dataset(df_games,"The game dataset",GAMES_DATAPATH)
901
 
902
  code_cell("""
903
  def extract_year(date_str):
 
913
  0,
914
  (df['Positive'] / (df['Positive'] + df['Negative'])) * 100
915
  )""")
916
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
917
  code_cell("""
918
  from sklearn.preprocessing import MultiLabelBinarizer,LabelEncoder,MinMaxScaler
919
  genre_mlb = MultiLabelBinarizer()
 
935
  categories_df = pd.DataFrame(categories_matrix,columns=categories_mlb.classes_,index=df.index)
936
  game_df = pd.concat([df[['app_id','Price_range']+numerical_col],genre_df,categories_df],axis=1)""")
937
 
938
+ gr.Dataframe(df_games_temp.head(10))
939
+ del df_games_temp
940
+ gc.collect()
941
  code_cell("""
942
  from sklearn.neighbors import KNeighborsClassifier
943
  X = game_df.loc[:,['Year_Release','Average playtime forever','Game score','DLC count','Price_range']+ list(genre_mlb.classes_) + list(categories_mlb.classes_)]