VJyzCELERY
committed on
Commit
·
65f7e0a
1
Parent(s):
70cbebe
Final push of the day
Browse files
app.py
CHANGED
|
@@ -111,6 +111,34 @@ price_ranges_labels = [
|
|
| 111 |
"$40 - $49.99",
|
| 112 |
"$50+"
|
| 113 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
|
| 115 |
def recommend_game(description=None, app_name=None, price_range=None, year_release=None,
|
| 116 |
excpected_playtime=None, game_score=None, dlc_count=None,
|
|
@@ -340,11 +368,11 @@ df_games_raw['Developers'] = df_games_raw['Developers'].fillna('')
|
|
| 340 |
df_games_raw['Publishers'] = df_games_raw['Publishers'].fillna('')
|
| 341 |
df_games_raw.to_csv('Cleaned_games.csv',index=False)
|
| 342 |
""")
|
| 343 |
-
h2('Games Data Cleaned')
|
| 344 |
-
gr.Dataframe(df_games.head(20))
|
| 345 |
|
| 346 |
h2('2.2. Review Preprocessing')
|
| 347 |
-
Dataset(df_review_raw,'Review Data Raw',REVIEWS_DATAPATH)
|
| 348 |
code_cell("""
|
| 349 |
from nltk.tokenize import word_tokenize
|
| 350 |
from nltk.corpus import stopwords
|
|
@@ -869,7 +897,7 @@ df = col_to_list(df,'Genres')
|
|
| 869 |
df = col_to_list(df,'Categories')
|
| 870 |
df = apply_price_range_labels(df,price_labels,price_bins)
|
| 871 |
""")
|
| 872 |
-
Dataset(df_games,"The game dataset",GAMES_DATAPATH)
|
| 873 |
|
| 874 |
code_cell("""
|
| 875 |
def extract_year(date_str):
|
|
@@ -885,36 +913,7 @@ df['Game score'] = np.where(
|
|
| 885 |
0,
|
| 886 |
(df['Positive'] / (df['Positive'] + df['Negative'])) * 100
|
| 887 |
)""")
|
| 888 |
-
|
| 889 |
-
df_games_temp = df_games
|
| 890 |
-
df_games_temp = col_to_list(df_games_temp,'Genres')
|
| 891 |
-
df_games_temp = col_to_list(df_games_temp,'Categories')
|
| 892 |
-
df_games_temp = apply_price_range_labels(df_games_temp,price_ranges_labels,price_bins)
|
| 893 |
-
df_games_temp['Year_Release'] = df_games_temp['Release date'].apply(extract_year)
|
| 894 |
-
df_games_temp['Game score'] = np.where(
|
| 895 |
-
(df_games_temp['Positive'] + df_games_temp['Negative']) == 0,
|
| 896 |
-
0,
|
| 897 |
-
(df_games_temp['Positive'] / (df_games_temp['Positive'] + df_games_temp['Negative'])) * 100
|
| 898 |
-
)
|
| 899 |
-
genre_mlb = MultiLabelBinarizer()
|
| 900 |
-
genre_mlb = genre_mlb.fit(df_games_temp['Genres'])
|
| 901 |
-
categories_mlb = MultiLabelBinarizer()
|
| 902 |
-
categories_mlb = categories_mlb.fit(df_games_temp['Categories'])
|
| 903 |
-
price_range_le = model.game_content_recommeder.price_range_encoder
|
| 904 |
-
scaler = MinMaxScaler()
|
| 905 |
-
scaler = scaler.fit(df_games_temp[['Year_Release','Average playtime forever','Game score','DLC count']].values)
|
| 906 |
-
app_id_le = LabelEncoder()
|
| 907 |
-
app_id_le = app_id_le.fit(df_games_temp['app_id'])
|
| 908 |
-
numerical_col =['Year_Release','Average playtime forever','Game score','DLC count']
|
| 909 |
-
|
| 910 |
-
genre_matrix = genre_mlb.transform(df_games_temp['Genres'])
|
| 911 |
-
genre_df = pd.DataFrame(genre_matrix, columns=genre_mlb.classes_, index=df_games_temp.index)
|
| 912 |
-
categories_matrix = categories_mlb.transform(df_games_temp['Categories'])
|
| 913 |
-
categories_df = pd.DataFrame(categories_matrix,columns=categories_mlb.classes_,index=df_games_temp.index)
|
| 914 |
-
game_df = pd.concat([df_games_temp[['app_id','Price_range']+numerical_col],genre_df,categories_df],axis=1)
|
| 915 |
-
game_df['Price_range'] = price_range_le.transform(game_df['Price_range'])
|
| 916 |
-
game_df[numerical_col] = scaler.transform(game_df[numerical_col].values)
|
| 917 |
-
return game_df.head(10)
|
| 918 |
code_cell("""
|
| 919 |
from sklearn.preprocessing import MultiLabelBinarizer,LabelEncoder,MinMaxScaler
|
| 920 |
genre_mlb = MultiLabelBinarizer()
|
|
@@ -936,9 +935,9 @@ categories_matrix = categories_mlb.transform(df['Categories'])
|
|
| 936 |
categories_df = pd.DataFrame(categories_matrix,columns=categories_mlb.classes_,index=df.index)
|
| 937 |
game_df = pd.concat([df[['app_id','Price_range']+numerical_col],genre_df,categories_df],axis=1)""")
|
| 938 |
|
| 939 |
-
|
| 940 |
-
|
| 941 |
-
|
| 942 |
code_cell("""
|
| 943 |
from sklearn.neighbors import KNeighborsClassifier
|
| 944 |
X = game_df.loc[:,['Year_Release','Average playtime forever','Game score','DLC count','Price_range']+ list(genre_mlb.classes_) + list(categories_mlb.classes_)]
|
|
|
|
| 111 |
"$40 - $49.99",
|
| 112 |
"$50+"
|
| 113 |
]
|
| 114 |
+
df_games_temp = df_games
|
| 115 |
+
df_games_temp = col_to_list(df_games_temp,'Genres')
|
| 116 |
+
df_games_temp = col_to_list(df_games_temp,'Categories')
|
| 117 |
+
df_games_temp = apply_price_range_labels(df_games_temp,price_ranges_labels,price_bins)
|
| 118 |
+
df_games_temp['Year_Release'] = df_games_temp['Release date'].apply(extract_year)
|
| 119 |
+
df_games_temp['Game score'] = np.where(
|
| 120 |
+
(df_games_temp['Positive'] + df_games_temp['Negative']) == 0,
|
| 121 |
+
0,
|
| 122 |
+
(df_games_temp['Positive'] / (df_games_temp['Positive'] + df_games_temp['Negative'])) * 100
|
| 123 |
+
)
|
| 124 |
+
genre_mlb = MultiLabelBinarizer()
|
| 125 |
+
genre_mlb = genre_mlb.fit(df_games_temp['Genres'])
|
| 126 |
+
categories_mlb = MultiLabelBinarizer()
|
| 127 |
+
categories_mlb = categories_mlb.fit(df_games_temp['Categories'])
|
| 128 |
+
price_range_le = model.game_content_recommeder.price_range_encoder
|
| 129 |
+
scaler = MinMaxScaler()
|
| 130 |
+
scaler = scaler.fit(df_games_temp[['Year_Release','Average playtime forever','Game score','DLC count']].values)
|
| 131 |
+
app_id_le = LabelEncoder()
|
| 132 |
+
app_id_le = app_id_le.fit(df_games_temp['app_id'])
|
| 133 |
+
numerical_col =['Year_Release','Average playtime forever','Game score','DLC count']
|
| 134 |
+
|
| 135 |
+
genre_matrix = genre_mlb.transform(df_games_temp['Genres'])
|
| 136 |
+
genre_df = pd.DataFrame(genre_matrix, columns=genre_mlb.classes_, index=df_games_temp.index)
|
| 137 |
+
categories_matrix = categories_mlb.transform(df_games_temp['Categories'])
|
| 138 |
+
categories_df = pd.DataFrame(categories_matrix,columns=categories_mlb.classes_,index=df_games_temp.index)
|
| 139 |
+
game_df = pd.concat([df_games_temp[['app_id','Price_range']+numerical_col],genre_df,categories_df],axis=1)
|
| 140 |
+
game_df['Price_range'] = price_range_le.transform(game_df['Price_range'])
|
| 141 |
+
game_df[numerical_col] = scaler.transform(game_df[numerical_col].values)
|
| 142 |
|
| 143 |
def recommend_game(description=None, app_name=None, price_range=None, year_release=None,
|
| 144 |
excpected_playtime=None, game_score=None, dlc_count=None,
|
|
|
|
| 368 |
df_games_raw['Publishers'] = df_games_raw['Publishers'].fillna('')
|
| 369 |
df_games_raw.to_csv('Cleaned_games.csv',index=False)
|
| 370 |
""")
|
| 371 |
+
# h2('Games Data Cleaned')
|
| 372 |
+
# gr.Dataframe(df_games.head(20))
|
| 373 |
|
| 374 |
h2('2.2. Review Preprocessing')
|
| 375 |
+
# Dataset(df_review_raw,'Review Data Raw',REVIEWS_DATAPATH)
|
| 376 |
code_cell("""
|
| 377 |
from nltk.tokenize import word_tokenize
|
| 378 |
from nltk.corpus import stopwords
|
|
|
|
| 897 |
df = col_to_list(df,'Categories')
|
| 898 |
df = apply_price_range_labels(df,price_labels,price_bins)
|
| 899 |
""")
|
| 900 |
+
# Dataset(df_games,"The game dataset",GAMES_DATAPATH)
|
| 901 |
|
| 902 |
code_cell("""
|
| 903 |
def extract_year(date_str):
|
|
|
|
| 913 |
0,
|
| 914 |
(df['Positive'] / (df['Positive'] + df['Negative'])) * 100
|
| 915 |
)""")
|
| 916 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 917 |
code_cell("""
|
| 918 |
from sklearn.preprocessing import MultiLabelBinarizer,LabelEncoder,MinMaxScaler
|
| 919 |
genre_mlb = MultiLabelBinarizer()
|
|
|
|
| 935 |
categories_df = pd.DataFrame(categories_matrix,columns=categories_mlb.classes_,index=df.index)
|
| 936 |
game_df = pd.concat([df[['app_id','Price_range']+numerical_col],genre_df,categories_df],axis=1)""")
|
| 937 |
|
| 938 |
+
gr.Dataframe(df_games_temp.head(10))
|
| 939 |
+
del df_games_temp
|
| 940 |
+
gc.collect()
|
| 941 |
code_cell("""
|
| 942 |
from sklearn.neighbors import KNeighborsClassifier
|
| 943 |
X = game_df.loc[:,['Year_Release','Average playtime forever','Game score','DLC count','Price_range']+ list(genre_mlb.classes_) + list(categories_mlb.classes_)]
|