VJyzCELERY
committed on
Commit
·
dd2bea0
1
Parent(s):
69c4715
Optimized some thing
Browse files- app.py +19 -9
- component.py +9 -13
- requirements.txt +1 -1
app.py
CHANGED
|
@@ -128,8 +128,6 @@ categories_mlb = categories_mlb.fit(df_games_temp['Categories'])
|
|
| 128 |
price_range_le = model.game_content_recommeder.price_range_encoder
|
| 129 |
scaler = MinMaxScaler()
|
| 130 |
scaler = scaler.fit(df_games_temp[['Year_Release','Average playtime forever','Game score','DLC count']].values)
|
| 131 |
-
app_id_le = LabelEncoder()
|
| 132 |
-
app_id_le = app_id_le.fit(df_games_temp['app_id'])
|
| 133 |
numerical_col =['Year_Release','Average playtime forever','Game score','DLC count']
|
| 134 |
|
| 135 |
genre_matrix = genre_mlb.transform(df_games_temp['Genres'])
|
|
@@ -139,6 +137,8 @@ categories_df = pd.DataFrame(categories_matrix,columns=categories_mlb.classes_,i
|
|
| 139 |
game_df = pd.concat([df_games_temp[['app_id','Price_range']+numerical_col],genre_df,categories_df],axis=1)
|
| 140 |
game_df['Price_range'] = price_range_le.transform(game_df['Price_range'])
|
| 141 |
game_df[numerical_col] = scaler.transform(game_df[numerical_col].values)
|
|
|
|
|
|
|
| 142 |
|
| 143 |
def recommend_game(description=None, app_name=None, price_range=None, year_release=None,
|
| 144 |
excpected_playtime=None, game_score=None, dlc_count=None,
|
|
@@ -266,9 +266,11 @@ With that, we wanted to try and make a game recommendation based on description
|
|
| 266 |
|
| 267 |
h2('2. Description of data')
|
| 268 |
code_cell('df.describe()')
|
| 269 |
-
gr.Dataframe(df_games_raw.describe())
|
| 270 |
-
|
| 271 |
-
|
|
|
|
|
|
|
| 272 |
dropdown = gr.Dropdown(choices=list(df_games_raw.columns), label="Select Column for Distribution",value=list(df_games_raw.columns)[0] if len(df_games_raw.columns) > 0 else None,allow_custom_value=True)
|
| 273 |
plot_output = gr.Plot(format='png')
|
| 274 |
dropdown.change(plot_distribution, inputs=[gr.State(df_games_raw), dropdown], outputs=plot_output)
|
|
@@ -280,9 +282,12 @@ With that, we wanted to try and make a game recommendation based on description
|
|
| 280 |
|
| 281 |
h2('2. Description of data')
|
| 282 |
code_cell('df.describe()')
|
| 283 |
-
gr.Dataframe(df_review_raw.describe())
|
| 284 |
|
| 285 |
-
h2('3.
|
|
|
|
|
|
|
|
|
|
| 286 |
dropdown = gr.Dropdown(choices=list(df_review_raw.columns), label="Select Column for Distribution",value=list(df_review_raw.columns)[0] if len(df_review_raw.columns) > 0 else None,allow_custom_value=True)
|
| 287 |
plot_output = gr.Plot(format='png')
|
| 288 |
dropdown.change(plot_distribution, inputs=[gr.State(df_review_raw), dropdown], outputs=plot_output)
|
|
@@ -544,7 +549,6 @@ df_liked = df_liked.drop_duplicates(subset=['steamid', 'app_id'])
|
|
| 544 |
p(f"Unique steamids: {df_liked['steamid'].nunique()}")
|
| 545 |
p(f"Unique app_ids: {df_liked['app_id'].nunique()}")
|
| 546 |
p(f"Total rows: {len(df_liked)}")
|
| 547 |
-
p(f"Unique (steamid, app_id) pairs: {df_liked.drop_duplicates(subset=['steamid', 'app_id']).shape[0]}")
|
| 548 |
h2("We're done here, next stop is Training!")
|
| 549 |
|
| 550 |
|
|
@@ -571,6 +575,8 @@ Training : {train_df.shape}
|
|
| 571 |
Testing : {test_df.shape}
|
| 572 |
Validation : {val_df.shape}
|
| 573 |
""")
|
|
|
|
|
|
|
| 574 |
code_cell("""
|
| 575 |
X_train = vectorizer.fit_transform(train_df['cleaned_review'])
|
| 576 |
y_train = review_app_id_encoder.fit_transform(train_df['app_id'])
|
|
@@ -616,6 +622,7 @@ classifier.fit(
|
|
| 616 |
plot_outputval = gr.Plot(format='png')
|
| 617 |
btnval = gr.Button("Generate Plot")
|
| 618 |
btnval.click(fn=lambda:plot_training_results(n_estimator,history['validation_0']['merror'],history['validation_1']['merror'],'Training error','Validation error','merror','N Estimator'), inputs=[], outputs=plot_outputval, preprocess=False)
|
|
|
|
| 619 |
y_pred = model.text_based_recommender.classifier.predict(vectorizer.transform(test_df['cleaned_review']))
|
| 620 |
y_test = model.text_based_recommender.app_id_encoder.transform(test_df['app_id'])
|
| 621 |
class_report = classification_report(y_test,y_pred)
|
|
@@ -743,6 +750,7 @@ class TextBasedRecommendation():
|
|
| 743 |
aggfunc='max',
|
| 744 |
fill_value=0
|
| 745 |
)
|
|
|
|
| 746 |
code_cell("""
|
| 747 |
top_n=3001
|
| 748 |
# Top n users with most reviews
|
|
@@ -760,7 +768,9 @@ user_item_matrix = df_liked.pivot_table(
|
|
| 760 |
fill_value=0
|
| 761 |
)
|
| 762 |
""")
|
| 763 |
-
gr.Dataframe(user_item_matrix
|
|
|
|
|
|
|
| 764 |
code_cell("""
|
| 765 |
from sklearn.decomposition import TruncatedSVD
|
| 766 |
X = user_item_matrix.T
|
|
|
|
| 128 |
price_range_le = model.game_content_recommeder.price_range_encoder
|
| 129 |
scaler = MinMaxScaler()
|
| 130 |
scaler = scaler.fit(df_games_temp[['Year_Release','Average playtime forever','Game score','DLC count']].values)
|
|
|
|
|
|
|
| 131 |
numerical_col =['Year_Release','Average playtime forever','Game score','DLC count']
|
| 132 |
|
| 133 |
genre_matrix = genre_mlb.transform(df_games_temp['Genres'])
|
|
|
|
| 137 |
game_df = pd.concat([df_games_temp[['app_id','Price_range']+numerical_col],genre_df,categories_df],axis=1)
|
| 138 |
game_df['Price_range'] = price_range_le.transform(game_df['Price_range'])
|
| 139 |
game_df[numerical_col] = scaler.transform(game_df[numerical_col].values)
|
| 140 |
+
del categories_matrix,genre_matrix,categories_df,genre_df,scaler,price_range_le,categories_mlb,genre_mlb
|
| 141 |
+
gc.collect()
|
| 142 |
|
| 143 |
def recommend_game(description=None, app_name=None, price_range=None, year_release=None,
|
| 144 |
excpected_playtime=None, game_score=None, dlc_count=None,
|
|
|
|
| 266 |
|
| 267 |
h2('2. Description of data')
|
| 268 |
code_cell('df.describe()')
|
| 269 |
+
gr.Dataframe(df_games_raw.describe().reset_index())
|
| 270 |
+
h2('3. Missing values')
|
| 271 |
+
gr.Dataframe(show_missing_values(df_games_raw))
|
| 272 |
+
|
| 273 |
+
h2('4. Distribution of data')
|
| 274 |
dropdown = gr.Dropdown(choices=list(df_games_raw.columns), label="Select Column for Distribution",value=list(df_games_raw.columns)[0] if len(df_games_raw.columns) > 0 else None,allow_custom_value=True)
|
| 275 |
plot_output = gr.Plot(format='png')
|
| 276 |
dropdown.change(plot_distribution, inputs=[gr.State(df_games_raw), dropdown], outputs=plot_output)
|
|
|
|
| 282 |
|
| 283 |
h2('2. Description of data')
|
| 284 |
code_cell('df.describe()')
|
| 285 |
+
gr.Dataframe(df_review_raw.describe().reset_index())
|
| 286 |
|
| 287 |
+
h2('3. Missing values')
|
| 288 |
+
gr.Dataframe(show_missing_values(df_review_raw))
|
| 289 |
+
|
| 290 |
+
h2('4. Distribution of data')
|
| 291 |
dropdown = gr.Dropdown(choices=list(df_review_raw.columns), label="Select Column for Distribution",value=list(df_review_raw.columns)[0] if len(df_review_raw.columns) > 0 else None,allow_custom_value=True)
|
| 292 |
plot_output = gr.Plot(format='png')
|
| 293 |
dropdown.change(plot_distribution, inputs=[gr.State(df_review_raw), dropdown], outputs=plot_output)
|
|
|
|
| 549 |
p(f"Unique steamids: {df_liked['steamid'].nunique()}")
|
| 550 |
p(f"Unique app_ids: {df_liked['app_id'].nunique()}")
|
| 551 |
p(f"Total rows: {len(df_liked)}")
|
|
|
|
| 552 |
h2("We're done here, next stop is Training!")
|
| 553 |
|
| 554 |
|
|
|
|
| 575 |
Testing : {test_df.shape}
|
| 576 |
Validation : {val_df.shape}
|
| 577 |
""")
|
| 578 |
+
del train_df,val_df
|
| 579 |
+
gc.collect()
|
| 580 |
code_cell("""
|
| 581 |
X_train = vectorizer.fit_transform(train_df['cleaned_review'])
|
| 582 |
y_train = review_app_id_encoder.fit_transform(train_df['app_id'])
|
|
|
|
| 622 |
plot_outputval = gr.Plot(format='png')
|
| 623 |
btnval = gr.Button("Generate Plot")
|
| 624 |
btnval.click(fn=lambda:plot_training_results(n_estimator,history['validation_0']['merror'],history['validation_1']['merror'],'Training error','Validation error','merror','N Estimator'), inputs=[], outputs=plot_outputval, preprocess=False)
|
| 625 |
+
|
| 626 |
y_pred = model.text_based_recommender.classifier.predict(vectorizer.transform(test_df['cleaned_review']))
|
| 627 |
y_test = model.text_based_recommender.app_id_encoder.transform(test_df['app_id'])
|
| 628 |
class_report = classification_report(y_test,y_pred)
|
|
|
|
| 750 |
aggfunc='max',
|
| 751 |
fill_value=0
|
| 752 |
)
|
| 753 |
+
user_item_matrix = user_item_matrix.reset_index().head(10)
|
| 754 |
code_cell("""
|
| 755 |
top_n=3001
|
| 756 |
# Top n users with most reviews
|
|
|
|
| 768 |
fill_value=0
|
| 769 |
)
|
| 770 |
""")
|
| 771 |
+
gr.Dataframe(user_item_matrix)
|
| 772 |
+
del user_item_matrix
|
| 773 |
+
gc.collect()
|
| 774 |
code_cell("""
|
| 775 |
from sklearn.decomposition import TruncatedSVD
|
| 776 |
X = user_item_matrix.T
|
component.py
CHANGED
|
@@ -54,16 +54,6 @@ def p(input:str):
|
|
| 54 |
|
| 55 |
# Displays the dataframe and also provides a CSV download
|
| 56 |
def Dataset(df,title, source, key=None):
|
| 57 |
-
"""
|
| 58 |
-
Creates a reusable dataset display component.
|
| 59 |
-
This is displaying title, dataframe, and provide download button
|
| 60 |
-
file path means file
|
| 61 |
-
Args:
|
| 62 |
-
df (pd.DataFrame): Dataset to display
|
| 63 |
-
title (str): Title for the dataset display
|
| 64 |
-
file_path (str): Path to the CSV file for download (the file name following the path)
|
| 65 |
-
key (str): Optional unique identifier for Gradio components
|
| 66 |
-
"""
|
| 67 |
def get_file():
|
| 68 |
return source
|
| 69 |
|
|
@@ -78,15 +68,14 @@ def Dataset(df,title, source, key=None):
|
|
| 78 |
)
|
| 79 |
|
| 80 |
# Dataframe display
|
| 81 |
-
|
| 82 |
-
value=df.head(
|
| 83 |
headers=list(df.columns),
|
| 84 |
elem_id=f"table-{key}" if key else None,
|
| 85 |
interactive=False, # read only
|
| 86 |
# wrapping is disabled to reduce the height of the table
|
| 87 |
# wrap=True
|
| 88 |
)
|
| 89 |
-
return df_display
|
| 90 |
|
| 91 |
def describe_value_counts(series):
|
| 92 |
description = series.describe().to_frame(name='value')
|
|
@@ -196,6 +185,13 @@ def input_number(Label:str,Precision = 0,**kwargs):
|
|
| 196 |
**kwargs
|
| 197 |
)
|
| 198 |
return inputbox
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 199 |
|
| 200 |
def input_paragaph_textbox(Label:str, Placeholder:str):
|
| 201 |
"""
|
|
|
|
| 54 |
|
| 55 |
# Displays the dataframe and also provides a CSV download
|
| 56 |
def Dataset(df,title, source, key=None):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
def get_file():
|
| 58 |
return source
|
| 59 |
|
|
|
|
| 68 |
)
|
| 69 |
|
| 70 |
# Dataframe display
|
| 71 |
+
gr.Dataframe(
|
| 72 |
+
value=df.head(20),
|
| 73 |
headers=list(df.columns),
|
| 74 |
elem_id=f"table-{key}" if key else None,
|
| 75 |
interactive=False, # read only
|
| 76 |
# wrapping is disabled to reduce the height of the table
|
| 77 |
# wrap=True
|
| 78 |
)
|
|
|
|
| 79 |
|
| 80 |
def describe_value_counts(series):
|
| 81 |
description = series.describe().to_frame(name='value')
|
|
|
|
| 185 |
**kwargs
|
| 186 |
)
|
| 187 |
return inputbox
|
| 188 |
+
def show_missing_values(df:pd.DataFrame):
|
| 189 |
+
try:
|
| 190 |
+
missing_df = pd.DataFrame(df.isnull().sum(), columns=['Missing Values'])
|
| 191 |
+
missing_df = missing_df.reset_index().rename(columns={'index': 'Column'})
|
| 192 |
+
return missing_df
|
| 193 |
+
except Exception as e:
|
| 194 |
+
return pd.DataFrame({'Error': [str(e)]})
|
| 195 |
|
| 196 |
def input_paragaph_textbox(Label:str, Placeholder:str):
|
| 197 |
"""
|
requirements.txt
CHANGED
|
@@ -6,5 +6,5 @@ matplotlib==3.5.3
|
|
| 6 |
nltk==3.8.1
|
| 7 |
numpy==1.25.2
|
| 8 |
pandas==2.3.0
|
| 9 |
-
scikit_learn==1.
|
| 10 |
xgboost==3.0.2
|
|
|
|
| 6 |
nltk==3.8.1
|
| 7 |
numpy==1.25.2
|
| 8 |
pandas==2.3.0
|
| 9 |
+
scikit_learn==1.6.0
|
| 10 |
xgboost==3.0.2
|