VJyzCELERY commited on
Commit
dd2bea0
·
1 Parent(s): 69c4715

Optimized some things

Browse files
Files changed (3) hide show
  1. app.py +19 -9
  2. component.py +9 -13
  3. requirements.txt +1 -1
app.py CHANGED
@@ -128,8 +128,6 @@ categories_mlb = categories_mlb.fit(df_games_temp['Categories'])
128
  price_range_le = model.game_content_recommeder.price_range_encoder
129
  scaler = MinMaxScaler()
130
  scaler = scaler.fit(df_games_temp[['Year_Release','Average playtime forever','Game score','DLC count']].values)
131
- app_id_le = LabelEncoder()
132
- app_id_le = app_id_le.fit(df_games_temp['app_id'])
133
  numerical_col =['Year_Release','Average playtime forever','Game score','DLC count']
134
 
135
  genre_matrix = genre_mlb.transform(df_games_temp['Genres'])
@@ -139,6 +137,8 @@ categories_df = pd.DataFrame(categories_matrix,columns=categories_mlb.classes_,i
139
  game_df = pd.concat([df_games_temp[['app_id','Price_range']+numerical_col],genre_df,categories_df],axis=1)
140
  game_df['Price_range'] = price_range_le.transform(game_df['Price_range'])
141
  game_df[numerical_col] = scaler.transform(game_df[numerical_col].values)
 
 
142
 
143
  def recommend_game(description=None, app_name=None, price_range=None, year_release=None,
144
  excpected_playtime=None, game_score=None, dlc_count=None,
@@ -266,9 +266,11 @@ With that, we wanted to try and make a game recommendation based on description
266
 
267
  h2('2. Description of data')
268
  code_cell('df.describe()')
269
- gr.Dataframe(df_games_raw.describe())
270
-
271
- h2('3. Distribution of data')
 
 
272
  dropdown = gr.Dropdown(choices=list(df_games_raw.columns), label="Select Column for Distribution",value=list(df_games_raw.columns)[0] if len(df_games_raw.columns) > 0 else None,allow_custom_value=True)
273
  plot_output = gr.Plot(format='png')
274
  dropdown.change(plot_distribution, inputs=[gr.State(df_games_raw), dropdown], outputs=plot_output)
@@ -280,9 +282,12 @@ With that, we wanted to try and make a game recommendation based on description
280
 
281
  h2('2. Description of data')
282
  code_cell('df.describe()')
283
- gr.Dataframe(df_review_raw.describe())
284
 
285
- h2('3. Distribution of data')
 
 
 
286
  dropdown = gr.Dropdown(choices=list(df_review_raw.columns), label="Select Column for Distribution",value=list(df_review_raw.columns)[0] if len(df_review_raw.columns) > 0 else None,allow_custom_value=True)
287
  plot_output = gr.Plot(format='png')
288
  dropdown.change(plot_distribution, inputs=[gr.State(df_review_raw), dropdown], outputs=plot_output)
@@ -544,7 +549,6 @@ df_liked = df_liked.drop_duplicates(subset=['steamid', 'app_id'])
544
  p(f"Unique steamids: {df_liked['steamid'].nunique()}")
545
  p(f"Unique app_ids: {df_liked['app_id'].nunique()}")
546
  p(f"Total rows: {len(df_liked)}")
547
- p(f"Unique (steamid, app_id) pairs: {df_liked.drop_duplicates(subset=['steamid', 'app_id']).shape[0]}")
548
  h2("We're done here, next stop is Training!")
549
 
550
 
@@ -571,6 +575,8 @@ Training : {train_df.shape}
571
  Testing : {test_df.shape}
572
  Validation : {val_df.shape}
573
  """)
 
 
574
  code_cell("""
575
  X_train = vectorizer.fit_transform(train_df['cleaned_review'])
576
  y_train = review_app_id_encoder.fit_transform(train_df['app_id'])
@@ -616,6 +622,7 @@ classifier.fit(
616
  plot_outputval = gr.Plot(format='png')
617
  btnval = gr.Button("Generate Plot")
618
  btnval.click(fn=lambda:plot_training_results(n_estimator,history['validation_0']['merror'],history['validation_1']['merror'],'Training error','Validation error','merror','N Estimator'), inputs=[], outputs=plot_outputval, preprocess=False)
 
619
  y_pred = model.text_based_recommender.classifier.predict(vectorizer.transform(test_df['cleaned_review']))
620
  y_test = model.text_based_recommender.app_id_encoder.transform(test_df['app_id'])
621
  class_report = classification_report(y_test,y_pred)
@@ -743,6 +750,7 @@ class TextBasedRecommendation():
743
  aggfunc='max',
744
  fill_value=0
745
  )
 
746
  code_cell("""
747
  top_n=3001
748
  # Top n users with most reviews
@@ -760,7 +768,9 @@ user_item_matrix = df_liked.pivot_table(
760
  fill_value=0
761
  )
762
  """)
763
- gr.Dataframe(user_item_matrix.reset_index().head(10))
 
 
764
  code_cell("""
765
  from sklearn.decomposition import TruncatedSVD
766
  X = user_item_matrix.T
 
128
  price_range_le = model.game_content_recommeder.price_range_encoder
129
  scaler = MinMaxScaler()
130
  scaler = scaler.fit(df_games_temp[['Year_Release','Average playtime forever','Game score','DLC count']].values)
 
 
131
  numerical_col =['Year_Release','Average playtime forever','Game score','DLC count']
132
 
133
  genre_matrix = genre_mlb.transform(df_games_temp['Genres'])
 
137
  game_df = pd.concat([df_games_temp[['app_id','Price_range']+numerical_col],genre_df,categories_df],axis=1)
138
  game_df['Price_range'] = price_range_le.transform(game_df['Price_range'])
139
  game_df[numerical_col] = scaler.transform(game_df[numerical_col].values)
140
+ del categories_matrix,genre_matrix,categories_df,genre_df,scaler,price_range_le,categories_mlb,genre_mlb
141
+ gc.collect()
142
 
143
  def recommend_game(description=None, app_name=None, price_range=None, year_release=None,
144
  excpected_playtime=None, game_score=None, dlc_count=None,
 
266
 
267
  h2('2. Description of data')
268
  code_cell('df.describe()')
269
+ gr.Dataframe(df_games_raw.describe().reset_index())
270
+ h2('3. Missing values')
271
+ gr.Dataframe(show_missing_values(df_games_raw))
272
+
273
+ h2('4. Distribution of data')
274
  dropdown = gr.Dropdown(choices=list(df_games_raw.columns), label="Select Column for Distribution",value=list(df_games_raw.columns)[0] if len(df_games_raw.columns) > 0 else None,allow_custom_value=True)
275
  plot_output = gr.Plot(format='png')
276
  dropdown.change(plot_distribution, inputs=[gr.State(df_games_raw), dropdown], outputs=plot_output)
 
282
 
283
  h2('2. Description of data')
284
  code_cell('df.describe()')
285
+ gr.Dataframe(df_review_raw.describe().reset_index())
286
 
287
+ h2('3. Missing values')
288
+ gr.Dataframe(show_missing_values(df_review_raw))
289
+
290
+ h2('4. Distribution of data')
291
  dropdown = gr.Dropdown(choices=list(df_review_raw.columns), label="Select Column for Distribution",value=list(df_review_raw.columns)[0] if len(df_review_raw.columns) > 0 else None,allow_custom_value=True)
292
  plot_output = gr.Plot(format='png')
293
  dropdown.change(plot_distribution, inputs=[gr.State(df_review_raw), dropdown], outputs=plot_output)
 
549
  p(f"Unique steamids: {df_liked['steamid'].nunique()}")
550
  p(f"Unique app_ids: {df_liked['app_id'].nunique()}")
551
  p(f"Total rows: {len(df_liked)}")
 
552
  h2("We're done here, next stop is Training!")
553
 
554
 
 
575
  Testing : {test_df.shape}
576
  Validation : {val_df.shape}
577
  """)
578
+ del train_df,val_df
579
+ gc.collect()
580
  code_cell("""
581
  X_train = vectorizer.fit_transform(train_df['cleaned_review'])
582
  y_train = review_app_id_encoder.fit_transform(train_df['app_id'])
 
622
  plot_outputval = gr.Plot(format='png')
623
  btnval = gr.Button("Generate Plot")
624
  btnval.click(fn=lambda:plot_training_results(n_estimator,history['validation_0']['merror'],history['validation_1']['merror'],'Training error','Validation error','merror','N Estimator'), inputs=[], outputs=plot_outputval, preprocess=False)
625
+
626
  y_pred = model.text_based_recommender.classifier.predict(vectorizer.transform(test_df['cleaned_review']))
627
  y_test = model.text_based_recommender.app_id_encoder.transform(test_df['app_id'])
628
  class_report = classification_report(y_test,y_pred)
 
750
  aggfunc='max',
751
  fill_value=0
752
  )
753
+ user_item_matrix = user_item_matrix.reset_index().head(10)
754
  code_cell("""
755
  top_n=3001
756
  # Top n users with most reviews
 
768
  fill_value=0
769
  )
770
  """)
771
+ gr.Dataframe(user_item_matrix)
772
+ del user_item_matrix
773
+ gc.collect()
774
  code_cell("""
775
  from sklearn.decomposition import TruncatedSVD
776
  X = user_item_matrix.T
component.py CHANGED
@@ -54,16 +54,6 @@ def p(input:str):
54
 
55
  # this is for displaying the dataframe and also provides a CSV download
56
  def Dataset(df,title, source, key=None):
57
- """
58
- Creates a reusable dataset display component.
59
- This is displaying title, dataframe, and provide download button
60
- file path means file
61
- Args:
62
- df (pd.DataFrame): Dataset to display
63
- title (str): Title for the dataset display
64
- file_path (str): Path to the CSV file for download (the file name following the path)
65
- key (str): Optional unique identifier for Gradio components
66
- """
67
  def get_file():
68
  return source
69
 
@@ -78,15 +68,14 @@ def Dataset(df,title, source, key=None):
78
  )
79
 
80
  # Dataframe display
81
- df_display=gr.Dataframe(
82
- value=df.head(100),
83
  headers=list(df.columns),
84
  elem_id=f"table-{key}" if key else None,
85
  interactive=False, # read only
86
  # disable the warp for reduce height of data
87
  # wrap=True
88
  )
89
- return df_display
90
 
91
  def describe_value_counts(series):
92
  description = series.describe().to_frame(name='value')
@@ -196,6 +185,13 @@ def input_number(Label:str,Precision = 0,**kwargs):
196
  **kwargs
197
  )
198
  return inputbox
 
 
 
 
 
 
 
199
 
200
  def input_paragaph_textbox(Label:str, Placeholder:str):
201
  """
 
54
 
55
  # this is for displaying the dataframe and also provides a CSV download
56
  def Dataset(df,title, source, key=None):
 
 
 
 
 
 
 
 
 
 
57
  def get_file():
58
  return source
59
 
 
68
  )
69
 
70
  # Dataframe display
71
+ gr.Dataframe(
72
+ value=df.head(20),
73
  headers=list(df.columns),
74
  elem_id=f"table-{key}" if key else None,
75
  interactive=False, # read only
76
  # disable the warp for reduce height of data
77
  # wrap=True
78
  )
 
79
 
80
  def describe_value_counts(series):
81
  description = series.describe().to_frame(name='value')
 
185
  **kwargs
186
  )
187
  return inputbox
188
+ def show_missing_values(df:pd.DataFrame):
189
+ try:
190
+ missing_df = pd.DataFrame(df.isnull().sum(), columns=['Missing Values'])
191
+ missing_df = missing_df.reset_index().rename(columns={'index': 'Column'})
192
+ return missing_df
193
+ except Exception as e:
194
+ return pd.DataFrame({'Error': [str(e)]})
195
 
196
  def input_paragaph_textbox(Label:str, Placeholder:str):
197
  """
requirements.txt CHANGED
@@ -6,5 +6,5 @@ matplotlib==3.5.3
6
  nltk==3.8.1
7
  numpy==1.25.2
8
  pandas==2.3.0
9
- scikit_learn==1.3.0
10
  xgboost==3.0.2
 
6
  nltk==3.8.1
7
  numpy==1.25.2
8
  pandas==2.3.0
9
+ scikit_learn==1.6.0
10
  xgboost==3.0.2