VJyzCELERY commited on
Commit
dd2bea0
·
1 Parent(s): 69c4715

Optimized some things

Browse files
Files changed (3) hide show
  1. app.py +19 -9
  2. component.py +9 -13
  3. requirements.txt +1 -1
app.py CHANGED
@@ -128,8 +128,6 @@ categories_mlb = categories_mlb.fit(df_games_temp['Categories'])
128
  price_range_le = model.game_content_recommeder.price_range_encoder
129
  scaler = MinMaxScaler()
130
  scaler = scaler.fit(df_games_temp[['Year_Release','Average playtime forever','Game score','DLC count']].values)
131
- app_id_le = LabelEncoder()
132
- app_id_le = app_id_le.fit(df_games_temp['app_id'])
133
  numerical_col =['Year_Release','Average playtime forever','Game score','DLC count']
134
 
135
  genre_matrix = genre_mlb.transform(df_games_temp['Genres'])
@@ -139,6 +137,8 @@ categories_df = pd.DataFrame(categories_matrix,columns=categories_mlb.classes_,i
139
  game_df = pd.concat([df_games_temp[['app_id','Price_range']+numerical_col],genre_df,categories_df],axis=1)
140
  game_df['Price_range'] = price_range_le.transform(game_df['Price_range'])
141
  game_df[numerical_col] = scaler.transform(game_df[numerical_col].values)
 
 
142
 
143
  def recommend_game(description=None, app_name=None, price_range=None, year_release=None,
144
  excpected_playtime=None, game_score=None, dlc_count=None,
@@ -266,9 +266,11 @@ With that, we wanted to try and make a game recommendation based on description
266
 
267
  h2('2. Description of data')
268
  code_cell('df.describe()')
269
- gr.Dataframe(df_games_raw.describe())
270
-
271
- h2('3. Distribution of data')
 
 
272
  dropdown = gr.Dropdown(choices=list(df_games_raw.columns), label="Select Column for Distribution",value=list(df_games_raw.columns)[0] if len(df_games_raw.columns) > 0 else None,allow_custom_value=True)
273
  plot_output = gr.Plot(format='png')
274
  dropdown.change(plot_distribution, inputs=[gr.State(df_games_raw), dropdown], outputs=plot_output)
@@ -280,9 +282,12 @@ With that, we wanted to try and make a game recommendation based on description
280
 
281
  h2('2. Description of data')
282
  code_cell('df.describe()')
283
- gr.Dataframe(df_review_raw.describe())
284
 
285
- h2('3. Distribution of data')
 
 
 
286
  dropdown = gr.Dropdown(choices=list(df_review_raw.columns), label="Select Column for Distribution",value=list(df_review_raw.columns)[0] if len(df_review_raw.columns) > 0 else None,allow_custom_value=True)
287
  plot_output = gr.Plot(format='png')
288
  dropdown.change(plot_distribution, inputs=[gr.State(df_review_raw), dropdown], outputs=plot_output)
@@ -544,7 +549,6 @@ df_liked = df_liked.drop_duplicates(subset=['steamid', 'app_id'])
544
  p(f"Unique steamids: {df_liked['steamid'].nunique()}")
545
  p(f"Unique app_ids: {df_liked['app_id'].nunique()}")
546
  p(f"Total rows: {len(df_liked)}")
547
- p(f"Unique (steamid, app_id) pairs: {df_liked.drop_duplicates(subset=['steamid', 'app_id']).shape[0]}")
548
  h2("We're done here, next stop is Training!")
549
 
550
 
@@ -571,6 +575,8 @@ Training : {train_df.shape}
571
  Testing : {test_df.shape}
572
  Validation : {val_df.shape}
573
  """)
 
 
574
  code_cell("""
575
  X_train = vectorizer.fit_transform(train_df['cleaned_review'])
576
  y_train = review_app_id_encoder.fit_transform(train_df['app_id'])
@@ -616,6 +622,7 @@ classifier.fit(
616
  plot_outputval = gr.Plot(format='png')
617
  btnval = gr.Button("Generate Plot")
618
  btnval.click(fn=lambda:plot_training_results(n_estimator,history['validation_0']['merror'],history['validation_1']['merror'],'Training error','Validation error','merror','N Estimator'), inputs=[], outputs=plot_outputval, preprocess=False)
 
619
  y_pred = model.text_based_recommender.classifier.predict(vectorizer.transform(test_df['cleaned_review']))
620
  y_test = model.text_based_recommender.app_id_encoder.transform(test_df['app_id'])
621
  class_report = classification_report(y_test,y_pred)
@@ -743,6 +750,7 @@ class TextBasedRecommendation():
743
  aggfunc='max',
744
  fill_value=0
745
  )
 
746
  code_cell("""
747
  top_n=3001
748
  # Top n users with most reviews
@@ -760,7 +768,9 @@ user_item_matrix = df_liked.pivot_table(
760
  fill_value=0
761
  )
762
  """)
763
- gr.Dataframe(user_item_matrix.reset_index().head(10))
 
 
764
  code_cell("""
765
  from sklearn.decomposition import TruncatedSVD
766
  X = user_item_matrix.T
 
128
  price_range_le = model.game_content_recommeder.price_range_encoder
129
  scaler = MinMaxScaler()
130
  scaler = scaler.fit(df_games_temp[['Year_Release','Average playtime forever','Game score','DLC count']].values)
 
 
131
  numerical_col =['Year_Release','Average playtime forever','Game score','DLC count']
132
 
133
  genre_matrix = genre_mlb.transform(df_games_temp['Genres'])
 
137
  game_df = pd.concat([df_games_temp[['app_id','Price_range']+numerical_col],genre_df,categories_df],axis=1)
138
  game_df['Price_range'] = price_range_le.transform(game_df['Price_range'])
139
  game_df[numerical_col] = scaler.transform(game_df[numerical_col].values)
140
+ del categories_matrix,genre_matrix,categories_df,genre_df,scaler,price_range_le,categories_mlb,genre_mlb
141
+ gc.collect()
142
 
143
  def recommend_game(description=None, app_name=None, price_range=None, year_release=None,
144
  excpected_playtime=None, game_score=None, dlc_count=None,
 
266
 
267
  h2('2. Description of data')
268
  code_cell('df.describe()')
269
+ gr.Dataframe(df_games_raw.describe().reset_index())
270
+ h2('3. Missing values')
271
+ gr.Dataframe(show_missing_values(df_games_raw))
272
+
273
+ h2('4. Distribution of data')
274
  dropdown = gr.Dropdown(choices=list(df_games_raw.columns), label="Select Column for Distribution",value=list(df_games_raw.columns)[0] if len(df_games_raw.columns) > 0 else None,allow_custom_value=True)
275
  plot_output = gr.Plot(format='png')
276
  dropdown.change(plot_distribution, inputs=[gr.State(df_games_raw), dropdown], outputs=plot_output)
 
282
 
283
  h2('2. Description of data')
284
  code_cell('df.describe()')
285
+ gr.Dataframe(df_review_raw.describe().reset_index())
286
 
287
+ h2('3. Missing values')
288
+ gr.Dataframe(show_missing_values(df_review_raw))
289
+
290
+ h2('4. Distribution of data')
291
  dropdown = gr.Dropdown(choices=list(df_review_raw.columns), label="Select Column for Distribution",value=list(df_review_raw.columns)[0] if len(df_review_raw.columns) > 0 else None,allow_custom_value=True)
292
  plot_output = gr.Plot(format='png')
293
  dropdown.change(plot_distribution, inputs=[gr.State(df_review_raw), dropdown], outputs=plot_output)
 
549
  p(f"Unique steamids: {df_liked['steamid'].nunique()}")
550
  p(f"Unique app_ids: {df_liked['app_id'].nunique()}")
551
  p(f"Total rows: {len(df_liked)}")
 
552
  h2("We're done here, next stop is Training!")
553
 
554
 
 
575
  Testing : {test_df.shape}
576
  Validation : {val_df.shape}
577
  """)
578
+ del train_df,val_df
579
+ gc.collect()
580
  code_cell("""
581
  X_train = vectorizer.fit_transform(train_df['cleaned_review'])
582
  y_train = review_app_id_encoder.fit_transform(train_df['app_id'])
 
622
  plot_outputval = gr.Plot(format='png')
623
  btnval = gr.Button("Generate Plot")
624
  btnval.click(fn=lambda:plot_training_results(n_estimator,history['validation_0']['merror'],history['validation_1']['merror'],'Training error','Validation error','merror','N Estimator'), inputs=[], outputs=plot_outputval, preprocess=False)
625
+
626
  y_pred = model.text_based_recommender.classifier.predict(vectorizer.transform(test_df['cleaned_review']))
627
  y_test = model.text_based_recommender.app_id_encoder.transform(test_df['app_id'])
628
  class_report = classification_report(y_test,y_pred)
 
750
  aggfunc='max',
751
  fill_value=0
752
  )
753
+ user_item_matrix = user_item_matrix.reset_index().head(10)
754
  code_cell("""
755
  top_n=3001
756
  # Top n users with most reviews
 
768
  fill_value=0
769
  )
770
  """)
771
+ gr.Dataframe(user_item_matrix)
772
+ del user_item_matrix
773
+ gc.collect()
774
  code_cell("""
775
  from sklearn.decomposition import TruncatedSVD
776
  X = user_item_matrix.T
component.py CHANGED
@@ -54,16 +54,6 @@ def p(input:str):
54
 
55
  # this is for displaying the dataframe and also provides a CSV download
56
  def Dataset(df,title, source, key=None):
57
- """
58
- Creates a reusable dataset display component.
59
- This is displaying title, dataframe, and provide download button
60
- file path means file
61
- Args:
62
- df (pd.DataFrame): Dataset to display
63
- title (str): Title for the dataset display
64
- file_path (str): Path to the CSV file for download (the file name following the path)
65
- key (str): Optional unique identifier for Gradio components
66
- """
67
  def get_file():
68
  return source
69
 
@@ -78,15 +68,14 @@ def Dataset(df,title, source, key=None):
78
  )
79
 
80
  # Dataframe display
81
- df_display=gr.Dataframe(
82
- value=df.head(100),
83
  headers=list(df.columns),
84
  elem_id=f"table-{key}" if key else None,
85
  interactive=False, # read only
86
  # disable the warp for reduce height of data
87
  # wrap=True
88
  )
89
- return df_display
90
 
91
  def describe_value_counts(series):
92
  description = series.describe().to_frame(name='value')
@@ -196,6 +185,13 @@ def input_number(Label:str,Precision = 0,**kwargs):
196
  **kwargs
197
  )
198
  return inputbox
 
 
 
 
 
 
 
199
 
200
  def input_paragaph_textbox(Label:str, Placeholder:str):
201
  """
 
54
 
55
  # this is for displaying the dataframe and also provides a CSV download
56
  def Dataset(df,title, source, key=None):
 
 
 
 
 
 
 
 
 
 
57
  def get_file():
58
  return source
59
 
 
68
  )
69
 
70
  # Dataframe display
71
+ gr.Dataframe(
72
+ value=df.head(20),
73
  headers=list(df.columns),
74
  elem_id=f"table-{key}" if key else None,
75
  interactive=False, # read only
76
  # disable the warp for reduce height of data
77
  # wrap=True
78
  )
 
79
 
80
  def describe_value_counts(series):
81
  description = series.describe().to_frame(name='value')
 
185
  **kwargs
186
  )
187
  return inputbox
188
+ def show_missing_values(df:pd.DataFrame):
189
+ try:
190
+ missing_df = pd.DataFrame(df.isnull().sum(), columns=['Missing Values'])
191
+ missing_df = missing_df.reset_index().rename(columns={'index': 'Column'})
192
+ return missing_df
193
+ except Exception as e:
194
+ return pd.DataFrame({'Error': [str(e)]})
195
 
196
  def input_paragaph_textbox(Label:str, Placeholder:str):
197
  """
requirements.txt CHANGED
@@ -6,5 +6,5 @@ matplotlib==3.5.3
6
  nltk==3.8.1
7
  numpy==1.25.2
8
  pandas==2.3.0
9
- scikit_learn==1.3.0
10
  xgboost==3.0.2
 
6
  nltk==3.8.1
7
  numpy==1.25.2
8
  pandas==2.3.0
9
+ scikit_learn==1.6.0
10
  xgboost==3.0.2