Spaces:

VJyzCELERY
/

GameRecommenderInference

Sleeping

App Files Files Community

VJyzCELERY committed on Jun 5, 2025

Commit

5450dc1

1 Parent(s): 6f8c133

First Commit

Browse files

Files changed (5) hide show

GameRecommender.py +334 -0
app.py +232 -0
component.py +301 -0
requirements.txt +193 -0
style.css +208 -0

GameRecommender.py ADDED Viewed

	@@ -0,0 +1,334 @@

+import numpy as np
+import pandas as pd
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.preprocessing import MultiLabelBinarizer,LabelEncoder,MinMaxScaler
+from sklearn.feature_extraction.text import TfidfVectorizer
+import joblib
+from sklearn.decomposition import TruncatedSVD
+from sklearn.metrics import classification_report
+from xgboost import XGBClassifier
+import nltk
+from nltk.tokenize import word_tokenize
+from nltk.corpus import stopwords
+from nltk.stem import WordNetLemmatizer
+from nltk.tag import pos_tag
+import string
+import re
+import os
+# Fetch the NLTK resources that TextBasedRecommendation.preprocess relies on:
+# word_tokenize (punkt), pos_tag (averaged perceptron tagger),
+# WordNetLemmatizer (wordnet) and stopwords.words('english') (stopwords).
+nltk.download('punkt')
+# NOTE(review): newer NLTK releases appear to look up 'punkt_tab' for
+# word_tokenize; fetching both names keeps either release working.
+nltk.download('punkt_tab')
+nltk.download('averaged_perceptron_tagger_eng')
+nltk.download('wordnet')
+# The stopwords corpus was never downloaded although preprocess() reads it.
+nltk.download('stopwords')
+class CollaborativeRecommender:
+    """Item-to-item recommender using cosine similarity between latent item vectors."""
+    def __init__(self, svd_matrix, item_to_index, index_to_item):
+        """
+        svd_matrix: 2D numpy array (items x latent features)
+        item_to_index: dict mapping app_id to row index in svd_matrix
+        index_to_item: dict mapping row index to app_id
+        """
+        # The stored object is indexed and multiplied like a matrix below,
+        # so it is annotated as an array rather than as an estimator.
+        self.svd_matrix : np.ndarray = svd_matrix
+        self.item_to_index = item_to_index
+        self.index_to_item = index_to_item
+    def save(self, path: str):
+        """Save the entire model as a single file using joblib."""
+        joblib.dump(self, path)
+    @staticmethod
+    def load(path: str):
+        """Load the entire model from a joblib file."""
+        # NOTE(review): joblib files are pickles; only load files from a trusted source.
+        return joblib.load(path)
+    def _get_item_vector(self, app_id):
+        """Return the latent vector of ``app_id``; raise ValueError if it is unknown."""
+        idx = self.item_to_index.get(app_id)
+        if idx is None:
+            raise ValueError(f"app_id {app_id} not found in the model.")
+        return self.svd_matrix[idx]
+    def _cosine_similarity(self, vec, matrix):
+        """Return the cosine similarity between ``vec`` and every row of ``matrix``."""
+        vec_norm = np.linalg.norm(vec)
+        matrix_norms = np.linalg.norm(matrix, axis=1)
+        # The small epsilon keeps all-zero rows from dividing by zero.
+        similarity = (matrix @ vec) / (matrix_norms * vec_norm + 1e-10)
+        return similarity
+    def get_similarities(self, app_ids,top_n=None):
+        """
+        Rank every other item by similarity to the given item(s).
+
+        Input: app_ids - single app_id (str, int or numpy integer) or a
+               list/tuple/ndarray of app_ids
+               top_n - optional number of rows to keep; a falsy value keeps all rows
+        Output: DataFrame with columns ['app_id', 'collaborative_similarity']
+                sorted by similarity descending, without the input app_ids
+        Raises: TypeError for an unsupported app_ids type, ValueError when none
+                of the given app_ids is known to the model
+        """
+        # numpy integer scalars (e.g. values taken from a DataFrame column) are
+        # not instances of int, so they are accepted explicitly.
+        if isinstance(app_ids, (str, int, np.integer)):
+            app_ids = [app_ids]
+        elif not isinstance(app_ids, (list, tuple, np.ndarray)):
+            raise TypeError("app_ids must be a string/int or a list of such")
+        valid_vectors = []
+        missing_ids = []
+        for app_id in app_ids:
+            try:
+                valid_vectors.append(self._get_item_vector(app_id))
+            except ValueError:
+                missing_ids.append(app_id)
+        if not valid_vectors:
+            raise ValueError("None of the input app_ids were found in the model.")
+        # Aggregate vectors by averaging if multiple inputs
+        aggregated_vec = np.mean(valid_vectors, axis=0)
+        # Compute similarity with all items
+        similarities = self._cosine_similarity(aggregated_vec, self.svd_matrix)
+        # Build DataFrame of results
+        result_df = pd.DataFrame({
+            'app_id': [self.index_to_item[i] for i in range(len(similarities))],
+            'collaborative_similarity': similarities
+        })
+        # Exclude the input app_ids themselves from results
+        result_df = result_df[~result_df['app_id'].isin(app_ids)]
+        # Sort descending by similarity
+        result_df = result_df.sort_values('collaborative_similarity', ascending=False).reset_index(drop=True)
+        # Unknown ids are skipped rather than fatal; tell the user which ones.
+        if missing_ids:
+            print(f"Warning: These app_ids were not found in the model and ignored: {missing_ids}")
+        if top_n:
+            return result_df.head(top_n)
+        return result_df
+class GameContentRecommender:
+    """Nearest-neighbour recommender over encoded game metadata."""
+    def __init__(self,model,genre_encoder,category_encoder,price_range_encoder,scaler,app_id_encoder):
+        """Store the fitted neighbour model together with the encoders/scaler used to build its features."""
+        self.model : KNeighborsClassifier = model
+        self.genre_encoder : MultiLabelBinarizer = genre_encoder
+        self.category_encoder : MultiLabelBinarizer = category_encoder
+        self.price_range_encoder : LabelEncoder = price_range_encoder
+        self.scaler : MinMaxScaler = scaler
+        self.app_id_encoder : LabelEncoder = app_id_encoder
+    def save(self, path: str):
+        """Save the entire model as a single file using joblib."""
+        joblib.dump(self, path)
+    @staticmethod
+    def load(path: str):
+        """Load the entire model from a joblib file."""
+        # NOTE(review): joblib files are pickles; only load files from a trusted source.
+        return joblib.load(path)
+    def predict(self, price_range, year_release, average_playtime, game_score, dlc_count, genres, categories, top_n=None):
+        """
+        Return the games nearest to the described preferences.
+
+        The feature vector is built as: scaled [year_release, average_playtime,
+        game_score, dlc_count], the encoded price range, one flag per known
+        genre, one flag per known category.
+
+        Returns a DataFrame with columns ['app_id', 'content_probability'],
+        where content_probability is 1 / (1 + neighbour distance). A falsy
+        top_n keeps every neighbour returned by the model.
+        """
+        # One 0/1 flag per class known to each encoder; 'Unknown' and unseen
+        # labels are ignored.
+        genre_dict = {g: 0 for g in self.genre_encoder.classes_}
+        categories_dict = {c: 0 for c in self.category_encoder.classes_}
+        for genre in genres:
+            if genre != 'Unknown' and genre in genre_dict:
+                genre_dict[genre] = 1
+        for category in categories:
+            if category != 'Unknown' and category in categories_dict:
+                categories_dict[category] = 1
+        # LabelEncoder.transform expects a 1-D array of labels; the previous
+        # (n, 1) column vector only worked through a conversion warning.
+        price_range = self.price_range_encoder.transform(np.asarray(price_range).reshape(-1))
+        scaled_features = self.scaler.transform(np.array([[year_release, average_playtime, game_score, dlc_count]]))[0]
+        user_vector = list(scaled_features) + list(price_range) + list(genre_dict.values()) + list(categories_dict.values())
+        user_df = pd.DataFrame([user_vector])
+        distances, indices = self.model.kneighbors(user_df)
+        distances = distances.flatten()
+        indices = indices.flatten()
+        # Map distance to a (0, 1] score: identical vectors score 1.
+        similarity = 1 / (1 + distances)
+        # NOTE(review): kneighbors returns row positions in the fitted data;
+        # decoding them with app_id_encoder assumes row i was fitted with
+        # encoded label i - confirm against the training code.
+        app_ids = self.app_id_encoder.inverse_transform(indices)
+        prediction = pd.DataFrame({
+            'app_id': app_ids,
+            'content_probability': similarity
+        })
+        if top_n:
+            prediction = prediction.head(top_n)
+        return prediction
+class TextBasedRecommendation():
+    """Predicts games from free text with a TF-IDF vectorizer and an XGBoost classifier."""
+    def __init__(self,classifier,vectorizer,app_id_encoder,history):
+        """Store the fitted classifier, vectorizer, label encoder and the training history object."""
+        self.classifier : XGBClassifier = classifier
+        self.vectorizer : TfidfVectorizer = vectorizer
+        self.app_id_encoder : LabelEncoder = app_id_encoder
+        self.history = history
+    def save(self, path_prefix: str):
+        """
+        Write the classifier to '<path_prefix>_xgb.json' and everything else
+        to '<path_prefix>_preprocessor.joblib'.
+        """
+        self.classifier.save_model(f"{path_prefix}_xgb.json")
+        classifier_backup = self.classifier
+        # The classifier is detached so it is not pickled a second time.
+        self.classifier = None
+        try:
+            joblib.dump(self, f"{path_prefix}_preprocessor.joblib")
+        finally:
+            # Restore the classifier even when the dump fails.
+            self.classifier = classifier_backup
+    @staticmethod
+    def load(path_prefix: str):
+        """Rebuild an instance from the two files written by save()."""
+        # NOTE(review): joblib files are pickles; only load files from a trusted source.
+        obj = joblib.load(f"{path_prefix}_preprocessor.joblib")
+        xgb = XGBClassifier()
+        xgb.load_model(f"{path_prefix}_xgb.json")
+        obj.classifier = xgb
+        return obj
+    def preprocess(self,text : str):
+        """
+        Lower-case, tokenize, drop stopwords/non-alphabetic tokens and
+        lemmatize ``text``; return the tokens joined by single spaces.
+        Non-string input yields an empty string.
+        """
+        # Guard first: the lower() call below would fail on non-string input.
+        if not isinstance(text, str):
+            return ''
+        # A set gives constant-time membership tests in the token filter.
+        stopword = set(stopwords.words('english'))
+        lemmatizer = WordNetLemmatizer()
+        def convert_postag(postag:str):
+            # Map a Penn Treebank tag prefix to the WordNet POS letter.
+            if postag.startswith('V'):
+                return 'v'
+            elif postag.startswith('R'):
+                return 'r'
+            elif postag.startswith('J'):
+                return 'a'
+            return 'n'
+        def clean_space(text : str):
+            # Collapse every whitespace run (including newlines) to one space.
+            return re.sub(r'\s+', ' ', text.replace('\n', ' ')).strip()
+        def tokenize(text : str):
+            text = clean_space(text.lower())
+            # isalpha() already excludes punctuation-only tokens.
+            return [word for word in word_tokenize(text)
+                    if word.isalpha() and word not in stopword]
+        def lemmatizing(token : list):
+            # Lemmatize each token using its part-of-speech tag.
+            return [lemmatizer.lemmatize(word,convert_postag(tag)) for word,tag in pos_tag(token)]
+        return " ".join(lemmatizing(tokenize(text)))
+    def get_accuracy(self,X_test,y_test):
+        """Print a classification report for ``X_test`` against ``y_test``."""
+        # NOTE(review): X_test is vectorized as-is, without preprocess() -
+        # confirm callers pass already-cleaned text.
+        y_pred = self.classifier.predict(self.vectorizer.transform(X_test))
+        y_test = self.app_id_encoder.transform(y_test)
+        print(classification_report(y_test,y_pred))
+    def predict(self,text,top_n=None):
+        """
+        Return a DataFrame with columns ['app_id', 'text_probability'] sorted
+        by probability descending; ``top_n`` limits the rows when not None.
+        """
+        cleaned_text = self.preprocess(text)
+        vectorized_text = self.vectorizer.transform([cleaned_text])
+        proba = self.classifier.predict_proba(vectorized_text)[0]
+        # Class indices ordered from most to least probable.
+        class_indices = np.argsort(proba)[::-1]
+        if top_n is not None:
+            class_indices = class_indices[:top_n]
+        class_labels = self.app_id_encoder.inverse_transform(class_indices)
+        class_probs = proba[class_indices]
+        return pd.DataFrame({
+            'app_id': class_labels,
+            'text_probability': class_probs
+        })
+class GameRecommendationEnsemble:
+    """Weighted blend of the content-based, collaborative and text-based recommenders."""
+    def __init__(self,game_content_recommeder,collaborative_recommender,text_based_recommender):
+        """Store the three sub-recommenders."""
+        # The misspelled attribute name is kept: code outside this class reads it.
+        self.game_content_recommeder : GameContentRecommender=game_content_recommeder
+        self.collaborative_recommender : CollaborativeRecommender=collaborative_recommender
+        self.text_based_recommender : TextBasedRecommendation = text_based_recommender
+    def save(self, dir_path: str):
+        """Save each sub-recommender under ``dir_path`` (created if missing)."""
+        os.makedirs(dir_path, exist_ok=True)
+        self.game_content_recommeder.save(os.path.join(dir_path, "game_content_recommender.joblib"))
+        self.collaborative_recommender.save(os.path.join(dir_path, "collaborative_recommender.joblib"))
+        self.text_based_recommender.save(os.path.join(dir_path, "text_based_recommender"))
+    @staticmethod
+    def load(dir_path: str):
+        """Load the three sub-recommenders written by save() and wrap them in an ensemble."""
+        game_content_recommender = GameContentRecommender.load(os.path.join(dir_path, "game_content_recommender.joblib"))
+        collaborative_recommender = CollaborativeRecommender.load(os.path.join(dir_path, "collaborative_recommender.joblib"))
+        text_based_recommender = TextBasedRecommendation.load(os.path.join(dir_path, "text_based_recommender"))
+        return GameRecommendationEnsemble(
+            game_content_recommender,
+            collaborative_recommender,
+            text_based_recommender
+        )
+    def scale_proba(self,series):
+        """Min-max scale ``series`` to [0, 1]; a series of length <= 1 becomes all 1.0."""
+        if len(series)<=1:
+            return pd.Series([1.0] * len(series), index=series.index)
+        scaler = MinMaxScaler()
+        scaled = scaler.fit_transform(series.values.reshape(-1, 1)).flatten()
+        return pd.Series(scaled, index=series.index)
+    def predict(self, description=None, app_ids=None, price_range=None, year_release=None,
+            average_playtime=None, game_score=None, dlc_count=None,
+            genres=None, categories=None, top_n=None,
+            weight_text=1.0, weight_collab=1.0, weight_content=1.0):
+        """
+        Score games with every sub-recommender whose inputs were supplied and
+        combine the scaled scores into ``final_score``.
+
+        - text model: used when ``description`` is not None
+        - collaborative model: used when ``app_ids`` is not None
+        - content model: used only when all seven content inputs are not None
+
+        The weight of an unused model is forced to 0 and the remaining
+        weights are normalized to sum to 1.
+
+        Returns a DataFrame sorted by ``final_score`` descending (limited to
+        ``top_n`` rows when truthy), or None when no model could be used.
+        Raises ValueError when the weights of the used models sum to zero.
+        """
+        from functools import reduce
+        merge_dfs = []
+        if description is not None:
+            text_proba = self.text_based_recommender.predict(description)
+            text_proba['app_id'] = text_proba['app_id'].astype(str)
+            text_proba['text_probability'] = self.scale_proba(text_proba['text_probability'])
+            merge_dfs.append(text_proba)
+        else:
+            weight_text=0
+        # Collaborative similarity (only if app_ids is provided)
+        if app_ids is not None:
+            similar_app = self.collaborative_recommender.get_similarities(app_ids)
+            similar_app['app_id'] = similar_app['app_id'].astype(str)
+            similar_app['collaborative_similarity'] = self.scale_proba(similar_app['collaborative_similarity'])
+            merge_dfs.append(similar_app)
+        else:
+            weight_collab = 0  # No weight if not used
+        # Identity checks: `None in (...)` compares with ==, which is ambiguous
+        # when genres/categories are array-like.
+        content_inputs = (price_range, year_release, average_playtime, game_score, dlc_count, genres, categories)
+        if any(value is None for value in content_inputs):
+            weight_content=0
+        else:
+            similar_content = self.game_content_recommeder.predict(price_range, year_release,average_playtime,game_score,dlc_count, genres, categories)
+            similar_content['app_id'] = similar_content['app_id'].astype(str)
+            similar_content['content_probability'] = self.scale_proba(similar_content['content_probability'])
+            merge_dfs.append(similar_content)
+        if not merge_dfs:
+            return None
+        # Outer merge keeps a game that only some models scored; its missing
+        # scores count as 0.
+        merged = reduce(lambda left, right: pd.merge(left, right, on='app_id', how='outer'), merge_dfs)
+        merged = merged.fillna(0)
+        total_weight = weight_text + weight_collab + weight_content
+        if total_weight == 0:
+            raise ValueError("All weights are zero. At least one weight must be positive.")
+        # Columns of unused models are absent; .get falls back to a scalar 0.
+        merged['final_score'] = (
+            merged.get('text_probability', 0) * (weight_text / total_weight) +
+            merged.get('collaborative_similarity', 0) * (weight_collab / total_weight) +
+            merged.get('content_probability', 0) * (weight_content / total_weight)
+        )
+        final_df = merged.sort_values(by='final_score', ascending=False).reset_index(drop=True)
+        if top_n:
+            return final_df.head(top_n)
+        return final_df

app.py ADDED Viewed

	@@ -0,0 +1,232 @@

+import gradio as gr
+import pandas as pd
+import os
+from component import *
+from GameRecommender import *
+import gc
+from sklearn.model_selection import train_test_split
+from huggingface_hub import snapshot_download
+from sklearn.preprocessing import MultiLabelBinarizer,LabelEncoder,MinMaxScaler
+# Base folder for the local CSV paths defined below.
+DATA_BASE_PATH = 'data'
+# MODEL_BASE_PATH = 'models'
+# Fetch only the GameRecommender/ files of the model repo from the Hugging Face Hub;
+# the returned value is used below as the base folder of the model files.
+MODEL_BASE_PATH = snapshot_download(
+    repo_id="VJyzCELERY/SteamGameRecommender",
+    repo_type="model",
+    allow_patterns=["GameRecommender/*"]
+)
+SEED = 42
+# NOTE(review): none of the *_DATAPATH constants is read by the code visible here.
+RAW_GAMES_DATAPATH = os.path.join(DATA_BASE_PATH,'converted.csv')
+GAMES_DATAPATH = os.path.join(DATA_BASE_PATH,'Cleaned_games.csv')
+REVIEWS_DATAPATH = os.path.join(DATA_BASE_PATH,'MergedFragmentData_SAMPLE.csv')
+TRIMMED_REVIEW_DATAPATH = os.path.join(DATA_BASE_PATH,'Trimmed_Dataset.csv')
+USER_PREFERENCE_DATAPATH = os.path.join(DATA_BASE_PATH,'UserPreferenceDF.csv')
+# Folder passed to GameRecommendationEnsemble.load below.
+MODEL_PATH = os.path.join(MODEL_BASE_PATH,'GameRecommender')
+from datasets import load_dataset
+# Games table, loaded from the Hub dataset instead of the local CSV.
+GAMES_DS = load_dataset("VJyzCELERY/Cleaned_games")
+# Load the trained ensemble and pull the dropdown choices out of its encoders.
+model = GameRecommendationEnsemble.load(MODEL_PATH)
+vectorizer=model.text_based_recommender.vectorizer
+review_app_id_encoder=model.text_based_recommender.app_id_encoder
+# 'Unknown' is filtered out so it is not offered as a selectable genre/category.
+genres = model.game_content_recommeder.genre_encoder.classes_.tolist()
+genres = [genre for genre in genres if genre != 'Unknown']
+categories = model.game_content_recommeder.category_encoder.classes_.tolist()
+categories = [cat for cat in categories if cat != 'Unknown']
+price_ranges = model.game_content_recommeder.price_range_encoder.classes_.tolist()
+# Only games known to the collaborative model can be picked as "liked" games.
+selectable_app_ids = list(model.collaborative_recommender.item_to_index.keys())
+# df_games = pd.read_csv(GAMES_DATAPATH,index_col=False)
+df_games = GAMES_DS['train'].to_pandas()
+# app_id is compared as a string here.
+# NOTE(review): this assumes the item_to_index keys are strings - confirm.
+available_names = df_games[df_games['app_id'].astype(str).isin(selectable_app_ids)]['Name'].tolist()
+def recommend_game(description=None, app_name=None, price_range=None, year_release=None,
+            excpected_playtime=None, game_score=None, dlc_count=None,
+            genres=None, categories=None, top_n=5,weight_text=1.0, weight_collab=1.0, weight_content=1.0):
+    """
+    Gradio callback: turn the form values into ensemble inputs and return the
+    matching rows of ``df_games`` ordered from best to worst score.
+
+    ``app_name`` may be one game name or a list of names; the names are
+    translated to app ids through ``df_games``. An empty table is returned
+    when the form holds nothing the ensemble can use.
+    """
+    # A blank description carries no signal; treat it like a missing one so the
+    # text model is skipped instead of scoring an empty string.
+    if isinstance(description, str) and not description.strip():
+        description = None
+    if app_name:
+        if isinstance(app_name, str):
+            app_name = [app_name]
+        # `or None`: no matching name means "no collaborative input", not an
+        # empty id list (which the collaborative model rejects with ValueError).
+        app_ids = df_games[df_games['Name'].isin(app_name)]['app_id'].astype(str).tolist() or None
+    else:
+        app_ids = None
+    prediction = model.predict(description=description,app_ids=app_ids,price_range=price_range,year_release=year_release,average_playtime=excpected_playtime,game_score=game_score,
+                               dlc_count=dlc_count,genres=genres,categories=categories,top_n=top_n,weight_text=weight_text,weight_collab=weight_collab,weight_content=weight_content)
+    # The ensemble returns None when no sub-model received usable input.
+    if prediction is None:
+        return gr.DataFrame(value=df_games.head(0))
+    ranked_ids = prediction['app_id'].tolist()
+    rank_of = {app_id: position for position, app_id in enumerate(ranked_ids)}
+    matches = df_games.loc[df_games['app_id'].astype(str).isin(ranked_ids)]
+    # A plain isin() filter returns rows in dataset order; re-sort them by rank.
+    order = matches['app_id'].astype(str).map(rank_of).to_numpy().argsort(kind='stable')
+    output = matches.iloc[order].reset_index()
+    return gr.DataFrame(value=output)
+# Load external CSS file
+# An explicit encoding keeps the read independent of the platform default.
+with open('style.css', 'r', encoding='utf-8') as f:
+    custom_css = f.read()
+# for nav
+def set_active_section(btn_id):
+    """
+    Navigation callback: show the section registered under ``btn_id``, hide
+    every other section and mark the clicked button as active.
+
+    Returns one visibility update per entry of ``sections`` followed by one
+    class update per entry of ``nav_buttons``.
+    """
+    # Exactly the section whose key equals btn_id becomes visible.
+    section_updates = [gr.update(visible=(key == btn_id)) for key in sections]
+    # The clicked button gets the extra "active" class.
+    button_updates = [
+        gr.update(elem_classes=f"nav-btn {'active' if button.elem_id == btn_id else ''}")
+        for button in nav_buttons
+    ]
+    return section_updates + button_updates
+"""
+    MAIN DEMO
+"""
+# Build the whole UI: a sidebar with navigation buttons and the recommendation form.
+with gr.Blocks(css = custom_css) as demo:
+    # container
+    with gr.Row(elem_classes="container"):
+        # navbar
+        with gr.Sidebar(elem_classes="navbar"):
+            # nav header
+            with gr.Column(elem_classes="nav-header"):
+                gr.Markdown("# Game Recommendation by Your Preference")
+            # nav button container
+            with gr.Column(elem_classes="nav-buttons"):
+                # nav button list
+                nav_buttons = []
+                # (label, section id) pairs used only to create the buttons;
+                # the name `sections` is rebound to a dict after the layout.
+                sections = [
+                    ('Home', 'home'),
+                    ("Dataset", "dataset"),
+                    ("Exploratory Data Analysis", "eda"),
+                    ("Preprocessing Data", "preprocess"),
+                    ("Training Result", "training"),
+                    ("Our System", "system")
+                ]
+                # create button
+                for label, section_id in sections:
+                    button = gr.Button(label, elem_classes="nav-btn", elem_id=f"btn-{section_id}")
+                    nav_buttons.append(button)
+            # Recommendation system
+            # NOTE(review): this column is indented under gr.Sidebar, so it is
+            # created inside the sidebar context - confirm that is intended.
+            # It starts hidden and is shown by set_active_section.
+            with gr.Column(elem_id="system", elem_classes='content-section', visible=False) as system_section:
+                # special for this section
+                # NOTE(review): elem_id 'system' is also used by the column above.
+                gr.HTML('<h1 class="header-title">Game Recommendation System</h1>', elem_id='system')
+                with gr.Row():
+                    # Input column: one component per recommend_game argument.
+                    with gr.Column(min_width=500, elem_classes='input-column'):
+                        app_name = input_choice(
+                            Label='Select games that you liked',
+                            Choices=available_names,
+                            Multiselect=True
+                        )
+                        year = input_number(
+                            Label='Year Release',
+                            Precision=0,
+                            minimum=0
+                        )
+                        expected_playtime = input_number(
+                            Label='Expected Playtime (Hours)',
+                            Precision=2,
+                            minimum=0
+                        )
+                        expected_score = input_number(
+                            Label='Expected Score (% Positive)',
+                            Precision=2,
+                            minimum=0
+                        )
+                        dlc_count = input_number(
+                            Label='DLC Count',
+                            Precision=0,
+                            minimum=0
+                        )
+                        description = input_paragaph_textbox('Description', 'Describe the game (max 1200 characters)...')
+                        genre = input_choice(
+                                Label="Select Your Genre (Multiple Choice)",
+                                Choices=genres,
+                                Multiselect=True
+                            )
+                        # Choices reads the module-level `categories` list before
+                        # the name is rebound to the Dropdown component.
+                        categories = input_choice(
+                                Label="Select Your Categories (Multiple Choice)",
+                                Choices=categories,
+                                Multiselect=True
+                            )
+                        # single selection (multiselect=False)
+                        price_range = input_choice(
+                                Label="Select Your Price Range (Only Single Choice)",
+                                Choices=price_ranges,
+                                Multiselect=False
+                            )
+                        top_n= input_number(
+                            Label='Output amount',
+                            Precision=0,
+                            minimum=0,
+                            value=10
+                        )
+                        # Model weights; the ensemble normalizes them to sum to 1.
+                        weight_text = input_number(
+                            Label='Weight Text',
+                            Precision=2,
+                            minimum=0,
+                            maximum=1,
+                            value=0.5,
+                            step=0.01
+                        )
+                        weight_collab = input_number(
+                            Label='Weight Of Collaborative Model',
+                            Precision=2,
+                            minimum=0,
+                            maximum=1,
+                            value=0.5,
+                            step=0.01
+                        )
+                        weight_content = input_number(
+                            Label='Weight Of Content Based Model',
+                            Precision=2,
+                            minimum=0,
+                            maximum=1,
+                            value=0.5,
+                            step=0.01
+                        )
+                        submit_btn = gr.Button("Get Recommendations", variant="primary", elem_id="submit-btn")
+                    # Results column
+                    with gr.Column(min_width=500, elem_classes='results-column'):
+                        h2('Result')
+                        with gr.Column(elem_id='Output'):
+                            # Results column using the modular component
+                            h2('Recommended Game')
+                            recommended_game = gr.DataFrame()
+                        # click button logic
+                        # The inputs are passed positionally, so their order must
+                        # match the parameter order of recommend_game.
+                        submit_btn.click(
+                            fn=recommend_game,
+                            inputs=[description,app_name,price_range,year,expected_playtime,expected_score,dlc_count, genre, categories,top_n,weight_text,weight_collab,weight_content],
+                            outputs=recommended_game
+                        )
+    # Navigation logic
+    # Maps a nav button elem_id to the section it reveals. Only the system
+    # section is registered, so every other button just hides it.
+    sections = {
+        "btn-system": system_section
+    }
+    # Set click events for navigation buttons
+    for btn in nav_buttons:
+        # The button's elem_id is handed to set_active_section as btn_id.
+        btn.click(
+            set_active_section,
+            inputs=gr.State(btn.elem_id),
+            outputs=list(sections.values()) + nav_buttons
+        )
+# NOTE(review): launch runs on import; there is no `if __name__ == "__main__"` guard.
+demo.launch()

component.py ADDED Viewed

	@@ -0,0 +1,301 @@

+import gradio as gr
+import pandas as pd
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+import inspect
+import io
+# style formatting for Header
+def header(input:str):
+    """
+    Render ``input`` as a level-1 Markdown heading carrying the 'header' class.
+    Use for container headers only.
+    Usage:
+        header('your text')
+    Args:
+        input (str): heading text
+    Returns:
+        The created gr.Markdown component (returned for consistency with p()).
+    """
+    return gr.Markdown(f"# {input}", elem_classes='header')
+# style formatting for Header2
+def h2(input:str):
+    """
+    Render ``input`` as an <h2 class="subheader"> element.
+    Usage:
+        h2('your text')
+    Args:
+        input (str): sub-heading text
+    Returns:
+        The created gr.Markdown component (returned for consistency with p()).
+    """
+    # NOTE(review): style="black" is not a CSS declaration; if black text was
+    # intended it would be style="color: black" - confirm before changing.
+    return gr.Markdown(f'<h2 class="subheader" style="black">{input}</h2>')
+# style formatting for Text
+def p(input:str):
+    """
+    Render text as paragraphs, splitting on the literal "<br>" marker.
+    Usage:
+        p('''
+            text <br>
+            text
+        ''')
+        or
+        p('text')
+    Returns:
+        A gr.Markdown holding one <p class="desc">...</p> per non-empty paragraph.
+    """
+    blocks = []
+    for chunk in input.strip().split("<br>"):
+        body = chunk.strip()
+        # Paragraphs that are empty after trimming are dropped.
+        if body:
+            blocks.append(f'<p class="desc">{body}</p>')
+    return gr.Markdown(''.join(blocks))
+# this is for displaying a dataframe and also provides a CSV download
+def Dataset(df,title, source, key=None):
+    """
+    Creates a reusable dataset display component: a title, a download button
+    and a read-only preview of the first 100 rows.
+    Args:
+        df (pd.DataFrame): Dataset to display
+        title (str): Title for the dataset display
+        source (str): value handed to the download button (presumably the path
+            of the CSV file to download - confirm against callers)
+        key (str): Optional unique identifier used to build the element ids
+    Returns:
+        The gr.Dataframe preview component.
+    """
+    def get_file():
+        return source
+    with gr.Column(elem_classes='dataframe-layout', elem_id=f"dataset-{key}" if key else None):
+        # Title and download button in a row
+        with gr.Row():
+            gr.Markdown(f'<h1 class="subtitle">{title}</h1>') # title formatting
+            gr.DownloadButton(
+                label="Download CSV",
+                value=get_file,
+                elem_id=f"download-{key}" if key else None
+            )
+        # Dataframe display (created inside the column so it belongs to the
+        # 'dataframe-layout' container together with its title row)
+        df_display=gr.Dataframe(
+            value=df.head(100),
+            headers=list(df.columns),
+            elem_id=f"table-{key}" if key else None,
+            interactive=False,    # read only
+            # wrapping stays disabled to keep the rows short
+            # wrap=True
+        )
+    return df_display
+def describe_value_counts(series):
+    """Return ``series.describe()`` as a two-column table: 'Statistic' and 'Value'."""
+    summary = series.describe()
+    # Name the statistic index, then move it into a regular column.
+    return summary.rename_axis('Statistic').reset_index(name='Value')
+# this is for EDA, preprocess
+def plot_distribution(df, column):
+    """
+    Generates a matplotlib plot (bar chart or histogram) showing the distribution
+    of values in a selected column from the dataframe.
+    Object columns and columns with fewer than 20 distinct values get a bar
+    chart of their (at most 20) most frequent values; every other column gets
+    a 100-bin histogram of its non-null values.
+    Parameters:
+    -----------
+    df : pd.DataFrame
+        The dataframe to plot from.
+    column : str
+        The column name to visualize.
+    Returns:
+    --------
+    matplotlib.figure.Figure
+        A figure object representing the distribution plot.
+    """
+    fig, ax = plt.subplots(figsize=(10, 5))
+    values = df[column]
+    if values.dtype == 'object' or values.nunique() < 20:
+        # Bar plot for categorical/small unique values
+        value_counts = values.value_counts().head(20)
+        # Draw the bars at fixed positions and pin one tick per bar, so the
+        # labels always line up with their bars (also for numeric values).
+        positions = list(range(len(value_counts)))
+        ax.bar(positions, value_counts.values)
+        ax.set_xticks(positions)
+        ax.set_xticklabels([str(label) for label in value_counts.index], rotation=45, ha='right')
+        ax.set_ylabel('Count')
+        ax.set_title(f'Distribution of {column}')
+    else:
+        # Histogram for numerical
+        ax.hist(values.dropna(), bins=100, edgecolor='black')
+        ax.set_title(f'Distribution of {column}')
+        ax.set_xlabel(column)
+        ax.set_ylabel('Frequency')
+    fig.tight_layout()
+    # NOTE(review): figures made through pyplot stay registered until closed;
+    # consider closing them once rendered.
+    return fig
+## this is for eda, preprocess, and training
+def code_cell(code):
+    """
+    Shorthand for gr.Code: show ``code`` as a Python code cell. The text is
+    passed through inspect.cleandoc first, so an indented triple-quoted string
+    can be given as-is.
+    Usage:
+        code_cell('df = pd.read_csv(path)')
+    or, for several lines, pass a triple-quoted string:
+        code_cell('''
+            df = pd.read_csv(path)
+            df.head()
+        ''')
+    Returns:
+        The created gr.Code component.
+    """
+    return gr.Code(inspect.cleandoc(code), language='python')
+## This for EDA, Preprocess, and training
+def plot_training_results(results: dict):
+    """
+    Draw the 'merror' and 'mlogloss' series of ``results`` as two line plots
+    on one figure, with the x axis counting entries from 1.
+    Args:
+        results (dict): must hold two lists under the keys
+            - 'merror': training error values
+            - 'mlogloss': log loss values
+            Example:
+                {
+                    "merror": [0.12, 0.10, 0.08],
+                    "mlogloss": [0.35, 0.32, 0.30]
+                }
+    Returns:
+        matplotlib.figure.Figure: the figure holding both curves.
+    Example:
+        plot_output = gr.Plot()
+        btn = gr.Button("Generate Plot")
+        btn.click(fn=lambda:plot_training_results(results), inputs=[], outputs=plot_output, preprocess=False)
+    """
+    # x positions are derived from the length of the merror series.
+    steps = list(range(1, len(results["merror"]) + 1))
+    fig, ax = plt.subplots(figsize=(8, 5))
+    ax.plot(steps, results["merror"], marker='o', label='Training Error (merror)', color='blue')
+    ax.plot(steps, results["mlogloss"], marker='s', label='Log Loss (mlogloss)', color='orange')
+    ax.set_title('Training Metrics Over Time')
+    ax.set_xlabel('Epoch / Fold')
+    ax.set_ylabel('Value')
+    ax.legend()
+    ax.grid(True)
+    fig.tight_layout()
+    return fig
+# for Recommendation section
+def input_name_textbox(Label:str, Placeholder:str):
+    """
+    Create a single-line text input styled with the 'text-input' class.
+    usage:
+        app_name = input_name_textbox('Input Your App', 'Enter game title...')
+    Args:
+        Label (str): label shown for the textbox
+        Placeholder (str): placeholder text
+    Returns:
+        gr.Textbox: the created component
+    """
+    return gr.Textbox(
+        label=Label,
+        placeholder=Placeholder,
+        elem_classes="text-input"
+    )
+def input_number(Label:str,Precision = 0,**kwargs):
+    """
+    Create a numeric input styled with the 'text-input' class.
+    usage:
+        year = input_number('Year Release', Precision=0, minimum=0)
+    Args:
+        Label (str): label shown for the field
+        Precision: forwarded to gr.Number as ``precision``
+        **kwargs: any further gr.Number keyword arguments (e.g. minimum, value, step)
+    Returns:
+        gr.Number: the created component
+    """
+    options = dict(label=Label, elem_classes="text-input", precision=Precision)
+    return gr.Number(**options, **kwargs)
+def input_paragaph_textbox(Label:str, Placeholder:str):
+    """
+    Create a multi-line text input (5 lines shown, growing to 8) limited to
+    1200 characters and styled with the 'text-input' class.
+    usage:
+        paragraph = input_paragaph_textbox('Your Story', 'Type your text...')
+    Args:
+        Label (str): label shown for the textbox
+        Placeholder (str): placeholder text
+    Returns:
+        gr.Textbox: the created component
+    """
+    return gr.Textbox(
+        label=Label,
+        placeholder=Placeholder,
+        lines=5,
+        max_lines=8,
+        max_length=1200,
+        elem_classes="text-input"
+    )
+def input_choice(Label:str, Choices:list, Multiselect:bool):
+    """
+    Create a dropdown styled with the 'dropdown' class.
+
+    Multiselect True allows several choices and starts with an empty list;
+    Multiselect False allows a single choice and starts with no value.
+
+    Usage:
+        genre = input_choice(
+            Label="Select Your Genre (Multiple Choice)",
+            Choices=['Action', 'Adventure', 'RPG'],
+            Multiselect=True
+        )
+    or, for a single choice:
+        price_range = input_choice(
+            Label="Select Your Price Range (Only Single Choice)",
+            Choices=['Free', '5$ - 10$'],
+            Multiselect=False
+        )
+    Args:
+        Label (str): label shown for the dropdown
+        Choices (list): selectable options
+        Multiselect (bool): whether several options may be selected
+    Returns:
+        gr.Dropdown: the created component
+    """
+    # Initial selection: nothing chosen yet in either mode.
+    if Multiselect:
+        initial_value = []
+    else:
+        initial_value = None
+    return gr.Dropdown(
+        label=Label,
+        choices=Choices,
+        multiselect=Multiselect,
+        value=initial_value,
+        elem_classes="dropdown"
+    )

requirements.txt ADDED Viewed

	@@ -0,0 +1,193 @@

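+# This file may be used to create an environment using:
+# $ conda create --name <env> --file <this file>
+# NOTE: entries use conda's name=version=build format, not pip's
+# name==version format, so `pip install -r` will not accept this file as-is.
+# platform: win-64
+# created-by: conda 25.1.1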
+_openmp_mutex=4.5=2_gnu
+aiofiles=24.1.0=pypi_0
+aiohappyeyeballs=2.6.1=pypi_0
+aiohttp=3.12.9=pypi_0
+aiosignal=1.3.2=pypi_0
+annotated-types=0.7.0=pypi_0
+anyio=4.9.0=pypi_0
+asttokens=3.0.0=pyhd8ed1ab_1
+async-timeout=5.0.1=pypi_0
+attrs=25.3.0=pypi_0
+blis=0.7.11=pypi_0
+bzip2=1.0.8=h2bbff1b_6
+ca-certificates=2025.4.26=h4c7d964_0
+catalogue=2.0.10=pypi_0
+certifi=2025.4.26=pypi_0
+charset-normalizer=3.4.2=pypi_0
+click=8.2.0=pypi_0
+cloudpickle=3.1.1=pypi_0
+colorama=0.4.6=pyhd8ed1ab_1
+comm=0.2.2=pyhd8ed1ab_1
+confection=0.1.5=pypi_0
+cpython=3.10.17=py310hd8ed1ab_0
+cuda-version=12.9=h4f385c5_3
+cycler=0.12.1=pypi_0
+cymem=2.0.11=pypi_0
+cython=0.29.32=pypi_0
+dask=2025.5.1=pypi_0
+datasets=3.6.0=pypi_0
+debugpy=1.8.14=py310h9e98ed7_0
+decorator=5.2.1=pyhd8ed1ab_0
+dill=0.3.8=pypi_0
+en-core-web-sm=3.5.0=pypi_0
+exceptiongroup=1.3.0=pyhd8ed1ab_0
+executing=2.2.0=pyhd8ed1ab_0
+fastapi=0.115.12=pypi_0
+ffmpy=0.6.0=pypi_0
+filelock=3.18.0=pypi_0
+fonttools=4.58.0=pypi_0
+frozenlist=1.6.2=pypi_0
+fsspec=2025.3.0=pypi_0
+fst-pso=1.8.1=pypi_0
+fuzzytm=2.0.9=pypi_0
+gensim=4.3.0=pypi_0
+gradio=5.32.1=pypi_0
+gradio-client=1.10.2=pypi_0
+groovy=0.1.2=pypi_0
+h11=0.16.0=pypi_0
+httpcore=1.0.9=pypi_0
+httpx=0.28.1=pypi_0
+huggingface-hub=0.32.4=pypi_0
+idna=3.10=pypi_0
+importlib-metadata=8.6.1=pyha770c72_0
+inquirerpy=0.3.4=pypi_0
+intel-openmp=2024.2.1=h57928b3_1083
+ipykernel=6.29.5=pyh4bbf305_0
+ipython=8.36.0=pyh9ab4c32_0
+jedi=0.19.2=pyhd8ed1ab_1
+jinja2=3.1.6=pypi_0
+joblib=1.5.0=pyhd8ed1ab_0
+jupyter_client=8.6.3=pyhd8ed1ab_1
+jupyter_core=5.7.2=pyh5737063_1
+kiwisolver=1.4.8=pypi_0
+krb5=1.21.3=hdf4eb48_0
+langcodes=3.5.0=pypi_0
+langdetect=1.0.9=pypi_0
+language-data=1.3.0=pypi_0
+libblas=3.9.0=31_h641d27c_mkl
+libcblas=3.9.0=31_h5e41251_mkl
+libffi=3.4.4=hd77b12b_1
+libgomp=15.1.0=h1383e82_2
+libhwloc=2.11.2=default_ha69328c_1001
+libiconv=1.18=h135ad9c_1
+liblapack=3.9.0=31_h1aa476e_mkl
+libsodium=1.0.20=hc70643c_0
+libwinpthread=12.0.0.r4.gg4f2fc60ca=h57928b3_9
+libxgboost=3.0.1=cuda128_hace5437_0
+libxml2=2.13.8=h866ff63_0
+locket=1.0.0=pypi_0
+marisa-trie=1.2.1=pypi_0
+markdown-it-py=3.0.0=pypi_0
+markupsafe=3.0.2=pypi_0
+matplotlib=3.5.3=pypi_0
+matplotlib-inline=0.1.7=pyhd8ed1ab_1
+mdurl=0.1.2=pypi_0
+miniful=0.0.6=pypi_0
+mkl=2024.2.2=h66d3029_15
+mpmath=1.3.0=pypi_0
+multidict=6.4.4=pypi_0
+multiprocess=0.70.16=pypi_0
+murmurhash=1.0.12=pypi_0
+nest-asyncio=1.6.0=pyhd8ed1ab_1
+networkx=3.4.2=pypi_0
+nltk=3.8.1=pypi_0
+numpy=1.25.2=py310hd02465a_0
+openssl=3.5.0=ha4e3fda_1
+orjson=3.10.18=pypi_0
+packaging=25.0=pyh29332c3_1
+pandas=2.1.4=pypi_0
+parso=0.8.4=pyhd8ed1ab_1
+partd=1.4.2=pypi_0
+pathlib-abc=0.1.1=pypi_0
+pathy=0.11.0=pypi_0
+pfzy=0.3.4=pypi_0
+pickleshare=0.7.5=pyhd8ed1ab_1004
+pillow=9.5.0=pypi_0
+pip=25.1=pyhc872135_2
+platformdirs=4.3.8=pyhe01879c_0
+preshed=3.0.9=pypi_0
+prompt-toolkit=3.0.51=pyha770c72_0
+propcache=0.3.1=pypi_0
+psutil=7.0.0=py310ha8f682b_0
+pure_eval=0.2.3=pyhd8ed1ab_1
+py-xgboost=3.0.1=cuda128_pyhee1328b_0
+pyarrow=20.0.0=pypi_0
+pycountry=24.6.1=pypi_0
+pydantic=2.11.5=pypi_0
+pydantic-core=2.33.2=pypi_0
+pydub=0.25.1=pypi_0
+pyfume=0.3.1=pypi_0
+pygments=2.19.1=pyhd8ed1ab_0
+pyparsing=3.2.3=pypi_0
+python=3.10.16=h4607a30_1
+python-dateutil=2.9.0.post0=pyhff2d567_1
+python-multipart=0.0.20=pypi_0
+python-tzdata=2025.2=pyhd8ed1ab_0
+python_abi=3.10=2_cp310
+pytz=2025.2=pyhd8ed1ab_0
+pywin32=307=py310h9e98ed7_3
+pyyaml=6.0.2=pypi_0
+pyzmq=26.4.0=py310h656833d_0
+regex=2024.11.6=pypi_0
+requests=2.32.3=pypi_0
+rich=14.0.0=pypi_0
+ruff=0.11.12=pypi_0
+safehttpx=0.1.6=pypi_0
+safetensors=0.5.3=pypi_0
+scikit-learn=1.3.0=pypi_0
+scipy=1.11.4=pypi_0
+seaborn=0.13.2=pypi_0
+semantic-version=2.10.0=pypi_0
+sentence-transformers=4.1.0=pypi_0
+setuptools=78.1.1=py310haa95532_0
+shellingham=1.5.4=pypi_0
+simpful=2.12.0=pypi_0
+six=1.17.0=pyhd8ed1ab_0
+smart-open=6.4.0=pypi_0
+sniffio=1.3.1=pypi_0
+spacy=3.5.3=pypi_0
+spacy-legacy=3.0.12=pypi_0
+spacy-loggers=1.0.5=pypi_0
+sqlite=3.45.3=h2bbff1b_0
+srsly=2.5.1=pypi_0
+stack_data=0.6.3=pyhd8ed1ab_1
+starlette=0.46.2=pypi_0
+swifter=1.4.0=pypi_0
+sympy=1.14.0=pypi_0
+tbb=2021.13.0=h62715c5_1
+thinc=8.1.12=pypi_0
+threadpoolctl=3.6.0=pyhecae5ae_0
+tk=8.6.14=h0416ee5_0
+tokenizers=0.21.1=pypi_0
+tomlkit=0.13.2=pypi_0
+toolz=1.0.0=pypi_0
+torch=2.7.0=pypi_0
+tornado=6.4.2=py310ha8f682b_0
+tqdm=4.67.1=pypi_0
+traitlets=5.14.3=pyhd8ed1ab_1
+transformers=4.51.3=pypi_0
+typer=0.16.0=pypi_0
+typing-inspection=0.4.1=pypi_0
+typing_extensions=4.13.2=pyh29332c3_0
+tzdata=2025b=h04d1e81_0
+ucrt=10.0.22621.0=h57928b3_1
+urllib3=2.4.0=pypi_0
+uvicorn=0.34.3=pypi_0
+vc=14.42=haa95532_5
+vc14_runtime=14.42.34438=hfd919c2_26
+vs2015_runtime=14.42.34438=h7142326_26
+wasabi=1.1.3=pypi_0
+wcwidth=0.2.13=pyhd8ed1ab_1
+websockets=15.0.1=pypi_0
+wheel=0.45.1=py310haa95532_0
+xgboost=3.0.1=cuda128_pyh68bd8d9_0
+xxhash=3.5.0=pypi_0
+xz=5.6.4=h4754444_1
+yarl=1.20.0=pypi_0
+zeromq=4.3.5=ha9f60a1_7
+zipp=3.21.0=pyhd8ed1ab_1
+zlib=1.2.13=h8cc25b3_1

style.css ADDED Viewed

	@@ -0,0 +1,208 @@

+/* Root wrapper: full width, sets the base font for everything inside. */
+.container {
+    /* display: flex; */
+    width: 100%;
+    /* min-height: 90vh; */
+    /* Generic family keywords must stay unquoted: a quoted 'sans-serif' is
+       looked up as a font literally named "sans-serif", which loses the
+       generic fallback when Arial is unavailable. */
+    font-family: Arial, sans-serif;
+}
+/* Sidebar: 200px-wide dark column; children stack vertically from the top. */
+.navbar {
+    width: 200px;
+    height: 100%;
+    border-right: 5px solid #34495e;
+    display: flex;
+    flex-direction: column;
+    padding: 0 10px;
+    /* justify-content: center; */
+    justify-content: flex-start;
+    background-color: #2c3e50;
+}
+/* Vertical spacing above and below the sidebar header block. */
+.nav-header {
+    margin-top: 1rem;
+    margin-bottom: 2rem;
+}
+/* Sidebar title text colour. */
+.nav-header h1 {
+    color: #fcdf1e;
+}
+/* Vertical stack of navigation buttons separated by a 0.5rem gap. */
+.nav-buttons {
+    display: flex;
+    flex-direction: column;
+    gap: 0.5rem;
+    padding: 0 5px;
+}
+/* Navigation button: full width, left-aligned bold label, no border;
+   every property change is animated over 0.3s. */
+.nav-btn {
+    text-align: left;
+    padding: 10px 15px;
+    width: 100%;
+    background-color: #34495e;
+    color: #ecf0f1;
+    border: none;
+    border-radius: 4px;
+    cursor: pointer;
+    transition: all 0.3s ease;
+    font-weight: bold;
+}
+/* Hover state: lighter background, yellow label. */
+.nav-btn:hover {
+    background-color: #3d566e;
+    color: #fcdf1e;
+}
+/* Button that also carries the .active class: orange background, dark label. */
+.nav-btn.active {
+    background-color: #f39c12;
+    color: #2c3e50;
+}
+/* Main pane: lays its own children out vertically.
+   NOTE(review): flex-grow only takes effect inside a flex container, and the
+   `display: flex` on .container is commented out; confirm the actual parent. */
+.main-content {
+    flex-grow: 1;
+    padding: 1rem;
+    display: flex;
+    flex-direction: column;
+}
+/* Section layout styling */
+/* Bordered, rounded panel that is at least 80vh tall and grows with its
+   content (height: auto, overflow: visible).
+   NOTE(review): the !important flags presumably override framework
+   defaults; confirm which rules they are meant to beat. */
+.content-section {
+    border: 2px solid #ccc;
+    padding: 1rem !important;
+    margin-bottom: 1rem;
+    background-color: #f9f9f9;
+    border-radius: 8px;
+    height: auto !important;
+    min-height: 80vh;
+    overflow: visible !important;
+    /* padding: 20px !important; */
+}
+/* Section heading: any h1 nested at any depth inside a .header that sits
+   inside a .content-section. The descendant combinator already matches
+   deeper nesting, so no separate `.header * h1` selector is needed. */
+.content-section .header h1 {
+    color: #3d3d3c !important;
+    font-size: 1.5rem;
+    font-weight: bold;
+    border-bottom: 2px solid #ccc;
+    padding-bottom: 0.5rem;
+    margin-bottom: 1rem;
+}
+/* Bordered, rounded panel with a fixed 80vh height; content taller than
+   that scrolls vertically inside the panel. */
+.content {
+    border: 2px solid #ccc;
+    padding: 0.5rem;
+    height: 80vh; /* Fixed height */
+    margin-bottom: 1rem;
+    background-color: #f9f9f9;
+    border-radius: 8px;
+    overflow-y: auto;
+}
+/* Description paragraphs: dark grey text, forced with !important. */
+p.desc {
+    color: #3d3d3c !important;
+    /* color: white; */
+}
+/* Dataset display */
+/* Dataset Container: full-width vertical stack with 30px between items. */
+.datasets-container {
+    display: flex;
+    flex-direction: column;
+    gap: 30px;
+    width: 100%;
+}
+/* Dataset Layout: white card with a thin border and a soft shadow. */
+.dataframe-layout {
+    border: 1px solid #e0e0e0;
+    border-radius: 8px;
+    padding: 20px;
+    background-color: #fff;
+    box-shadow: 0 2px 10px rgba(0,0,0,0.05);
+}
+/* Title Styling: margin and padding are reset with !important. */
+.subtitle {
+    font-size: 1.2rem !important;
+    font-weight: 600;
+    color: #2c3e50;
+    margin: 0 !important;
+    padding: 0 !important;
+}
+/* Download Button: blue, borderless; every declaration is forced with
+   !important. */
+.download-button {
+    background-color: #3498db !important;
+    color: white !important;
+    border: none !important;
+    padding: 8px 16px !important;
+    border-radius: 4px !important;
+    font-size: 0.9rem !important;
+}
+/* Download Button hover state: darker blue. */
+.download-button:hover {
+    background-color: #2980b9 !important;
+}
+/* Table Styling: full-width table with collapsed borders. */
+.dataframe-layout table {
+    width: 100%;
+    border-collapse: collapse;
+    margin-top: 15px;
+}
+/* Header cells: dark background, white left-aligned text. */
+.dataframe-layout th {
+    background-color: #34495e;
+    color: white;
+    padding: 10px;
+    text-align: left;
+}
+/* Body cells: padding plus a thin bottom separator line. */
+.dataframe-layout td {
+    padding: 8px 10px;
+    border-bottom: 1px solid #dddddd;
+}
+/* Row striping: even rows use the lighter green. */
+.dataframe-layout tr:nth-child(even) {
+    background-color: #85a285;
+}
+/* Row striping: odd rows use the darker green. */
+.dataframe-layout tr:nth-child(odd) {
+    background-color: #466c45;
+}
+/* EDA */
+/* Sub-heading text: bold, 24px, dark grey, with 10px space below. */
+.subheader{
+    font-weight: bold;
+    font-size: 24px;
+    color: #3d3d3c;
+    margin-bottom: 10px;
+}
+/* Recommendation system */
+/* Title inside the #system element: large white text. */
+#system .header-title {
+    color: white;
+    font-size: 2rem;
+}
+/* Dark grey background for the element with id "system". */
+#system {
+    background-color: #3d3d3c;
+}
+/* Shared sizing for elements carrying the "dropdown" or "text-input" class
+   (the classes the input helper functions in component.py pass as
+   elem_classes): fill the available height, flex to fit, no border. */
+.dropdown, .text-input{
+  height: 100%;
+  flex: 1 1 auto;
+  /* background-color: #dddddd; */
+  border: none;
+}
+/* Label colour inside those inputs, forced with !important.
+   NOTE(review): relies on the label carrying a `gr-label` class; confirm
+   against the markup the installed framework version renders. */
+.text-input label.gr-label,
+.dropdown label.gr-label {
+    color: #3d3d3c !important;
+}
+/* .results-column h2{
+    color: black;
+} */