Spaces:

trohith89
/

Electronics-Sales-Classification

Sleeping

App Files Files Community

trohith89 committed on Jan 7

Commit

73e2c23

verified ·

1 Parent(s): 7bc16d5

Rename app.py to 1_Home.py

Browse files

Files changed (2) hide show

1_Home.py +16 -0
app.py +0 -114

1_Home.py ADDED Viewed

	@@ -0,0 +1,16 @@

+import streamlit as st
+# Home Page
+st.title("Welcome to the Hotel Data Analysis App")
+st.markdown("""
+This application is designed to help analyze hotel data, uncover insights, and build predictive models.
+Navigate through the app using the sidebar to explore various functionalities.
+### Features:
+- **Introduction and About Data**: Learn about the dataset and download a sample file.
+- **EDA and Feature Engineering**: Upload and analyze your dataset to uncover patterns and relationships.
+- **Model Creation**: Build and evaluate machine learning models using your data.
+- **Conclusion**: Summarize findings and key insights.
+### Purpose:
+This app is tailored for exploring relationships between features like price, ratings, discounts, cashback, and hotel categories, ultimately enabling data-driven decision-making.
+**Get started by selecting a page from the sidebar!**
+""")

app.py DELETED Viewed

@@ -1,114 +0,0 @@
-import streamlit as st
-import pandas as pd
-import numpy as np
-import matplotlib.pyplot as plt
-import seaborn as sns
-import plotly.express as px
-import warnings
-from sklearn.linear_model import LogisticRegression
-from sklearn.neighbors import KNeighborsClassifier
-from sklearn.model_selection import train_test_split, cross_val_score
-from sklearn.preprocessing import StandardScaler, LabelEncoder
-from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, log_loss
-import optuna
-from imblearn.over_sampling import SMOTE
-from sklearn.preprocessing import PolynomialFeatures
-warnings.filterwarnings('ignore')
-if st.button("EDA"):
-    st.switch_page("pages/EDA.py")
-# Streamlit App Title
-st.title("Consumer Electronics Sales Prediction App")
-# Upload CSV Dataset
-uploaded_file = st.file_uploader("Upload CSV File", type=["csv"])
-if uploaded_file:
-    data = pd.read_csv(uploaded_file)
-    df = data.copy()
-    st.write("### Raw Data:")
-    st.write(df.head())
-    # Data Preprocessing
-    df = df.rename(columns={'ProductCategory': 'Category', 'ProductBrand': 'Brand', 'ProductPrice': 'Price'})
-    df['Price'] = df['Price'].apply(lambda x: round(x, 2))
-    # Bin age into categories
-    bins = [0, 18, 35, 50, 65, 100]
-    labels = ['Child', 'Young Adult', 'Adult', 'Middle Aged', 'Senior']
-    df['age_bins'] = pd.cut(df['CustomerAge'], bins=bins, labels=labels, right=False)
-    # Encode age_bins to numerical values using LabelEncoder
-    le_age_bins = LabelEncoder()
-    df['age_bins'] = le_age_bins.fit_transform(df['age_bins'].astype(str))
-    # Show Data Description
-    st.write("### Data Description")
-    st.write(df.describe())
-    # Visualize Product Category Distribution
-    fig, ax = plt.subplots()
-    sns.countplot(x='Category', data=df, ax=ax, palette='viridis')
-    ax.set_title("Product Category Distribution")
-    st.pyplot(fig)
-    # Encode Categorical Features
-    le_category = LabelEncoder()
-    df['Category'] = le_category.fit_transform(df['Category'])
-    le_brand = LabelEncoder()
-    df['Brand'] = le_brand.fit_transform(df['Brand'])
-    # Feature Engineering with Polynomial Features
-    fv = df.drop(columns=['PurchaseIntent'])
-    cv = df['PurchaseIntent']
-    poly = PolynomialFeatures(degree=2, include_bias=False)
-    numeric_columns = [col for col in fv.select_dtypes(include=[float, int]).columns if col != 'ProductID']
-    poly_features = poly.fit_transform(fv[numeric_columns])
-    poly_feature_names = poly.get_feature_names_out(numeric_columns)
-    fv_with_poly = pd.DataFrame(poly_features, columns=poly_feature_names)
-    fv_with_poly = pd.concat([fv.reset_index(drop=True), fv_with_poly], axis=1)
-    # Handle Class Imbalance with SMOTE
-    smote = SMOTE()
-    X_resampled, y_resampled = smote.fit_resample(fv_with_poly, cv)
-    # Train-Test Split
-    X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)
-    # Standardize the Data
-    scaler = StandardScaler()
-    X_train = scaler.fit_transform(X_train)
-    X_test = scaler.transform(X_test)
-    # Optuna Optimization
-    def objective(trial):
-        solver, penalty = trial.suggest_categorical("choices", [("lbfgs", "l2"), ("newton-cg", "l2"), ("sag", "l2"), ("saga", "l1"), ("saga", "l2"), ("saga", "elasticnet")])
-        C = trial.suggest_float("C", 0.01, 1000.0)
-        l1_ratio = trial.suggest_float("l1_ratio", 0, 1) if penalty == "elasticnet" else None
-        model = LogisticRegression(solver=solver, penalty=penalty, C=C, l1_ratio=l1_ratio if l1_ratio else None)
-        return -1 * cross_val_score(model, X_train, y_train, cv=5, scoring="neg_log_loss").mean()
-    study = optuna.create_study(direction="minimize")
-    study.optimize(objective, n_trials=100)
-    best_params = study.best_params
-    st.write("### Best Hyperparameters")
-    st.write(best_params)
-    # Train Final Model
-    final_model = LogisticRegression(**best_params)
-    final_model.fit(X_train, y_train)
-    acc = final_model.score(X_test, y_test)
-    st.write(f"### Test Accuracy: {acc:.2f}")
-    # Hugging Face Upload Section
-    st.write("#### Upload Model to Hugging Face")
-    if st.button("Upload to Hugging Face"):
-        import joblib
-        import huggingface_hub
-        joblib.dump(final_model, "model.joblib")
-        huggingface_hub.login(token="<YOUR_HUGGINGFACE_TOKEN>")
-        huggingface_hub.upload_file(path_or_fileobj="model.joblib", path_in_repo="model.joblib", repo_id="<your_repo>")
-        st.success("Model successfully uploaded!")