| import streamlit as st |
| import pandas as pd |
| import numpy as np |
| import matplotlib.pyplot as plt |
| import seaborn as sns |
| import plotly.express as px |
| import warnings |
| from sklearn.linear_model import LogisticRegression |
| from sklearn.neighbors import KNeighborsClassifier |
| from sklearn.model_selection import train_test_split, cross_val_score |
| from sklearn.preprocessing import StandardScaler, LabelEncoder |
| from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, log_loss |
| import optuna |
| from imblearn.over_sampling import SMOTE |
| from sklearn.preprocessing import PolynomialFeatures |
|
|
# Suppress every warning for the lifetime of the app so that library notices
# do not end up in the rendered page or the server log.
warnings.simplefilter("ignore")


# Navigation shortcut: pressing the button hands control to the EDA page.
if st.button("EDA"):
    st.switch_page("pages/EDA.py")


st.title("Consumer Electronics Sales Prediction App")


# Restrict uploads to CSV files; the rest of the script only runs once this
# value is truthy.
uploaded_file = st.file_uploader("Upload CSV File", type=["csv"])
|
|
if uploaded_file:
    # ------------------------------------------------------------------
    # 1. Load and preview the uploaded data.
    # ------------------------------------------------------------------
    data = pd.read_csv(uploaded_file)
    df = data.copy()
    st.write("### Raw Data:")
    st.write(df.head())

    # Fail early with a readable message instead of a KeyError traceback
    # when the CSV does not have the columns the pipeline relies on.
    required_columns = ['ProductCategory', 'ProductBrand', 'ProductPrice', 'CustomerAge', 'PurchaseIntent']
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        st.error(f"Uploaded CSV is missing required columns: {', '.join(missing_columns)}")
        st.stop()

    # ------------------------------------------------------------------
    # 2. Basic cleaning and feature derivation.
    # ------------------------------------------------------------------
    df = df.rename(columns={'ProductCategory': 'Category', 'ProductBrand': 'Brand', 'ProductPrice': 'Price'})
    df['Price'] = df['Price'].round(2)

    # Half-open age intervals [0, 18), [18, 35), ... ; ages outside 0-99
    # become NaN and are encoded below as their own 'nan' label.
    bins = [0, 18, 35, 50, 65, 100]
    labels = ['Child', 'Young Adult', 'Adult', 'Middle Aged', 'Senior']
    df['age_bins'] = pd.cut(df['CustomerAge'], bins=bins, labels=labels, right=False)

    # NOTE(review): LabelEncoder numbers the labels in sorted (alphabetical)
    # order, so the resulting codes do not follow age order.
    le_age_bins = LabelEncoder()
    df['age_bins'] = le_age_bins.fit_transform(df['age_bins'].astype(str))

    st.write("### Data Description")
    st.write(df.describe())

    # ------------------------------------------------------------------
    # 3. Category distribution plot (drawn before Category is encoded so
    #    the axis shows the original category values).
    # ------------------------------------------------------------------
    fig, ax = plt.subplots()
    sns.countplot(x='Category', data=df, ax=ax, palette='viridis')
    ax.set_title("Product Category Distribution")
    st.pyplot(fig)
    # pyplot keeps a reference to every figure it creates; close this one so
    # figures do not pile up across Streamlit reruns.
    plt.close(fig)

    le_category = LabelEncoder()
    df['Category'] = le_category.fit_transform(df['Category'])
    le_brand = LabelEncoder()
    df['Brand'] = le_brand.fit_transform(df['Brand'])

    # ------------------------------------------------------------------
    # 4. Feature matrix with degree-2 polynomial expansion.
    # ------------------------------------------------------------------
    fv = df.drop(columns=['PurchaseIntent'])
    cv = df['PurchaseIntent']
    poly = PolynomialFeatures(degree=2, include_bias=False)
    numeric_columns = [col for col in fv.select_dtypes(include=[float, int]).columns if col != 'ProductID']
    poly_features = poly.fit_transform(fv[numeric_columns])
    poly_feature_names = poly.get_feature_names_out(numeric_columns)
    fv_with_poly = pd.DataFrame(poly_features, columns=poly_feature_names)
    # The polynomial output already contains the degree-1 terms, i.e. the
    # numeric columns themselves. Only the columns that were NOT expanded are
    # carried over, otherwise every numeric feature would appear twice under
    # the same column name.
    # NOTE(review): 'ProductID' (when present) is still passed through as a
    # feature, as before - confirm whether an identifier should be modelled.
    passthrough = fv.drop(columns=numeric_columns).reset_index(drop=True)
    fv_with_poly = pd.concat([passthrough, fv_with_poly], axis=1)

    # ------------------------------------------------------------------
    # 5. Split first, then oversample ONLY the training portion.
    #    Resampling before the split lets synthetic points derived from test
    #    rows leak into training and puts synthetic rows into the test set.
    # ------------------------------------------------------------------
    X_train, X_test, y_train, y_test = train_test_split(fv_with_poly, cv, test_size=0.2, random_state=42)

    smote = SMOTE(random_state=42)
    X_resampled, y_resampled = smote.fit_resample(X_train, y_train)
    X_train, y_train = X_resampled, y_resampled

    # Scaling statistics come from the training data only.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # ------------------------------------------------------------------
    # 6. Hyperparameter search.
    # ------------------------------------------------------------------
    # Valid (solver, penalty) pairs keyed by a plain string, because Optuna
    # categorical choices are expected to be None/bool/int/float/str.
    solver_penalty_options = {
        "lbfgs/l2": ("lbfgs", "l2"),
        "newton-cg/l2": ("newton-cg", "l2"),
        "sag/l2": ("sag", "l2"),
        "saga/l1": ("saga", "l1"),
        "saga/l2": ("saga", "l2"),
        "saga/elasticnet": ("saga", "elasticnet"),
    }

    def objective(trial):
        """Return the mean 5-fold cross-validated log loss for one trial.

        Samples a solver/penalty pair, the inverse regularisation strength
        ``C`` and, for the elasticnet penalty only, ``l1_ratio``. Lower is
        better, so the negated ``neg_log_loss`` score is returned.
        """
        choice = trial.suggest_categorical("choices", list(solver_penalty_options))
        solver, penalty = solver_penalty_options[choice]
        C = trial.suggest_float("C", 0.01, 1000.0)
        l1_ratio = trial.suggest_float("l1_ratio", 0, 1) if penalty == "elasticnet" else None
        # Pass l1_ratio through unchanged: a sampled 0.0 is a valid value and
        # must not be collapsed to None by a truthiness test.
        model = LogisticRegression(solver=solver, penalty=penalty, C=C, l1_ratio=l1_ratio, random_state=42)
        return -1 * cross_val_score(model, X_train, y_train, cv=5, scoring="neg_log_loss").mean()

    # Seeded sampler so that a Streamlit rerun reproduces the same search.
    study = optuna.create_study(direction="minimize", sampler=optuna.samplers.TPESampler(seed=42))
    study.optimize(objective, n_trials=100)

    # study.best_params holds the trial parameter names ("choices", "C" and
    # possibly "l1_ratio"), not LogisticRegression keyword arguments, so they
    # are translated before the final model is built.
    best_params = study.best_params
    best_solver, best_penalty = solver_penalty_options[best_params["choices"]]
    model_params = {"solver": best_solver, "penalty": best_penalty, "C": best_params["C"]}
    if "l1_ratio" in best_params:
        model_params["l1_ratio"] = best_params["l1_ratio"]

    st.write("### Best Hyperparameters")
    st.write(model_params)

    # ------------------------------------------------------------------
    # 7. Final fit and held-out evaluation.
    # ------------------------------------------------------------------
    final_model = LogisticRegression(random_state=42, **model_params)
    final_model.fit(X_train, y_train)
    acc = final_model.score(X_test, y_test)
    st.write(f"### Test Accuracy: {acc:.2f}")

    # ------------------------------------------------------------------
    # 8. Optional upload of the fitted model.
    #    NOTE(review): pressing a Streamlit button reruns the script, so the
    #    whole pipeline above executes again before this branch is reached.
    # ------------------------------------------------------------------
    st.write("#### Upload Model to Hugging Face")
    if st.button("Upload to Hugging Face"):
        import os

        import joblib
        import huggingface_hub

        # Credentials come from the environment rather than being written
        # into the source file.
        hf_token = os.environ.get("HF_TOKEN")
        hf_repo_id = os.environ.get("HF_REPO_ID")
        if not hf_token or not hf_repo_id:
            st.error("Set the HF_TOKEN and HF_REPO_ID environment variables before uploading.")
        else:
            joblib.dump(final_model, "model.joblib")
            huggingface_hub.login(token=hf_token)
            huggingface_hub.upload_file(path_or_fileobj="model.joblib", path_in_repo="model.joblib", repo_id=hf_repo_id)
            st.success("Model successfully uploaded!")
|
|