# Source: Hugging Face Space file "app.py" by trohith89 ("Update app.py", commit feece51, 4.54 kB)
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import warnings
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, log_loss
import optuna
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import PolynomialFeatures
warnings.filterwarnings('ignore')
# Navigation: jump to the EDA page of this multi-page Streamlit app.
# (Indentation restored — the switch_page call belongs inside the button branch.)
if st.button("EDA"):
    st.switch_page("pages/EDA.py")

# Streamlit App Title
st.title("Consumer Electronics Sales Prediction App")

# Upload CSV Dataset — the rest of the app only runs once a file is provided.
uploaded_file = st.file_uploader("Upload CSV File", type=["csv"])
if uploaded_file:
    # ---- Load & preview -------------------------------------------------
    data = pd.read_csv(uploaded_file)
    df = data.copy()
    st.write("### Raw Data:")
    st.write(df.head())

    # ---- Data preprocessing ---------------------------------------------
    df = df.rename(columns={'ProductCategory': 'Category', 'ProductBrand': 'Brand', 'ProductPrice': 'Price'})
    df['Price'] = df['Price'].round(2)  # vectorized; same result as apply(round, 2)

    # Bin age into ordinal categories, then label-encode the bins.
    # right=False makes bins left-inclusive: [0,18), [18,35), ...
    bins = [0, 18, 35, 50, 65, 100]
    labels = ['Child', 'Young Adult', 'Adult', 'Middle Aged', 'Senior']
    df['age_bins'] = pd.cut(df['CustomerAge'], bins=bins, labels=labels, right=False)
    le_age_bins = LabelEncoder()
    df['age_bins'] = le_age_bins.fit_transform(df['age_bins'].astype(str))

    # ---- Summary & visualization ----------------------------------------
    st.write("### Data Description")
    st.write(df.describe())

    fig, ax = plt.subplots()
    sns.countplot(x='Category', data=df, ax=ax, palette='viridis')
    ax.set_title("Product Category Distribution")
    st.pyplot(fig)

    # Encode remaining categorical features.
    le_category = LabelEncoder()
    df['Category'] = le_category.fit_transform(df['Category'])
    le_brand = LabelEncoder()
    df['Brand'] = le_brand.fit_transform(df['Brand'])

    # ---- Feature engineering: degree-2 polynomial features ---------------
    fv = df.drop(columns=['PurchaseIntent'])
    cv = df['PurchaseIntent']
    poly = PolynomialFeatures(degree=2, include_bias=False)
    numeric_columns = [col for col in fv.select_dtypes(include=[float, int]).columns if col != 'ProductID']
    poly_features = poly.fit_transform(fv[numeric_columns])
    poly_feature_names = poly.get_feature_names_out(numeric_columns)
    fv_with_poly = pd.DataFrame(poly_features, columns=poly_feature_names)
    fv_with_poly = pd.concat([fv.reset_index(drop=True), fv_with_poly], axis=1)

    # ---- Split FIRST, then oversample the training fold only --------------
    # BUG FIX: the original ran SMOTE before train_test_split, so synthetic
    # (interpolated) samples leaked into the test set and inflated accuracy.
    # SMOTE must only ever see the training data.
    X_train, X_test, y_train, y_test = train_test_split(
        fv_with_poly, cv, test_size=0.2, random_state=42, stratify=cv
    )
    smote = SMOTE(random_state=42)
    X_train, y_train = smote.fit_resample(X_train, y_train)

    # Standardize: fit on train only, apply the same transform to test.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # ---- Hyperparameter search with Optuna --------------------------------
    def objective(trial):
        """Return mean 5-fold CV log-loss (positive; lower is better) for a trial."""
        # Solver and penalty are sampled jointly because not every solver
        # supports every penalty (e.g. only saga supports l1/elasticnet).
        solver, penalty = trial.suggest_categorical(
            "choices",
            [("lbfgs", "l2"), ("newton-cg", "l2"), ("sag", "l2"),
             ("saga", "l1"), ("saga", "l2"), ("saga", "elasticnet")],
        )
        # BUG FIX: sample C log-uniformly — a uniform draw over 0.01..1000
        # almost never explores the small-C (strong regularization) region.
        C = trial.suggest_float("C", 0.01, 1000.0, log=True)
        # BUG FIX: pass l1_ratio through as-is. The original used
        # `l1_ratio if l1_ratio else None`, which turned a sampled 0.0 into
        # None and crashed elasticnet trials.
        l1_ratio = trial.suggest_float("l1_ratio", 0.0, 1.0) if penalty == "elasticnet" else None
        # max_iter raised: sag/saga rarely converge within the default 100.
        model = LogisticRegression(solver=solver, penalty=penalty, C=C,
                                   l1_ratio=l1_ratio, max_iter=1000)
        # neg_log_loss is negative; negate to get a minimizable log-loss.
        return -1 * cross_val_score(model, X_train, y_train, cv=5, scoring="neg_log_loss").mean()

    study = optuna.create_study(direction="minimize")
    study.optimize(objective, n_trials=100)
    best_params = study.best_params
    st.write("### Best Hyperparameters")
    st.write(best_params)

    # ---- Train final model -------------------------------------------------
    # BUG FIX: best_params contains the key "choices" holding a
    # (solver, penalty) tuple — LogisticRegression(**best_params) raised a
    # TypeError. Unpack it into real constructor arguments instead.
    best_solver, best_penalty = best_params["choices"]
    final_kwargs = {
        "solver": best_solver,
        "penalty": best_penalty,
        "C": best_params["C"],
        "max_iter": 1000,
    }
    if best_penalty == "elasticnet":
        final_kwargs["l1_ratio"] = best_params["l1_ratio"]
    final_model = LogisticRegression(**final_kwargs)
    final_model.fit(X_train, y_train)
    acc = final_model.score(X_test, y_test)
    st.write(f"### Test Accuracy: {acc:.2f}")

    # ---- Hugging Face upload ------------------------------------------------
    st.write("#### Upload Model to Hugging Face")
    if st.button("Upload to Hugging Face"):
        import joblib
        import huggingface_hub
        joblib.dump(final_model, "model.joblib")
        # SECURITY: never hard-code a real token here — load it from
        # st.secrets or an environment variable before deploying.
        huggingface_hub.login(token="<YOUR_HUGGINGFACE_TOKEN>")
        huggingface_hub.upload_file(path_or_fileobj="model.joblib", path_in_repo="model.joblib", repo_id="<your_repo>")
        st.success("Model successfully uploaded!")