# Source: Hugging Face Space file "app.py" by trohith89 ("Update app.py", commit feece51, 4.54 kB)
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import warnings
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, log_loss
import optuna
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import PolynomialFeatures
warnings.filterwarnings('ignore')
# Navigation: jump to the EDA page of this multi-page Streamlit app.
# (Indentation restored — the switch_page call belongs inside the button branch.)
if st.button("EDA"):
    st.switch_page("pages/EDA.py")

# Streamlit App Title
st.title("Consumer Electronics Sales Prediction App")

# Upload CSV Dataset — the rest of the app only runs once a file is provided.
uploaded_file = st.file_uploader("Upload CSV File", type=["csv"])
if uploaded_file:
    # ---- Load & preview -------------------------------------------------
    data = pd.read_csv(uploaded_file)
    df = data.copy()
    st.write("### Raw Data:")
    st.write(df.head())

    # ---- Data preprocessing ---------------------------------------------
    df = df.rename(columns={'ProductCategory': 'Category', 'ProductBrand': 'Brand', 'ProductPrice': 'Price'})
    df['Price'] = df['Price'].round(2)  # vectorized; same result as apply(round, 2)

    # Bin age into ordinal categories, then label-encode the bins.
    # right=False makes bins left-inclusive: [0,18), [18,35), ...
    bins = [0, 18, 35, 50, 65, 100]
    labels = ['Child', 'Young Adult', 'Adult', 'Middle Aged', 'Senior']
    df['age_bins'] = pd.cut(df['CustomerAge'], bins=bins, labels=labels, right=False)
    le_age_bins = LabelEncoder()
    df['age_bins'] = le_age_bins.fit_transform(df['age_bins'].astype(str))

    # ---- Summary & visualization ----------------------------------------
    st.write("### Data Description")
    st.write(df.describe())

    fig, ax = plt.subplots()
    sns.countplot(x='Category', data=df, ax=ax, palette='viridis')
    ax.set_title("Product Category Distribution")
    st.pyplot(fig)

    # Encode remaining categorical features.
    le_category = LabelEncoder()
    df['Category'] = le_category.fit_transform(df['Category'])
    le_brand = LabelEncoder()
    df['Brand'] = le_brand.fit_transform(df['Brand'])

    # ---- Feature engineering: degree-2 polynomial features ---------------
    fv = df.drop(columns=['PurchaseIntent'])
    cv = df['PurchaseIntent']
    poly = PolynomialFeatures(degree=2, include_bias=False)
    numeric_columns = [col for col in fv.select_dtypes(include=[float, int]).columns if col != 'ProductID']
    poly_features = poly.fit_transform(fv[numeric_columns])
    poly_feature_names = poly.get_feature_names_out(numeric_columns)
    fv_with_poly = pd.DataFrame(poly_features, columns=poly_feature_names)
    fv_with_poly = pd.concat([fv.reset_index(drop=True), fv_with_poly], axis=1)

    # ---- Split FIRST, then oversample the training fold only --------------
    # BUG FIX: the original ran SMOTE before train_test_split, so synthetic
    # (interpolated) samples leaked into the test set and inflated accuracy.
    # SMOTE must only ever see the training data.
    X_train, X_test, y_train, y_test = train_test_split(
        fv_with_poly, cv, test_size=0.2, random_state=42, stratify=cv
    )
    smote = SMOTE(random_state=42)
    X_train, y_train = smote.fit_resample(X_train, y_train)

    # Standardize: fit on train only, apply the same transform to test.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # ---- Hyperparameter search with Optuna --------------------------------
    def objective(trial):
        """Return mean 5-fold CV log-loss (positive; lower is better) for a trial."""
        # Solver and penalty are sampled jointly because not every solver
        # supports every penalty (e.g. only saga supports l1/elasticnet).
        solver, penalty = trial.suggest_categorical(
            "choices",
            [("lbfgs", "l2"), ("newton-cg", "l2"), ("sag", "l2"),
             ("saga", "l1"), ("saga", "l2"), ("saga", "elasticnet")],
        )
        # BUG FIX: sample C log-uniformly — a uniform draw over 0.01..1000
        # almost never explores the small-C (strong regularization) region.
        C = trial.suggest_float("C", 0.01, 1000.0, log=True)
        # BUG FIX: pass l1_ratio through as-is. The original used
        # `l1_ratio if l1_ratio else None`, which turned a sampled 0.0 into
        # None and crashed elasticnet trials.
        l1_ratio = trial.suggest_float("l1_ratio", 0.0, 1.0) if penalty == "elasticnet" else None
        # max_iter raised: sag/saga rarely converge within the default 100.
        model = LogisticRegression(solver=solver, penalty=penalty, C=C,
                                   l1_ratio=l1_ratio, max_iter=1000)
        # neg_log_loss is negative; negate to get a minimizable log-loss.
        return -1 * cross_val_score(model, X_train, y_train, cv=5, scoring="neg_log_loss").mean()

    study = optuna.create_study(direction="minimize")
    study.optimize(objective, n_trials=100)
    best_params = study.best_params
    st.write("### Best Hyperparameters")
    st.write(best_params)

    # ---- Train final model -------------------------------------------------
    # BUG FIX: best_params contains the key "choices" holding a
    # (solver, penalty) tuple — LogisticRegression(**best_params) raised a
    # TypeError. Unpack it into real constructor arguments instead.
    best_solver, best_penalty = best_params["choices"]
    final_kwargs = {
        "solver": best_solver,
        "penalty": best_penalty,
        "C": best_params["C"],
        "max_iter": 1000,
    }
    if best_penalty == "elasticnet":
        final_kwargs["l1_ratio"] = best_params["l1_ratio"]
    final_model = LogisticRegression(**final_kwargs)
    final_model.fit(X_train, y_train)
    acc = final_model.score(X_test, y_test)
    st.write(f"### Test Accuracy: {acc:.2f}")

    # ---- Hugging Face upload ------------------------------------------------
    st.write("#### Upload Model to Hugging Face")
    if st.button("Upload to Hugging Face"):
        import joblib
        import huggingface_hub
        joblib.dump(final_model, "model.joblib")
        # SECURITY: never hard-code a real token here — load it from
        # st.secrets or an environment variable before deploying.
        huggingface_hub.login(token="<YOUR_HUGGINGFACE_TOKEN>")
        huggingface_hub.upload_file(path_or_fileobj="model.joblib", path_in_repo="model.joblib", repo_id="<your_repo>")
        st.success("Model successfully uploaded!")