Spaces:

trohith89
/

Electronics-Sales-Classification

Sleeping

App Files Files Community

trohith89 committed on Jan 6, 2025

Commit

a01c0f8

verified ·

1 Parent(s): 2e48f30

Update app.py

Browse files

Files changed (1) hide show

app.py +94 -93

app.py CHANGED Viewed

@@ -1,106 +1,107 @@
 import streamlit as st
-import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
 import seaborn as sns
 import plotly.express as px
 import warnings
 from sklearn.linear_model import LogisticRegression
-from sklearn.model_selection import train_test_split
-from sklearn.preprocessing import LabelEncoder
-from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
-import time
 warnings.filterwarnings('ignore')
-st.set_page_config(page_title="Electronics Sales Prediction", layout="wide")
-st.title("📊 Consumer Electronics Sales Prediction App")
-# Load default dataset from file
-@st.cache_data
-def load_default_data():
-    return pd.read_csv('/mnt/data/consumer_electronics_sales_data.csv')
-st.markdown("## 📂 Upload Your Dataset or Use Default")
-uploaded_file = st.file_uploader("Upload CSV file", type=["csv"])
-if uploaded_file is not None:
-    with st.spinner('Loading Data...'):
-        time.sleep(1)
     data = pd.read_csv(uploaded_file)
-    st.success("Data Uploaded Successfully ✅")
-else:
-    try:
-        st.info("📊 Using Default Dataset")
-        data = load_default_data()
-    except FileNotFoundError:
-        st.error("🚨 Default dataset not found. Please upload a CSV file.")
-        st.stop()
-st.subheader("👀 Data Preview")
-st.write(data.head())
-df = data.copy()
-# Rename columns (Check if 'Sales' exists)
-if 'Sales' in df.columns:
-    df = df.rename(columns={'Order Date': 'order_date', 'Category': 'category', 'Sub-Category': 'sub_category', 'Sales': 'sales'})
-else:
-    st.error("🚨 'Sales' column not found in the dataset. Please check the file format.")
-    st.stop()
-st.subheader("📊 Data Summary")
-st.write(df.describe())
-# Data Visualization
-st.subheader("📈 Sales Distribution")
-fig, ax = plt.subplots()
-sns.histplot(df['sales'], kde=True, color='skyblue', ax=ax)
-st.pyplot(fig)
-st.markdown("### 🔄 Encoding Categorical Variables")
-le = LabelEncoder()
-df['category'] = le.fit_transform(df['category'])
-df['sub_category'] = le.fit_transform(df['sub_category'])
-st.write("Categorical Encoding Done 🎯")
-# Train-test split
-X = df[['category', 'sub_category']]
-y = df['sales']
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
-st.markdown("### 🚀 Model Training")
-model = LogisticRegression()
-model.fit(X_train, y_train)
-# Predictions
-y_pred = model.predict(X_test)
-# Evaluation
-st.markdown("### 📈 Model Evaluation")
-accuracy = accuracy_score(y_test, y_pred)
-st.metric(label="Model Accuracy", value=f"{accuracy:.2%}")
-st.write("🔍 Classification Report:")
-st.text(classification_report(y_test, y_pred))
-# Confusion Matrix
-st.subheader("🎯 Confusion Matrix")
-fig, ax = plt.subplots()
-sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt='d', cmap='Blues', ax=ax)
-st.pyplot(fig)
-# Additional Feature: Interactive Plot
-st.subheader("📊 Interactive Sales Analysis")
-fig = px.scatter(df, x='category', y='sales', color='sub_category', title="Sales by Category and Sub-Category")
-st.plotly_chart(fig)
-# Sidebar Information
-st.sidebar.title("📋 App Navigation")
-st.sidebar.markdown("- Upload Dataset")
-st.sidebar.markdown("- View Data Summary")
-st.sidebar.markdown("- Train Model")
-st.sidebar.markdown("- View Results")
-st.sidebar.info("🔧 **Ensure to preprocess your data properly for accurate results.**")

 import streamlit as st
 import pandas as pd
+import numpy as np
 import matplotlib.pyplot as plt
 import seaborn as sns
 import plotly.express as px
 import warnings
 from sklearn.linear_model import LogisticRegression
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.model_selection import train_test_split, cross_val_score
+from sklearn.preprocessing import StandardScaler, LabelEncoder
+from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, log_loss
+import optuna
+from imblearn.over_sampling import SMOTE
+from sklearn.preprocessing import PolynomialFeatures
 warnings.filterwarnings('ignore')
+# Streamlit App Title
+st.title("Consumer Electronics Sales Prediction App")
+# Upload CSV Dataset
+uploaded_file = st.file_uploader("Upload CSV File", type=["csv"])
+if uploaded_file:
     data = pd.read_csv(uploaded_file)
+    df = data.copy()
+    st.write("### Raw Data:")
+    st.write(df.head())
+    # Data Preprocessing
+    df = df.rename(columns={'ProductCategory': 'Category', 'ProductBrand': 'Brand', 'ProductPrice': 'Price'})
+    df['Price'] = df['Price'].apply(lambda x: round(x, 2))
+    # Bin age into categories
+    bins = [0, 18, 35, 50, 65, 100]
+    labels = ['Child', 'Young Adult', 'Adult', 'Middle Aged', 'Senior']
+    df['age_bins'] = pd.cut(df['CustomerAge'], bins=bins, labels=labels, right=False)
+    # Show Data Description
+    st.write("### Data Description")
+    st.write(df.describe())
+    # Visualize Product Category Distribution
+    fig, ax = plt.subplots()
+    sns.countplot(x='Category', data=df, ax=ax, palette='viridis')
+    ax.set_title("Product Category Distribution")
+    st.pyplot(fig)
+    # Encode Categorical Features
+    le_category = LabelEncoder()
+    df['Category'] = le_category.fit_transform(df['Category'])
+    le_brand = LabelEncoder()
+    df['Brand'] = le_brand.fit_transform(df['Brand'])
+    # Feature Engineering with Polynomial Features
+    fv = df.drop(columns=['PurchaseIntent'])
+    cv = df['PurchaseIntent']
+    poly = PolynomialFeatures(degree=2, include_bias=False)
+    numeric_columns = [col for col in fv.select_dtypes(include=[float, int]).columns if col != 'ProductID']
+    poly_features = poly.fit_transform(fv[numeric_columns])
+    poly_feature_names = poly.get_feature_names_out(numeric_columns)
+    fv_with_poly = pd.DataFrame(poly_features, columns=poly_feature_names)
+    fv_with_poly = pd.concat([fv.reset_index(drop=True), fv_with_poly], axis=1)
+    # Handle Class Imbalance with SMOTE
+    smote = SMOTE()
+    X_resampled, y_resampled = smote.fit_resample(fv_with_poly, cv)
+    # Train-Test Split
+    X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)
+    # Standardize the Data
+    scaler = StandardScaler()
+    X_train = scaler.fit_transform(X_train)
+    X_test = scaler.transform(X_test)
+    # Optuna Optimization
+    def objective(trial):
+        solver, penalty = trial.suggest_categorical("choices", [("lbfgs", "l2"), ("newton-cg", "l2"), ("sag", "l2"), ("saga", "l1"), ("saga", "l2"), ("saga", "elasticnet")])
+        C = trial.suggest_float("C", 0.01, 1000.0)
+        l1_ratio = trial.suggest_float("l1_ratio", 0, 1) if penalty == "elasticnet" else None
+        model = LogisticRegression(solver=solver, penalty=penalty, C=C, l1_ratio=l1_ratio if l1_ratio else None)
+        return -1 * cross_val_score(model, X_train, y_train, cv=5, scoring="neg_log_loss").mean()
+    study = optuna.create_study(direction="minimize")
+    study.optimize(objective, n_trials=100)
+    best_params = study.best_params
+    st.write("### Best Hyperparameters")
+    st.write(best_params)
+    # Train Final Model
+    final_model = LogisticRegression(**best_params)
+    final_model.fit(X_train, y_train)
+    acc = final_model.score(X_test, y_test)
+    st.write(f"### Test Accuracy: {acc:.2f}")
+    # Hugging Face Upload Section
+    st.write("#### Upload Model to Hugging Face")
+    if st.button("Upload to Hugging Face"):
+        import joblib
+        import huggingface_hub
+        joblib.dump(final_model, "model.joblib")
+        huggingface_hub.login(token="<YOUR_HUGGINGFACE_TOKEN>")
+        huggingface_hub.upload_file(path_or_fileobj="model.joblib", path_in_repo="model.joblib", repo_id="<your_repo>")
+        st.success("Model successfully uploaded!")