trohith89 committed on
Commit
a01c0f8
·
verified ·
1 Parent(s): 2e48f30

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +94 -93
app.py CHANGED
@@ -1,106 +1,107 @@
1
  import streamlit as st
2
- import numpy as np
3
  import pandas as pd
 
4
  import matplotlib.pyplot as plt
5
  import seaborn as sns
6
  import plotly.express as px
7
  import warnings
8
  from sklearn.linear_model import LogisticRegression
9
- from sklearn.model_selection import train_test_split
10
- from sklearn.preprocessing import LabelEncoder
11
- from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
12
- import time
 
 
 
13
 
14
  warnings.filterwarnings('ignore')
15
 
16
- st.set_page_config(page_title="Electronics Sales Prediction", layout="wide")
17
-
18
- st.title("πŸ“Š Consumer Electronics Sales Prediction App")
19
-
20
- # Load default dataset from file
21
- @st.cache_data
22
- def load_default_data():
23
- return pd.read_csv('/mnt/data/consumer_electronics_sales_data.csv')
24
 
25
- st.markdown("## πŸ“‚ Upload Your Dataset or Use Default")
26
- uploaded_file = st.file_uploader("Upload CSV file", type=["csv"])
27
 
28
- if uploaded_file is not None:
29
- with st.spinner('Loading Data...'):
30
- time.sleep(1)
31
  data = pd.read_csv(uploaded_file)
32
- st.success("Data Uploaded Successfully βœ…")
33
- else:
34
- try:
35
- st.info("πŸ“Š Using Default Dataset")
36
- data = load_default_data()
37
- except FileNotFoundError:
38
- st.error("🚨 Default dataset not found. Please upload a CSV file.")
39
- st.stop()
40
-
41
- st.subheader("πŸ‘€ Data Preview")
42
- st.write(data.head())
43
-
44
-
45
- df = data.copy()
46
-
47
- # Rename columns (Check if 'Sales' exists)
48
- if 'Sales' in df.columns:
49
- df = df.rename(columns={'Order Date': 'order_date', 'Category': 'category', 'Sub-Category': 'sub_category', 'Sales': 'sales'})
50
- else:
51
- st.error("🚨 'Sales' column not found in the dataset. Please check the file format.")
52
- st.stop()
53
-
54
- st.subheader("πŸ“Š Data Summary")
55
- st.write(df.describe())
56
-
57
- # Data Visualization
58
- st.subheader("πŸ“ˆ Sales Distribution")
59
- fig, ax = plt.subplots()
60
- sns.histplot(df['sales'], kde=True, color='skyblue', ax=ax)
61
- st.pyplot(fig)
62
-
63
- st.markdown("### πŸ”„ Encoding Categorical Variables")
64
- le = LabelEncoder()
65
- df['category'] = le.fit_transform(df['category'])
66
- df['sub_category'] = le.fit_transform(df['sub_category'])
67
- st.write("Categorical Encoding Done 🎯")
68
-
69
- # Train-test split
70
- X = df[['category', 'sub_category']]
71
- y = df['sales']
72
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
73
-
74
- st.markdown("### πŸš€ Model Training")
75
- model = LogisticRegression()
76
- model.fit(X_train, y_train)
77
-
78
- # Predictions
79
- y_pred = model.predict(X_test)
80
-
81
- # Evaluation
82
- st.markdown("### πŸ“ˆ Model Evaluation")
83
- accuracy = accuracy_score(y_test, y_pred)
84
- st.metric(label="Model Accuracy", value=f"{accuracy:.2%}")
85
- st.write("πŸ” Classification Report:")
86
- st.text(classification_report(y_test, y_pred))
87
-
88
- # Confusion Matrix
89
- st.subheader("🎯 Confusion Matrix")
90
- fig, ax = plt.subplots()
91
- sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt='d', cmap='Blues', ax=ax)
92
- st.pyplot(fig)
93
-
94
- # Additional Feature: Interactive Plot
95
- st.subheader("πŸ“Š Interactive Sales Analysis")
96
- fig = px.scatter(df, x='category', y='sales', color='sub_category', title="Sales by Category and Sub-Category")
97
- st.plotly_chart(fig)
98
-
99
- # Sidebar Information
100
- st.sidebar.title("πŸ“‹ App Navigation")
101
- st.sidebar.markdown("- Upload Dataset")
102
- st.sidebar.markdown("- View Data Summary")
103
- st.sidebar.markdown("- Train Model")
104
- st.sidebar.markdown("- View Results")
105
-
106
- st.sidebar.info("πŸ”§ **Ensure to preprocess your data properly for accurate results.**")
 
 
 
 
 
 
 
1
  import streamlit as st
 
2
  import pandas as pd
3
+ import numpy as np
4
  import matplotlib.pyplot as plt
5
  import seaborn as sns
6
  import plotly.express as px
7
  import warnings
8
  from sklearn.linear_model import LogisticRegression
9
+ from sklearn.neighbors import KNeighborsClassifier
10
+ from sklearn.model_selection import train_test_split, cross_val_score
11
+ from sklearn.preprocessing import StandardScaler, LabelEncoder
12
+ from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, log_loss
13
+ import optuna
14
+ from imblearn.over_sampling import SMOTE
15
+ from sklearn.preprocessing import PolynomialFeatures
16
 
17
  warnings.filterwarnings('ignore')
18
 
19
+ # Streamlit App Title
20
+ st.title("Consumer Electronics Sales Prediction App")
 
 
 
 
 
 
21
 
22
+ # Upload CSV Dataset
23
+ uploaded_file = st.file_uploader("Upload CSV File", type=["csv"])
24
 
25
+ if uploaded_file:
 
 
26
  data = pd.read_csv(uploaded_file)
27
+ df = data.copy()
28
+ st.write("### Raw Data:")
29
+ st.write(df.head())
30
+
31
+ # Data Preprocessing
32
+ df = df.rename(columns={'ProductCategory': 'Category', 'ProductBrand': 'Brand', 'ProductPrice': 'Price'})
33
+ df['Price'] = df['Price'].apply(lambda x: round(x, 2))
34
+
35
+ # Bin age into categories
36
+ bins = [0, 18, 35, 50, 65, 100]
37
+ labels = ['Child', 'Young Adult', 'Adult', 'Middle Aged', 'Senior']
38
+ df['age_bins'] = pd.cut(df['CustomerAge'], bins=bins, labels=labels, right=False)
39
+
40
+ # Show Data Description
41
+ st.write("### Data Description")
42
+ st.write(df.describe())
43
+
44
+ # Visualize Product Category Distribution
45
+ fig, ax = plt.subplots()
46
+ sns.countplot(x='Category', data=df, ax=ax, palette='viridis')
47
+ ax.set_title("Product Category Distribution")
48
+ st.pyplot(fig)
49
+
50
+ # Encode Categorical Features
51
+ le_category = LabelEncoder()
52
+ df['Category'] = le_category.fit_transform(df['Category'])
53
+ le_brand = LabelEncoder()
54
+ df['Brand'] = le_brand.fit_transform(df['Brand'])
55
+
56
# Feature Engineering with Polynomial Features
fv = df.drop(columns=['PurchaseIntent'])
cv = df['PurchaseIntent']
poly = PolynomialFeatures(degree=2, include_bias=False)
# ProductID is excluded from the expansion (it is filtered out by name here).
numeric_columns = [col for col in fv.select_dtypes(include=[float, int]).columns if col != 'ProductID']
poly_features = poly.fit_transform(fv[numeric_columns])
poly_feature_names = poly.get_feature_names_out(numeric_columns)
# The degree-2 expansion already contains every selected numeric column as a
# degree-1 term, so it is used on its own. Concatenating it back onto fv would
# duplicate those columns and carry ProductID and the string-labelled age_bins
# column into SMOTE and StandardScaler, which expect numeric input.
fv_with_poly = pd.DataFrame(poly_features, columns=poly_feature_names, index=fv.index)

# Train-Test Split
# Split before resampling so the held-out rows are all real observations and
# no synthetic sample derived from them ends up in the training set.
X_train, X_test, y_train, y_test = train_test_split(fv_with_poly, cv, test_size=0.2, random_state=42)

# Handle Class Imbalance with SMOTE (training rows only; seeded so reruns match)
smote = SMOTE(random_state=42)
X_train, y_train = smote.fit_resample(X_train, y_train)

# Standardize the Data
# The scaler is fitted on the training rows and only applied to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
77
+
78
+ # Optuna Optimization
79
def objective(trial):
    """Optuna objective: mean 5-fold cross-validated log loss of one candidate.

    Samples a (solver, penalty) pair, the inverse regularization strength
    ``C`` and, for the elasticnet penalty only, ``l1_ratio``. Returns the
    positive log loss on ``X_train`` / ``y_train`` for the study to minimize.
    """
    # Solver and penalty are drawn together so only the listed pairings are tried.
    solver, penalty = trial.suggest_categorical(
        "choices",
        [
            ("lbfgs", "l2"),
            ("newton-cg", "l2"),
            ("sag", "l2"),
            ("saga", "l1"),
            ("saga", "l2"),
            ("saga", "elasticnet"),
        ],
    )
    C = trial.suggest_float("C", 0.01, 1000.0)
    # l1_ratio is sampled for elasticnet only and is None otherwise. Pass the
    # value through unchanged: a truthiness test would turn a sampled 0.0
    # into None and leave elasticnet without its mixing ratio.
    l1_ratio = trial.suggest_float("l1_ratio", 0, 1) if penalty == "elasticnet" else None
    model = LogisticRegression(solver=solver, penalty=penalty, C=C, l1_ratio=l1_ratio)
    # The scorer yields negated log loss; flip the sign so lower is better.
    return -cross_val_score(model, X_train, y_train, cv=5, scoring="neg_log_loss").mean()
85
+
86
+ study = optuna.create_study(direction="minimize")
87
+ study.optimize(objective, n_trials=100)
88
+
89
# Optuna reports the tuned values under the names used in objective():
# "choices" holds the (solver, penalty) pair, "C" the inverse regularization
# strength, and "l1_ratio" is present only when the best trial used elasticnet.
best_params = study.best_params
st.write("### Best Hyperparameters")
st.write(best_params)

# Train Final Model
# "choices" is not a LogisticRegression argument, so the pair is unpacked
# explicitly instead of splatting best_params into the constructor.
best_solver, best_penalty = best_params["choices"]
final_model = LogisticRegression(
    solver=best_solver,
    penalty=best_penalty,
    C=best_params["C"],
    l1_ratio=best_params.get("l1_ratio"),
)
final_model.fit(X_train, y_train)
acc = final_model.score(X_test, y_test)
st.write(f"### Test Accuracy: {acc:.2f}")
98
+
99
+ # Hugging Face Upload Section
100
+ st.write("#### Upload Model to Hugging Face")
101
+ if st.button("Upload to Hugging Face"):
102
+ import joblib
103
+ import huggingface_hub
104
+ joblib.dump(final_model, "model.joblib")
105
+ huggingface_hub.login(token="<YOUR_HUGGINGFACE_TOKEN>")
106
+ huggingface_hub.upload_file(path_or_fileobj="model.joblib", path_in_repo="model.joblib", repo_id="<your_repo>")
107
+ st.success("Model successfully uploaded!")