usernameiskheejay committed on
Commit
e8bfbf5
·
1 Parent(s): 10d7565
Files changed (2) hide show
  1. app.py +134 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import pickle
4
+ import numpy as np
5
+ import matplotlib.pyplot as plt
6
+ import seaborn as sns
7
+ import plotly.figure_factory as ff
8
+ from sklearn.model_selection import train_test_split
9
+ from sklearn.preprocessing import StandardScaler, LabelEncoder
10
+ from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier, ExtraTreesClassifier
11
+ from sklearn.linear_model import LogisticRegression
12
+ from sklearn.svm import SVC
13
+ from sklearn.tree import DecisionTreeClassifier
14
+ from sklearn.neighbors import KNeighborsClassifier
15
+ from sklearn.naive_bayes import GaussianNB
16
+ from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
17
+ from datasets import load_dataset
18
+
19
+ # Load Data
20
@st.cache_data
def load_data():
    """Fetch the phone-price train and test tables as pandas DataFrames.

    Both tables are loaded through ``datasets.load_dataset`` and converted
    with ``.to_pandas()``. The result is memoised by ``st.cache_data`` so the
    download does not repeat on every script rerun.

    Returns:
        tuple: ``(train_df, test_df)``.
    """
    train_df = load_dataset("kheejay88/phone_price_classification_train.csv")["train"].to_pandas()
    # The test table previously went through pd.read_csv(...)["train"].to_pandas(),
    # which treats the repo id as a local file path and then calls a method
    # DataFrames do not have. Load it the same way as the train table instead.
    # NOTE(review): assumes the test repo also exposes its rows under the
    # default "train" split, as single-file CSV repos do - confirm.
    test_df = load_dataset("kheejay88/phone_price_classification_test.csv")["train"].to_pandas()
    return train_df, test_df


train_df, test_df = load_data()
27
+
28
+ # Data Preprocessing
29
def preprocess_data(df):
    """Impute missing numeric values and label-encode object columns.

    Args:
        df: Input DataFrame. It is copied first, so the caller's frame is
            never modified.

    Returns:
        tuple: ``(processed_df, label_encoders)`` where ``label_encoders``
        maps each object-dtype column name to the ``LabelEncoder`` fitted
        on it (empty when there are no object columns).
    """
    df = df.copy()

    # numeric_only=True keeps median() from raising TypeError on object
    # columns (pandas >= 2.0); only numeric columns receive a median fill.
    df = df.fillna(df.median(numeric_only=True))

    label_encoders = {}
    for col in df.select_dtypes(include=['object']).columns:
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col])
        label_encoders[col] = le  # kept so the mapping can be reused or inverted

    return df, label_encoders
40
+
41
# Encode/impute the training frame; the fitted encoders are kept for reuse.
train_df, encoders = preprocess_data(train_df)

# Splitting features and target variable
X = train_df.drop(columns=['price_range'])  # Updated target variable
y = train_df['price_range']

# Splitting into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardizing the data (scaler is fitted on the training split only)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Model Training and Evaluation
models = {
    "Logistic Regression": LogisticRegression(),
    "Random Forest": RandomForestClassifier(),
    "Gradient Boosting": GradientBoostingClassifier(),
    "AdaBoost": AdaBoostClassifier(),
    "Extra Trees": ExtraTreesClassifier(),
    "SVC": SVC(),
    "Decision Tree": DecisionTreeClassifier(),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "Naive Bayes": GaussianNB(),
}


@st.cache_resource
def _train_and_score(_candidates, X_fit, y_fit, X_eval, y_eval):
    """Fit every candidate model, score it, and pickle it to disk.

    Streamlit re-executes the whole script on each rerun; caching this step
    means the nine estimators are trained and written to disk once per
    distinct dataset instead of on every rerun.

    Args:
        _candidates: Mapping of display name -> unfitted estimator. The
            leading underscore tells Streamlit not to hash this argument.
        X_fit, y_fit: Training features and labels.
        X_eval, y_eval: Hold-out features and labels used for accuracy.

    Returns:
        tuple: ``(scores, fitted)`` - accuracy per model name, and the
        fitted estimator per model name.
    """
    scores = {}
    fitted = {}
    for name, model in _candidates.items():
        model.fit(X_fit, y_fit)
        scores[name] = accuracy_score(y_eval, model.predict(X_eval))
        fitted[name] = model  # Store the trained model

        # Save trained models
        with open(f"{name.replace(' ', '_')}.pkl", "wb") as f:
            pickle.dump(model, f)
    return scores, fitted


performance, trained_models = _train_and_score(models, X_train, y_train, X_test, y_test)

# Selecting the best model (highest hold-out accuracy)
best_model_name = max(performance, key=performance.get)
best_model = trained_models[best_model_name]
85
+
86
# Streamlit UI
# The emoji in the headings below were stored as mojibake (UTF-8 bytes decoded
# with a single-byte code page); they are restored to the intended characters.
st.title("📊 Machine Learning Model Evaluation App")
st.write("This application evaluates multiple machine learning models for predicting phone price ranges based on various phone specifications.")

# Data Overview
st.write("## 🔍 Data Overview")
st.write(train_df.head())

# Data Visualization
st.write("## 📈 Data Visualization")

# Target Distribution
st.write("### 🎯 Target Distribution")
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(x=y, ax=ax)
ax.set_xlabel("Price Range")
ax.set_ylabel("Count")
st.pyplot(fig)
# pyplot keeps every figure alive until it is closed; without this, one more
# figure would be retained on each script rerun.
plt.close(fig)

# Model Performance
st.write("## 🏆 Model Performance")
performance_df = pd.DataFrame.from_dict(performance, orient='index', columns=['Accuracy'])
performance_df = performance_df.sort_values(by='Accuracy', ascending=False)
st.table(performance_df)

st.write(f"### 🎖️ Best Model: **{best_model_name}** with accuracy **{performance[best_model_name]:.4f}**")

# Classification Report (for the best model, on the hold-out split)
st.write("## 📊 Classification Report")
y_pred_best = best_model.predict(X_test)
report_dict = classification_report(y_test, y_pred_best, output_dict=True)
report_df = pd.DataFrame(report_dict).transpose()
st.dataframe(report_df.style.format("{:.2f}"))

# Confusion Matrix
st.write("## 🔥 Confusion Matrix")
cm = confusion_matrix(y_test, y_pred_best)
labels = list(map(str, np.unique(y_test)))  # Ensure labels are a list of strings

fig_cm = ff.create_annotated_heatmap(
    z=cm,
    x=labels,
    y=labels,
    annotation_text=cm.astype(str),  # Show exact values inside the heatmap
    colorscale='Blues',
    showscale=True,
)
# confusion_matrix puts true labels on rows and predictions on columns;
# label the axes so the heatmap cannot be misread.
fig_cm.update_layout(xaxis_title="Predicted", yaxis_title="Actual")

st.plotly_chart(fig_cm)
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
streamlit
pandas
numpy
matplotlib
seaborn
scikit-learn
plotly
datasets