Spaces:

kheejay88
/

phone_price_category_classification

Build error

App Files Files Community

usernameiskheejay committed on Mar 3, 2025

Commit

e8bfbf5

1 Parent(s): 10d7565

ppc

Browse files

Files changed (2) hide show

app.py +134 -0
requirements.txt +7 -0

app.py ADDED Viewed

	@@ -0,0 +1,134 @@

+import streamlit as st
+import pandas as pd
+import pickle
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+import plotly.figure_factory as ff
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler, LabelEncoder
+from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier, ExtraTreesClassifier
+from sklearn.linear_model import LogisticRegression
+from sklearn.svm import SVC
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.naive_bayes import GaussianNB
+from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
+from datasets import load_dataset
+# Load Data
+@st.cache_data
+def load_data():
+    train_df = load_dataset("kheejay88/phone_price_classification_train.csv")["train"].to_pandas()
+    test_df = pd.read_csv("kheejay88/phone_price_classification_test.csv")["train"].to_pandas()
+    return train_df, test_df
+train_df, test_df = load_data()
+# Data Preprocessing
+def preprocess_data(df):
+    df = df.copy()
+    df.fillna(df.median(), inplace=True)  # Handle missing values
+    label_encoders = {}
+    for col in df.select_dtypes(include=['object']).columns:
+        le = LabelEncoder()
+        df[col] = le.fit_transform(df[col])
+        label_encoders[col] = le
+    return df, label_encoders
+train_df, encoders = preprocess_data(train_df)
+# Splitting features and target variable
+X = train_df.drop(columns=['price_range'])  # Updated target variable
+y = train_df['price_range']
+# Splitting into training and testing sets
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+# Standardizing the data
+scaler = StandardScaler()
+X_train = scaler.fit_transform(X_train)
+X_test = scaler.transform(X_test)
+# Model Training and Evaluation
+models = {
+    "Logistic Regression": LogisticRegression(),
+    "Random Forest": RandomForestClassifier(),
+    "Gradient Boosting": GradientBoostingClassifier(),
+    "AdaBoost": AdaBoostClassifier(),
+    "Extra Trees": ExtraTreesClassifier(),
+    "SVC": SVC(),
+    "Decision Tree": DecisionTreeClassifier(),
+    "K-Nearest Neighbors": KNeighborsClassifier(),
+    "Naive Bayes": GaussianNB()
+}
+performance = {}
+trained_models = {}
+for name, model in models.items():
+    model.fit(X_train, y_train)
+    y_pred = model.predict(X_test)
+    acc = accuracy_score(y_test, y_pred)
+    performance[name] = acc
+    trained_models[name] = model  # Store the trained model
+    # Save trained models
+    with open(f"{name.replace(' ', '_')}.pkl", "wb") as f:
+        pickle.dump(model, f)
+# Selecting the best model
+best_model_name = max(performance, key=performance.get)
+best_model = trained_models[best_model_name]
+# Streamlit UI
+st.title("📊 Machine Learning Model Evaluation App")
+st.write("This application evaluates multiple machine learning models for predicting phone price ranges based on various phone specifications.")
+# Data Overview
+st.write("## 🔍 Data Overview")
+st.write(train_df.head())
+# Data Visualization
+st.write("## 📈 Data Visualization")
+# Target Distribution
+st.write("### 🎯 Target Distribution")
+fig, ax = plt.subplots(figsize=(6, 4))
+sns.countplot(x=y, ax=ax)
+ax.set_xlabel("Price Range")
+ax.set_ylabel("Count")
+st.pyplot(fig)
+# Model Performance
+st.write("## 🏆 Model Performance")
+performance_df = pd.DataFrame.from_dict(performance, orient='index', columns=['Accuracy'])
+performance_df = performance_df.sort_values(by='Accuracy', ascending=False)
+st.table(performance_df)
+st.write(f"### 🎖️ Best Model: **{best_model_name}** with accuracy **{performance[best_model_name]:.4f}**")
+# Classification Report
+st.write("## 📊 Classification Report")
+y_pred_best = best_model.predict(X_test)
+report_dict = classification_report(y_test, y_pred_best, output_dict=True)
+report_df = pd.DataFrame(report_dict).transpose()
+st.dataframe(report_df.style.format("{:.2f}"))
+# Confusion Matrix
+st.write("## 🔥 Confusion Matrix")
+cm = confusion_matrix(y_test, y_pred_best)
+labels = list(map(str, np.unique(y_test)))  # Ensure labels are a list of strings
+fig_cm = ff.create_annotated_heatmap(
+    z=cm,
+    x=labels,
+    y=labels,
+    annotation_text=cm.astype(str),  # Show exact values inside the heatmap
+    colorscale='Blues',
+    showscale=True
+)
+st.plotly_chart(fig_cm)

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+streamlit
+pandas
+numpy
+matplotlib
+seaborn
+scikit-learn
+plotly
+# app.py does `from datasets import load_dataset`
+datasets