Spaces:
Sleeping
Sleeping
update remote
Browse files- app.py +78 -0
- data_generator.py +19 -0
- requirements.txt +5 -0
app.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import numpy as np
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import matplotlib.pyplot as plt
|
| 5 |
+
from sklearn.model_selection import train_test_split
|
| 6 |
+
from sklearn.preprocessing import StandardScaler
|
| 7 |
+
from sklearn.svm import SVC
|
| 8 |
+
from sklearn.metrics import classification_report, accuracy_score, ConfusionMatrixDisplay
|
| 9 |
+
import data_generator
|
| 10 |
+
|
| 11 |
+
# Load the dataset from CSV. Importing data_generator does not create the file
# (its generator only runs when that module is executed as a script), so build
# the CSV on demand instead of crashing with FileNotFoundError on a fresh
# checkout.
try:
    business_data = pd.read_csv("business_data.csv")
except FileNotFoundError:
    data_generator.generate_simulated_data()
    business_data = pd.read_csv("business_data.csv")

# Every column except the last is treated as a feature; the last is the label.
X = business_data.iloc[:, :-1].values
y = business_data.iloc[:, -1].values

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# identical across Streamlit reruns.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Fit the scaler on the training split only, then apply the same transform to
# the test split so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
|
| 20 |
+
|
| 21 |
+
# Business problem description. A bare string literal in the middle of a
# Streamlit script is only displayed through Streamlit's implicit "magic"
# rendering (and it showed up above the page title). Keep the text in a named
# constant and render it explicitly instead.
BUSINESS_PROBLEM_DESCRIPTION = """
This simulated dataset represents a business classification problem where a company is trying to categorize customer behaviors
into two distinct segments. The classification is based on factors such as purchase history, engagement levels, and
customer loyalty indicators. The data is structured in a way that requires a non-linear classification approach, making it
an ideal case for Support Vector Machines with polynomial or RBF kernels.
"""

# Streamlit App
st.title("SVM Business Classification App")
st.markdown(BUSINESS_PROBLEM_DESCRIPTION)

# Sidebar: hyperparameters shared by every kernel tab.
st.sidebar.header("Model Hyperparameters")
C = st.sidebar.slider("Regularization (C)", 0.01, 10.0, 1.0)
# NOTE: the former "Epsilon" slider was removed. Its value was never passed to
# any model, and SVC (the only estimator used here) takes no epsilon argument,
# so the control had no effect on the results.

# Tabs for different kernel types
tab1, tab2, tab3 = st.tabs(["Linear Kernel", "Polynomial Kernel", "RBF Kernel"])
|
| 37 |
+
|
| 38 |
+
def train_and_evaluate(kernel, degree=3, gamma='scale'):
    """Fit an SVC with the requested kernel and score it on the test split.

    Uses the module-level ``C``, ``X_train``, ``y_train``, ``X_test`` and
    ``y_test``.

    Args:
        kernel: Kernel name forwarded to ``SVC``.
        degree: Forwarded to ``SVC`` as ``degree``.
        gamma: Forwarded to ``SVC`` as ``gamma``.

    Returns:
        A tuple ``(model, accuracy, report, y_pred)``: the fitted classifier,
        the ``accuracy_score`` on the test split, the ``classification_report``
        in dict form, and the test-split predictions.
    """
    classifier = SVC(C=C, kernel=kernel, degree=degree, gamma=gamma)
    classifier.fit(X_train, y_train)
    predictions = classifier.predict(X_test)
    return (
        classifier,
        accuracy_score(y_test, predictions),
        classification_report(y_test, predictions, output_dict=True),
        predictions,
    )
|
| 45 |
+
|
| 46 |
+
# Linear Kernel
with tab1:
    st.subheader("Linear Kernel")
    model, acc, report, y_pred = train_and_evaluate("linear")
    st.write(f"**Accuracy:** {acc:.2f}")
    st.write("**Classification Report:**", pd.DataFrame(report).transpose())
    fig, ax = plt.subplots()
    ConfusionMatrixDisplay.from_estimator(model, X_test, y_test, ax=ax)
    st.pyplot(fig)
    # The script is re-executed on every widget interaction; close the figure
    # once it has been rendered so pyplot does not keep accumulating figures.
    plt.close(fig)
|
| 55 |
+
|
| 56 |
+
# Polynomial Kernel
with tab2:
    st.subheader("Polynomial Kernel")
    # The degree slider lives in this tab because it only affects this kernel.
    degree = st.slider("Polynomial Degree", 2, 5, 3)
    model, acc, report, y_pred = train_and_evaluate("poly", degree)
    st.write(f"**Accuracy:** {acc:.2f}")
    st.write("**Classification Report:**", pd.DataFrame(report).transpose())
    fig, ax = plt.subplots()
    ConfusionMatrixDisplay.from_estimator(model, X_test, y_test, ax=ax)
    st.pyplot(fig)
    # The script is re-executed on every widget interaction; close the figure
    # once it has been rendered so pyplot does not keep accumulating figures.
    plt.close(fig)
|
| 66 |
+
|
| 67 |
+
# RBF Kernel
with tab3:
    st.subheader("RBF Kernel")
    # The gamma slider lives in this tab because it is only passed to this kernel.
    gamma = st.slider("Gamma", 0.01, 1.0, 0.1)
    model, acc, report, y_pred = train_and_evaluate("rbf", gamma=gamma)
    st.write(f"**Accuracy:** {acc:.2f}")
    st.write("**Classification Report:**", pd.DataFrame(report).transpose())
    fig, ax = plt.subplots()
    ConfusionMatrixDisplay.from_estimator(model, X_test, y_test, ax=ax)
    st.pyplot(fig)
    # The script is re-executed on every widget interaction; close the figure
    # once it has been rendered so pyplot does not keep accumulating figures.
    plt.close(fig)

# Closing caption, written after the tab container.
st.write("This app demonstrates how different SVM kernels impact classification performance in a non-linear business problem.")
|
data_generator.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pandas as pd
|
| 3 |
+
from sklearn.datasets import make_moons
|
| 4 |
+
|
| 5 |
+
def generate_simulated_data(
    output_path="business_data.csv",
    n_samples=300,
    noise=0.2,
    random_state=42,
):
    """Generate a simulated two-class dataset and save it to a CSV file.

    The points come from ``make_moons``. Called with no arguments, this
    produces the same ``business_data.csv`` as before.

    Args:
        output_path: Destination of the CSV file.
        n_samples: Number of rows to generate.
        noise: Forwarded to ``make_moons`` as ``noise``.
        random_state: Forwarded to ``make_moons`` as ``random_state``.

    Returns:
        The generated ``DataFrame`` with columns ``Feature1``, ``Feature2``
        and ``Target`` (the same content that is written to ``output_path``).
    """
    # make_moons is seeded through random_state, so the process-wide NumPy RNG
    # is deliberately left untouched (no np.random.seed call).
    X, y = make_moons(n_samples=n_samples, noise=noise, random_state=random_state)

    # Convert to DataFrame
    df = pd.DataFrame(X, columns=["Feature1", "Feature2"])
    df["Target"] = y

    # Save to CSV without the index so the label stays the last column.
    df.to_csv(output_path, index=False)
    return df
|
| 16 |
+
|
| 17 |
+
# Script entry point: write the CSV only when this module is executed directly;
# importing the module does not generate anything.
if __name__ == "__main__":
    generate_simulated_data()
    print("Simulated business dataset saved as 'business_data.csv'.")
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit
|
| 2 |
+
numpy
|
| 3 |
+
pandas
|
| 4 |
+
matplotlib
|
| 5 |
+
scikit-learn
|