Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import numpy as np | |
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.preprocessing import StandardScaler | |
| from sklearn.svm import SVC | |
| from sklearn.metrics import classification_report, accuracy_score, ConfusionMatrixDisplay | |
| import data_generator | |
# Load the business dataset from CSV: every column except the last is used as
# a feature, and the last column is used as the label.
business_data = pd.read_csv("business_data.csv")
X = business_data.iloc[:, :-1].values
y = business_data.iloc[:, -1].values

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# identical from one script run to the next.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Fit the scaler on the training split only, then apply that same scaling to
# both the training and the test split.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
# Business problem description.
# NOTE(review): this is a bare string expression, not a comment. If Streamlit's
# "magic" feature is enabled it will presumably be rendered in the app, above
# the title - confirm that this is intended.
"""
This simulated dataset represents a business classification problem where a company is trying to categorize customer behaviors
into two distinct segments. The classification is based on factors such as purchase history, engagement levels, and
customer loyalty indicators. The data is structured in a way that requires a non-linear classification approach, making it
an ideal case for Support Vector Machines with polynomial or RBF kernels.
"""

# Streamlit app: page title and the sidebar hyperparameter controls.
st.title("SVM Business Classification App")
st.sidebar.header("Model Hyperparameters")
C = st.sidebar.slider(
    "Regularization (C)",
    min_value=0.01,
    max_value=10.0,
    value=1.0,
)
# NOTE(review): `epsilon` is not read anywhere else in the visible code (the
# SVC built in train_and_evaluate receives only C, kernel, degree and gamma) -
# confirm whether this control is still needed.
epsilon = st.sidebar.slider(
    "Epsilon",
    min_value=0.01,
    max_value=1.0,
    value=0.1,
)
# Display a scatter plot of the data as loaded from the CSV (X is not the
# scaled copy), using the first two feature columns as axes and the label as
# the point color.
st.subheader("Dataset Scatter Plot")
fig, ax = plt.subplots()
scatter = ax.scatter(X[:, 0], X[:, 1], c=y, cmap='viridis', edgecolors='k')
ax.set_xlabel("Feature 1")
ax.set_ylabel("Feature 2")
ax.set_title("Business Data Classification")
st.pyplot(fig)
# Figures created through pyplot stay registered until they are closed.
# Streamlit re-executes this script on every widget interaction, so close the
# figure once it has been rendered to keep figures from accumulating.
plt.close(fig)

# Tabs for different kernel types
tab1, tab2, tab3 = st.tabs(["Linear Kernel", "Polynomial Kernel", "RBF Kernel"])
def train_and_evaluate(kernel, degree=3, gamma='scale'):
    """Fit an SVC on the training split and score it on the test split.

    Reads the module-level ``C``, ``X_train``, ``y_train``, ``X_test`` and
    ``y_test``.

    Args:
        kernel: Kernel name forwarded to ``SVC``.
        degree: Polynomial degree forwarded to ``SVC``.
        gamma: Kernel coefficient forwarded to ``SVC``.

    Returns:
        A tuple ``(model, accuracy, report, y_pred)``: the fitted classifier,
        its accuracy on the test split, the classification report as a dict,
        and the predictions for the test split.
    """
    classifier = SVC(kernel=kernel, C=C, gamma=gamma, degree=degree)
    classifier.fit(X_train, y_train)
    predictions = classifier.predict(X_test)
    return (
        classifier,
        accuracy_score(y_test, predictions),
        classification_report(y_test, predictions, output_dict=True),
        predictions,
    )
# Linear Kernel: train with the default degree/gamma and show accuracy, the
# per-class report, and the confusion matrix on the test split.
with tab1:
    st.subheader("Linear Kernel")
    model, acc, report, y_pred = train_and_evaluate("linear")
    st.write(f"**Accuracy:** {acc:.2f}")
    # The report dict is keyed by class/aggregate name; transposing puts one
    # of those per row.
    st.write("**Classification Report:**", pd.DataFrame(report).transpose())
    fig, ax = plt.subplots()
    ConfusionMatrixDisplay.from_estimator(model, X_test, y_test, ax=ax)
    st.pyplot(fig)
    # Streamlit re-executes this script on every widget interaction; close the
    # rendered figure so pyplot does not keep accumulating open figures.
    plt.close(fig)
# Polynomial Kernel: the degree is chosen with a slider inside the tab
# (range 2-5, default 3), then the same metrics as the other tabs are shown.
with tab2:
    st.subheader("Polynomial Kernel")
    degree = st.slider("Polynomial Degree", 2, 5, 3)
    model, acc, report, y_pred = train_and_evaluate("poly", degree)
    st.write(f"**Accuracy:** {acc:.2f}")
    # The report dict is keyed by class/aggregate name; transposing puts one
    # of those per row.
    st.write("**Classification Report:**", pd.DataFrame(report).transpose())
    fig, ax = plt.subplots()
    ConfusionMatrixDisplay.from_estimator(model, X_test, y_test, ax=ax)
    st.pyplot(fig)
    # Streamlit re-executes this script on every widget interaction; close the
    # rendered figure so pyplot does not keep accumulating open figures.
    plt.close(fig)
# RBF Kernel: gamma is chosen with a slider inside the tab (range 0.01-1.0,
# default 0.1), then the same metrics as the other tabs are shown.
with tab3:
    st.subheader("RBF Kernel")
    gamma = st.slider("Gamma", 0.01, 1.0, 0.1)
    model, acc, report, y_pred = train_and_evaluate("rbf", gamma=gamma)
    st.write(f"**Accuracy:** {acc:.2f}")
    # The report dict is keyed by class/aggregate name; transposing puts one
    # of those per row.
    st.write("**Classification Report:**", pd.DataFrame(report).transpose())
    fig, ax = plt.subplots()
    ConfusionMatrixDisplay.from_estimator(model, X_test, y_test, ax=ax)
    st.pyplot(fig)
    # Streamlit re-executes this script on every widget interaction; close the
    # rendered figure so pyplot does not keep accumulating open figures.
    plt.close(fig)
# Closing note for the app.
# NOTE(review): the source's indentation was lost, so it is assumed this line
# sits at top level (below the tabs) rather than inside the RBF tab - confirm.
st.write(
    "This app demonstrates how different SVM kernels impact classification performance in a non-linear business problem."
)