Spaces:

louiecerv
/

svm_classifier

Sleeping

App Files Files Community

louiecerv committed on Jan 29, 2025

Commit

62004ae

1 Parent(s): f023c06

update remote

Browse files

Files changed (3) hide show

app.py +78 -0
data_generator.py +19 -0
requirements.txt +5 -0

app.py ADDED Viewed

	@@ -0,0 +1,78 @@

+import streamlit as st
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler
+from sklearn.svm import SVC
+from sklearn.metrics import classification_report, accuracy_score, ConfusionMatrixDisplay
+import data_generator
+# Load dataset from CSV
+business_data = pd.read_csv("business_data.csv")
+X = business_data.iloc[:, :-1].values
+y = business_data.iloc[:, -1].values
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
+scaler = StandardScaler()
+X_train = scaler.fit_transform(X_train)
+X_test = scaler.transform(X_test)
+# Business Problem Description
+# NOTE(review): the triple-quoted text below is a bare expression statement, not
+# a comment or a module docstring. Streamlit's "magic" feature, if enabled, would
+# render it in the app above the title -- confirm that this is intended.
+"""
+This simulated dataset represents a business classification problem where a company is trying to categorize customer behaviors
+into two distinct segments. The classification is based on factors such as purchase history, engagement levels, and
+customer loyalty indicators. The data is structured in a way that requires a non-linear classification approach, making it
+an ideal case for Support Vector Machines with polynomial or RBF kernels.
+"""
+# Streamlit App: page title and the sidebar controls
+st.title("SVM Business Classification App")
+st.sidebar.header("Model Hyperparameters")
+# C is read as a module-level name inside train_and_evaluate().
+C = st.sidebar.slider("Regularization (C)", 0.01, 10.0, 1.0)
+# NOTE(review): epsilon is collected here but is never passed to the SVC built in
+# train_and_evaluate(), so moving this slider has no effect on the model.
+epsilon = st.sidebar.slider("Epsilon", 0.01, 1.0, 0.1)
+# Tabs for different kernel types
+tab1, tab2, tab3 = st.tabs(["Linear Kernel", "Polynomial Kernel", "RBF Kernel"])
+def train_and_evaluate(kernel, degree=3, gamma='scale'):
+    model = SVC(C=C, kernel=kernel, degree=degree, gamma=gamma)
+    model.fit(X_train, y_train)
+    y_pred = model.predict(X_test)
+    accuracy = accuracy_score(y_test, y_pred)
+    report = classification_report(y_test, y_pred, output_dict=True)
+    return model, accuracy, report, y_pred
+# Linear Kernel
+with tab1:
+    st.subheader("Linear Kernel")
+    model, acc, report, y_pred = train_and_evaluate("linear")
+    st.write(f"**Accuracy:** {acc:.2f}")
+    st.write("**Classification Report:**", pd.DataFrame(report).transpose())
+    fig, ax = plt.subplots()
+    ConfusionMatrixDisplay.from_estimator(model, X_test, y_test, ax=ax)
+    st.pyplot(fig)
+# Polynomial Kernel
+with tab2:
+    st.subheader("Polynomial Kernel")
+    degree = st.slider("Polynomial Degree", 2, 5, 3)
+    model, acc, report, y_pred = train_and_evaluate("poly", degree)
+    st.write(f"**Accuracy:** {acc:.2f}")
+    st.write("**Classification Report:**", pd.DataFrame(report).transpose())
+    fig, ax = plt.subplots()
+    ConfusionMatrixDisplay.from_estimator(model, X_test, y_test, ax=ax)
+    st.pyplot(fig)
+# RBF Kernel
+with tab3:
+    st.subheader("RBF Kernel")
+    gamma = st.slider("Gamma", 0.01, 1.0, 0.1)
+    model, acc, report, y_pred = train_and_evaluate("rbf", gamma=gamma)
+    st.write(f"**Accuracy:** {acc:.2f}")
+    st.write("**Classification Report:**", pd.DataFrame(report).transpose())
+    fig, ax = plt.subplots()
+    ConfusionMatrixDisplay.from_estimator(model, X_test, y_test, ax=ax)
+    st.pyplot(fig)
+st.write("This app demonstrates how different SVM kernels impact classification performance in a non-linear business problem.")

data_generator.py ADDED Viewed

	@@ -0,0 +1,19 @@

+import numpy as np
+import pandas as pd
+from sklearn.datasets import make_moons
+def generate_simulated_data():
+    """Generates a simulated business classification dataset and saves it to a CSV file."""
+    np.random.seed(42)
+    X, y = make_moons(n_samples=300, noise=0.2, random_state=42)
+    # Convert to DataFrame
+    df = pd.DataFrame(X, columns=["Feature1", "Feature2"])
+    df["Target"] = y
+    # Save to CSV
+    df.to_csv("business_data.csv", index=False)
+# Script entry point: write the CSV when this module is executed directly
+# (importing the module does not generate anything).
+if __name__ == "__main__":
+    generate_simulated_data()
+    print("Simulated business dataset saved as 'business_data.csv'.")

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+streamlit
+numpy
+pandas
+matplotlib
+scikit-learn