import streamlit as st
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns


# Page config: set the browser-tab title and the wide layout, then render the
# main heading of the page.
_page_settings = {"page_title": "Explore SVM Algorithm", "layout": "wide"}
st.set_page_config(**_page_settings)
st.title("🔍 Support Vector Machine (SVM)")

# -----------------------------------
# Theory Section
# -----------------------------------
# Static introduction to SVMs, rendered as Markdown. The trailing double
# spaces after the kernel bullets are intentional Markdown hard line breaks.
_theory_markdown = """
## 🤖 What is a Support Vector Machine (SVM)?
SVM is a powerful supervised learning algorithm used for classification and regression.
It works by finding a hyperplane that best separates the classes in the feature space.

**Key Ideas:**
- Maximizes the margin between different classes
- Effective in high-dimensional spaces
- Can use **kernel tricks** to handle non-linear classification

---

## ⚙️ How SVM Works
1. Find the optimal hyperplane that separates classes.
2. Use **support vectors** — data points closest to the hyperplane.
3. Maximize the margin between these support vectors.
4. Use **kernel functions** to map inputs to higher dimensions if data isn't linearly separable.

**Kernel Types:**
- *Linear*: Straight line separation  
- *RBF (Gaussian)*: Circular, good for complex boundaries  
- *Polynomial*: Curved boundaries  

---
"""
st.markdown(_theory_markdown)

# -----------------------------------
# Load Dataset
# -----------------------------------
st.subheader("🌼 Try SVM on the Iris Dataset")

# Build a DataFrame holding the four measurements, the numeric class label and
# a readable species name. Column order matters: the feature selectors further
# down the script rely on the last two columns being "target" and "species".
iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df["target"] = iris.target
df["species"] = df["target"].map(lambda class_idx: iris.target_names[class_idx])

# Show only the first rows as a preview.
st.dataframe(df.head(), use_container_width=True)

# -----------------------------------
# Model Controls
# -----------------------------------
# User-selected hyper-parameters. `kernel` and `C` are also reused by the
# decision-boundary plot later in the script.
kernel = st.radio("Select SVM Kernel", ["linear", "rbf", "poly"])
C = st.slider("Select Regularization Parameter (C)", 0.01, 10.0, value=1.0)

# Prepare Data: all measurement columns are features, `target` is the label.
X = df.drop(columns=["target", "species"])
y = df['target']

# Split BEFORE scaling. Fitting the scaler on the full dataset would let the
# held-out rows influence the mean/variance used to transform the training
# data (data leakage), making the reported accuracy optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)  # statistics come from training rows only
X_test = scaler.transform(X_test_raw)        # test rows reuse the training statistics

# Full feature matrix scaled with the training statistics; kept so the
# module-level name `X_scaled` remains available.
X_scaled = scaler.transform(X)

# Train SVM
svm_model = SVC(kernel=kernel, C=C, probability=True, random_state=42)
svm_model.fit(X_train, y_train)
svm_pred = svm_model.predict(X_test)

# Report hold-out accuracy as a percentage.
svm_acc = accuracy_score(y_test, svm_pred)
st.success(f"✅ SVM Accuracy: {svm_acc*100:.2f}%")

# -----------------------------------
# Classification Report
# -----------------------------------
# Per-class metrics for the hold-out predictions, labelled with the species
# names. The text is kept in `svm_report` because the download button further
# down the page serves the same string.
svm_report = classification_report(
    y_true=y_test,
    y_pred=svm_pred,
    target_names=iris.target_names,
)
st.markdown("### 📊 SVM Classification Report")
st.text(svm_report)

# -----------------------------------
# Visualize Decision Boundaries
# -----------------------------------
st.markdown("### 🌌 Visualizing Decision Boundaries (2 Features)")

# Only the measurement columns are offered; the last two columns of `df`
# ("target" and "species") are excluded by the [:-2] slice.
feature_x = st.selectbox("Feature for X-axis", df.columns[:-2], index=0)
feature_y = st.selectbox("Feature for Y-axis", df.columns[:-2], index=1)

# The same feature on both axes collapses every point onto a diagonal line.
# Warn, but keep rendering so the rest of the page still appears.
if feature_x == feature_y:
    st.warning("Both axes use the same feature; choose two different features "
               "for a meaningful decision boundary.")

X_vis = df[[feature_x, feature_y]]

# Split first, then fit a scaler dedicated to this plot on the training rows
# only. This avoids leaking test statistics and leaves the shared `scaler`
# object (fitted on all features above) untouched.
X_train_v_raw, X_test_v_raw, y_train_v, y_test_v = train_test_split(
    X_vis, y, test_size=0.2, random_state=42
)
vis_scaler = StandardScaler()
X_train_v = vis_scaler.fit_transform(X_train_v_raw)
X_test_v = vis_scaler.transform(X_test_v_raw)
X_vis_scaled = vis_scaler.transform(X_vis)  # every row, for the scatter overlay

# A separate 2-feature model using the same kernel and C as the main model.
model_vis = SVC(kernel=kernel, C=C)
model_vis.fit(X_train_v, y_train_v)

# Evaluate the model on a dense grid (step h, in scaled units) covering the
# data range plus a margin of 1 on each side.
h = .02
x_min, x_max = X_vis_scaled[:, 0].min() - 1, X_vis_scaled[:, 0].max() + 1
y_min, y_max = X_vis_scaled[:, 1].min() - 1, X_vis_scaled[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
Z = model_vis.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

# Draw through the explicit Axes object instead of pyplot's implicit
# "current figure" state, which is shared globally across Streamlit sessions.
fig, ax = plt.subplots(figsize=(8, 6))
ax.contourf(xx, yy, Z, alpha=0.3)
sns.scatterplot(x=X_vis_scaled[:, 0], y=X_vis_scaled[:, 1], hue=df['species'], palette='deep', ax=ax)
ax.set_xlabel(feature_x)
ax.set_ylabel(feature_y)
ax.set_title("SVM Decision Boundaries")
st.pyplot(fig)
# The script reruns on every widget interaction; close the figure so pyplot
# does not keep accumulating open figures.
plt.close(fig)

# -----------------------------------
# Downloadable Report
# -----------------------------------
# Serve the classification report text shown above as a plain-text download.
st.markdown("### 📥 Download SVM Report")
st.download_button(
    label="📄 Download Classification Report",
    data=svm_report,
    file_name="svm_report.txt",
)

# -----------------------------------
# Summary
# -----------------------------------
# Static closing notes (highlights, pros/cons table, kernel cheat sheet),
# rendered as Markdown.
_summary_markdown = """
---
## 💡 Highlights of SVM:
- Works well for both linear and non-linear problems.
- Excellent performance on small to medium-sized datasets.
- Sensitive to outliers but tunable via regularization.

## 🔧 When to Use SVM?
Use them when:
- You have a clear margin of separation between classes.
- You're dealing with high-dimensional data.
- You want flexibility via kernels.

---

### 🧠 Did You Know?
- SVMs are **robust to overfitting**, especially in high-dimensional space.
- The **'C' parameter** controls the trade-off between training error and margin size.
- The **kernel trick** allows SVMs to operate in infinite-dimensional space.

### 📌 Pros & Cons
| Pros                            | Cons                                |
|---------------------------------|-------------------------------------|
| Works well on complex boundaries| Slower on large datasets            |
| Effective in high-dimensional space | Needs careful parameter tuning |
| Can handle non-linear data      | Less interpretable than simpler models |

---

### 🌀 Kernel Choice Summary
| Kernel      | Use Case                        |
|-------------|---------------------------------|
| Linear      | Simple, linearly separable data |
| RBF         | Most common, good for most cases|
| Polynomial  | Use if you suspect curved boundaries|

> 🎯 *Tip:* Start with linear, then try RBF if the data isn't linearly separable.
"""
st.markdown(_summary_markdown)