Ramyamaheswari committed on
Commit
817d5ef
·
verified ·
1 Parent(s): 630ea40

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -60
app.py CHANGED
@@ -4,55 +4,67 @@ import matplotlib.pyplot as plt
4
  import seaborn as sns
5
  from sklearn.datasets import load_iris
6
  from sklearn.model_selection import train_test_split
7
- from sklearn.neighbors import KNeighborsClassifier
8
  from sklearn.preprocessing import StandardScaler
9
  from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
 
 
10
 
11
  # Set up page
12
- st.set_page_config(page_title="Explore KNN Algorithm", layout="wide")
13
- st.title("πŸ“ K-Nearest Neighbors (KNN): Explained with Iris Dataset")
14
 
15
- # Intro Section
16
- st.markdown("""
17
- ## 🧠 What is K-Nearest Neighbors?
18
- **KNN** is a simple and intuitive machine learning algorithm that makes predictions based on the **majority class of the K closest data points** in the feature space.
19
- > 🧭 Think of it like asking your neighbors what they think — you take the majority opinion.
20
-
21
- ---
22
- ## βš™οΈ How KNN Works
23
- 1. Choose the number of neighbors **K**.
24
- 2. Calculate distance (usually **Euclidean**) between points.
25
- 3. Pick the **K closest data points**.
26
- 4. Predict the class that occurs most frequently among them.
27
-
28
- πŸ” Distance Metrics:
29
- - Euclidean (default)
30
- - Manhattan
31
- - Minkowski
32
-
33
- ---
34
- ### πŸ“ˆ Pros and Cons
35
- βœ… Simple to understand
36
- βœ… No training time (lazy learner)
37
- βœ… Works well with small datasets
38
- ⚠️ Slow on large datasets
39
- ⚠️ Needs feature scaling
40
- ⚠️ Sensitive to outliers and irrelevant features
41
- ---
42
- """)
43
-
44
- # Dataset and DataFrame
45
- st.subheader("🌼 Let's Explore the Iris Dataset")
46
  iris = load_iris()
47
  df = pd.DataFrame(iris.data, columns=iris.feature_names)
48
  df["target"] = iris.target
49
  df["species"] = df["target"].apply(lambda x: iris.target_names[x])
50
 
51
- st.markdown("Here's a peek at the dataset πŸ‘‡")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  st.dataframe(df.head(), use_container_width=True)
53
 
54
- # Feature distribution visualization
55
- st.markdown("### πŸ“Š Visualize Features")
56
  selected_features = st.multiselect("Pick features to visualize", iris.feature_names, default=iris.feature_names[:2])
57
  if len(selected_features) == 2:
58
  plt.figure(figsize=(8, 5))
@@ -60,26 +72,19 @@ if len(selected_features) == 2:
60
  st.pyplot(plt.gcf())
61
  plt.clf()
62
 
63
- # Sidebar controls
64
- st.sidebar.header("πŸ“ KNN Model Settings")
65
- n_neighbors = st.sidebar.slider("Number of Neighbors (K)", 1, 15, value=5)
66
- metric = st.sidebar.selectbox("Distance Metric", ["euclidean", "manhattan", "minkowski"])
67
-
68
- # Prepare data
69
  X = df[iris.feature_names]
70
  y = df["target"]
71
 
72
  scaler = StandardScaler()
73
  X_scaled = scaler.fit_transform(X)
74
-
75
  X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
76
 
77
  # Train model
78
- model = KNeighborsClassifier(n_neighbors=n_neighbors, metric=metric)
79
  model.fit(X_train, y_train)
80
  y_pred = model.predict(X_test)
81
 
82
- # Model performance
83
  acc = accuracy_score(y_test, y_pred)
84
  st.success(f"βœ… Model Accuracy: {acc*100:.2f}%")
85
 
@@ -96,19 +101,31 @@ plt.xlabel("Predicted")
96
  plt.ylabel("Actual")
97
  st.pyplot(fig)
98
 
99
- # Final tips
100
- st.markdown("""
101
- ---
102
- ## πŸ’‘ Key Takeaways
103
- - KNN is **non-parametric** and easy to implement.
104
- It’s a **lazy learner** — no training, just prediction.
105
- - Sensitive to **scaling**, **K value**, and **irrelevant features**.
106
- ## πŸ“Œ When to Use KNN?
107
- - When you want a **simple baseline model**.
108
- - For **small- to medium-sized** datasets.
109
- - When your features are properly scaled and meaningful.
110
- > 🎯 *Tip:* Use cross-validation to choose the optimal value of **K** and avoid overfitting!
111
- ---
112
- """)
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
 
 
4
  import seaborn as sns
5
  from sklearn.datasets import load_iris
6
  from sklearn.model_selection import train_test_split
 
7
  from sklearn.preprocessing import StandardScaler
8
  from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
9
+ from sklearn.tree import DecisionTreeClassifier, plot_tree
10
+ from sklearn.neighbors import KNeighborsClassifier
11
 
12
  # Set up page
13
+ st.set_page_config(page_title="ML Algorithms Explorer", layout="wide")
14
+ st.title("🌸 ML Classifiers with Iris Dataset: KNN & Decision Tree")
15
 
16
+ # Sidebar: Choose algorithm
17
+ st.sidebar.header("βš™οΈ Algorithm Selection")
18
+ model_choice = st.sidebar.selectbox("Choose a classifier", ["K-Nearest Neighbors (KNN)", "Decision Tree"])
19
+
20
+ # Dataset
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  iris = load_iris()
22
  df = pd.DataFrame(iris.data, columns=iris.feature_names)
23
  df["target"] = iris.target
24
  df["species"] = df["target"].apply(lambda x: iris.target_names[x])
25
 
26
+ # Intro Section
27
+ if model_choice == "K-Nearest Neighbors (KNN)":
28
+ st.markdown("""
29
+ ## πŸ“ K-Nearest Neighbors (KNN)
30
+ **KNN** is a simple and intuitive algorithm that predicts based on the majority class of the K nearest data points.
31
+ > 🧭 It's like asking your closest neighbors for advice!
32
+
33
+ ---
34
+ ### βš™οΈ How It Works:
35
+ 1. Choose **K**.
36
+ 2. Calculate distances.
37
+ 3. Pick the **K closest**.
38
+ 4. Predict the most frequent class.
39
+ """)
40
+ st.sidebar.subheader("KNN Settings")
41
+ n_neighbors = st.sidebar.slider("Number of Neighbors (K)", 1, 15, 5)
42
+ metric = st.sidebar.selectbox("Distance Metric", ["euclidean", "manhattan", "minkowski"])
43
+ model = KNeighborsClassifier(n_neighbors=n_neighbors, metric=metric)
44
+
45
+ else:
46
+ st.markdown("""
47
+ ## 🌳 Decision Tree Classifier
48
+ A **Decision Tree** splits data based on feature values to build a tree-like model for decision-making.
49
+ > 🧩 Think of it like playing "20 Questions" β€” each answer narrows things down!
50
+
51
+ ---
52
+ ### βš™οΈ How It Works:
53
+ 1. Pick the best feature to split.
54
+ 2. Repeat until data is separated.
55
+ 3. Result: A tree structure for classification.
56
+ """)
57
+ st.sidebar.subheader("Decision Tree Settings")
58
+ criterion = st.sidebar.radio("Splitting Criterion", ["gini", "entropy"])
59
+ max_depth = st.sidebar.slider("Max Depth", 1, 10, value=3)
60
+ model = DecisionTreeClassifier(criterion=criterion, max_depth=max_depth, random_state=42)
61
+
62
+ # Show dataset
63
+ st.subheader("🌼 Iris Dataset Preview")
64
  st.dataframe(df.head(), use_container_width=True)
65
 
66
+ # Feature visualization
67
+ st.markdown("### πŸ“Š Feature Visualization")
68
  selected_features = st.multiselect("Pick features to visualize", iris.feature_names, default=iris.feature_names[:2])
69
  if len(selected_features) == 2:
70
  plt.figure(figsize=(8, 5))
 
72
  st.pyplot(plt.gcf())
73
  plt.clf()
74
 
75
+ # Prepare and scale data
 
 
 
 
 
76
  X = df[iris.feature_names]
77
  y = df["target"]
78
 
79
  scaler = StandardScaler()
80
  X_scaled = scaler.fit_transform(X)
 
81
  X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
82
 
83
  # Train model
 
84
  model.fit(X_train, y_train)
85
  y_pred = model.predict(X_test)
86
 
87
+ # Accuracy
88
  acc = accuracy_score(y_test, y_pred)
89
  st.success(f"βœ… Model Accuracy: {acc*100:.2f}%")
90
 
 
101
  plt.ylabel("Actual")
102
  st.pyplot(fig)
103
 
104
+ # Tree plot if applicable
105
+ if model_choice == "Decision Tree":
106
+ st.markdown("### 🌳 Tree Visualization")
107
+ fig, ax = plt.subplots(figsize=(12, 6))
108
+ plot_tree(model, filled=True, feature_names=iris.feature_names, class_names=iris.target_names, fontsize=10)
109
+ st.pyplot(fig)
110
+
111
+ # Key takeaways
112
+ st.markdown("---")
113
+ if model_choice == "K-Nearest Neighbors (KNN)":
114
+ st.markdown("""
115
+ ## πŸ’‘ KNN Takeaways
116
+ - No training phase — just prediction.
117
+ - Simple and powerful for small datasets.
118
+ - Needs **scaling** and is sensitive to **irrelevant features**.
119
+ > 🎯 Use GridSearchCV to find the best **K**!
120
+ """)
121
+ else:
122
+ st.markdown("""
123
+ ## πŸ’‘ Decision Tree Takeaways
124
+ - Easy to interpret and visualize.
125
+ - Can overfit without depth control.
126
+ - Works on both numeric and categorical features.
127
+ > 🎯 Combine trees using **Random Forest** or **Boosting** for better performance!
128
+ """)
129
+
130
 
131