Ramyamaheswari committed on
Commit 76c4437 · verified · 1 Parent(s): 817d5ef

Update app.py

Files changed (1)
  1. app.py +93 -82
app.py CHANGED
@@ -4,95 +4,107 @@ import matplotlib.pyplot as plt
  import seaborn as sns
  from sklearn.datasets import load_iris
  from sklearn.model_selection import train_test_split
+ from sklearn.tree import DecisionTreeClassifier, plot_tree
  from sklearn.preprocessing import StandardScaler
  from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
- from sklearn.tree import DecisionTreeClassifier, plot_tree
- from sklearn.neighbors import KNeighborsClassifier

- # Set up page
- st.set_page_config(page_title="ML Algorithms Explorer", layout="wide")
- st.title("🌸 ML Classifiers with Iris Dataset: KNN & Decision Tree")
-
- # Sidebar: Choose algorithm
- st.sidebar.header("⚙️ Algorithm Selection")
- model_choice = st.sidebar.selectbox("Choose a classifier", ["K-Nearest Neighbors (KNN)", "Decision Tree"])
-
+ # Set up the Streamlit page
+ st.set_page_config(page_title="Explore Decision Tree Algorithm", layout="wide")
+ st.title("🌳 Decision Tree Classifier: Explained with the Iris Dataset")
+
+ # ------------------------------------
+ # Introduction
+ # ------------------------------------
+ st.markdown("""
+ ## 🧠 What is a Decision Tree?
+ A **Decision Tree** is a popular machine learning algorithm that uses a tree-like structure to make decisions.
+ Each **internal node** asks a question about a feature, each **branch** represents the outcome of that question, and each **leaf node** gives the final prediction.
+
+ > 🧩 Think of it like playing "20 Questions": each question helps narrow down the possibilities.
+
+ ---
+
+ ## ⚙️ How Decision Trees Work
+ 1. Start with all the data at the root.
+ 2. Select the **best feature** to split the data (based on Gini or Entropy).
+ 3. Repeat the splitting process on each subset until:
+    - All points are classified
+    - Or a **stopping condition** (like max depth) is met
+
+ 🔍 Criteria used to choose the best feature:
+ - **Gini Index** (default)
+ - **Entropy** (Information Gain)
+
+ ---
+
+ ### 📈 Pros and Cons
+ ✅ Easy to understand and visualize
+ ✅ Handles both numerical and categorical features
+ ✅ No need for feature scaling
+ ⚠️ Prone to overfitting: use `max_depth`, `min_samples_leaf`, or pruning
+
+ ---
+ """)
+
+ # ------------------------------------
+ # Load and Explore the Dataset
+ # ------------------------------------
+ st.subheader("🌼 Let's Explore the Iris Dataset")

  iris = load_iris()
  df = pd.DataFrame(iris.data, columns=iris.feature_names)
  df["target"] = iris.target
  df["species"] = df["target"].apply(lambda x: iris.target_names[x])

- # Intro Section
- if model_choice == "K-Nearest Neighbors (KNN)":
-     st.markdown("""
- ## 📍 K-Nearest Neighbors (KNN)
- **KNN** is a simple and intuitive algorithm that predicts based on the majority class of the K nearest data points.
- > 🧭 It's like asking your closest neighbors for advice!
-
- ---
- ### ⚙️ How It Works:
- 1. Choose **K**.
- 2. Calculate distances.
- 3. Pick the **K closest**.
- 4. Predict the most frequent class.
- """)
-     st.sidebar.subheader("KNN Settings")
-     n_neighbors = st.sidebar.slider("Number of Neighbors (K)", 1, 15, 5)
-     metric = st.sidebar.selectbox("Distance Metric", ["euclidean", "manhattan", "minkowski"])
-     model = KNeighborsClassifier(n_neighbors=n_neighbors, metric=metric)
-
- else:
-     st.markdown("""
- ## 🌳 Decision Tree Classifier
- A **Decision Tree** splits data based on feature values to build a tree-like model for decision-making.
- > 🧩 Think of it like playing "20 Questions": each answer narrows things down!
-
- ---
- ### ⚙️ How It Works:
- 1. Pick the best feature to split.
- 2. Repeat until data is separated.
- 3. Result: A tree structure for classification.
- """)
-     st.sidebar.subheader("Decision Tree Settings")
-     criterion = st.sidebar.radio("Splitting Criterion", ["gini", "entropy"])
-     max_depth = st.sidebar.slider("Max Depth", 1, 10, value=3)
-     model = DecisionTreeClassifier(criterion=criterion, max_depth=max_depth, random_state=42)
-
- # Show dataset
- st.subheader("🌼 Iris Dataset Preview")
+ st.markdown("Here's a quick look at the dataset 👇")
  st.dataframe(df.head(), use_container_width=True)

- # Feature visualization
- st.markdown("### 📊 Feature Visualization")
- selected_features = st.multiselect("Pick features to visualize", iris.feature_names, default=iris.feature_names[:2])
+ # ------------------------------------
+ # Feature Visualization
+ # ------------------------------------
+ st.markdown("### 📊 Visualize Feature Relationships")
+ selected_features = st.multiselect("Pick two features to visualize", iris.feature_names, default=iris.feature_names[:2])
+
  if len(selected_features) == 2:
      plt.figure(figsize=(8, 5))
      sns.scatterplot(data=df, x=selected_features[0], y=selected_features[1], hue="species", palette="Set2", s=80)
      st.pyplot(plt.gcf())
      plt.clf()

- # Prepare and scale data
+ # ------------------------------------
+ # Sidebar: Model Settings
+ # ------------------------------------
+ st.sidebar.header("🌲 Model Settings")
+ criterion = st.sidebar.radio("Splitting Criterion", ["gini", "entropy"])
+ max_depth = st.sidebar.slider("Max Depth", min_value=1, max_value=10, value=3)
+
+ # ------------------------------------
+ # Preprocessing and Train/Test Split
+ # ------------------------------------
  X = df[iris.feature_names]
  y = df["target"]

  scaler = StandardScaler()
  X_scaled = scaler.fit_transform(X)
+
  X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

- # Train model
+ # ------------------------------------
+ # Train Model
+ # ------------------------------------
+ model = DecisionTreeClassifier(criterion=criterion, max_depth=max_depth, random_state=42)
  model.fit(X_train, y_train)
  y_pred = model.predict(X_test)

- # Accuracy
+ # ------------------------------------
+ # Performance Metrics
+ # ------------------------------------
  acc = accuracy_score(y_test, y_pred)
  st.success(f"✅ Model Accuracy: {acc*100:.2f}%")

- # Classification report
  st.markdown("### 🧾 Classification Report")
  st.text(classification_report(y_test, y_pred, target_names=iris.target_names))

- # Confusion matrix
  st.markdown("### 🔍 Confusion Matrix")
  cm = confusion_matrix(y_test, y_pred)
  fig, ax = plt.subplots()
@@ -101,31 +113,30 @@ plt.xlabel("Predicted")
  plt.ylabel("Actual")
  st.pyplot(fig)

- # Tree plot if applicable
- if model_choice == "Decision Tree":
-     st.markdown("### 🌳 Tree Visualization")
-     fig, ax = plt.subplots(figsize=(12, 6))
-     plot_tree(model, filled=True, feature_names=iris.feature_names, class_names=iris.target_names, fontsize=10)
-     st.pyplot(fig)
-
- # Key takeaways
- st.markdown("---")
- if model_choice == "K-Nearest Neighbors (KNN)":
-     st.markdown("""
- ## 💡 KNN Takeaways
- - No training phase – just prediction.
- - Simple and powerful for small datasets.
- - Needs **scaling** and is sensitive to **irrelevant features**.
- > 🎯 Use GridSearchCV to find the best **K**!
- """)
- else:
-     st.markdown("""
- ## 💡 Decision Tree Takeaways
- - Easy to interpret and visualize.
- - Can overfit without depth control.
- - Works on both numeric and categorical features.
- > 🎯 Combine trees using **Random Forest** or **Boosting** for better performance!
- """)
+ # ------------------------------------
+ # Visualize Decision Tree
+ # ------------------------------------
+ st.markdown("### 🌳 Visualizing the Tree Structure")
+ fig, ax = plt.subplots(figsize=(12, 6))
+ plot_tree(model, filled=True, feature_names=iris.feature_names, class_names=iris.target_names, fontsize=10)
+ st.pyplot(fig)

+ # ------------------------------------
+ # Final Thoughts
+ # ------------------------------------
+ st.markdown("""
+ ---
+ ## 💡 Key Takeaways
+ - Decision Trees offer **clear visual explanations** of how decisions are made.
+ - They need **very little preprocessing** (like normalization or encoding).
+ - They're easy to overfit on small datasets – control complexity with `max_depth`, `min_samples_leaf`, or **pruning**.
+
+ ## 📌 When Should You Use a Decision Tree?
+ - When model **interpretability** is important
+ - When your data contains both **numerical and categorical** features
+ - When you need a **fast prototype**
+
+ > 🎯 *Pro Tip:* Use ensembles like **Random Forest** or **Gradient Boosting** for better performance in real-world scenarios.
+
+ ---
+ """)