varshitha22 committed on
Commit
db78d55
·
verified ·
1 Parent(s): 7ce5655

Update pages/EDA.py

Browse files
Files changed (1) hide show
  1. pages/EDA.py +53 -1
pages/EDA.py CHANGED
@@ -1,5 +1,8 @@
1
  import streamlit as st
2
  import pandas as pd
 
 
 
3
 
4
  st.markdown(
5
  "<h3 style='text-align: left; color: #555;'>Data Frame</h3>",
@@ -8,4 +11,53 @@ st.markdown(
8
 
9
  df = pd.read_csv("https://huggingface.co/spaces/varshitha22/Crop_Recommendation/resolve/main/Crop_Recommendation.csv")
10
 
11
- st.dataframe(df) # Display the dataframe in Streamlit
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import pandas as pd
3
+ import numpy as np
4
+ import seaborn as sns
5
+ import matplotlib.pyplot as plt
6
 
7
  st.markdown(
8
  "<h3 style='text-align: left; color: #555;'>Data Frame</h3>",
 
11
 
12
  df = pd.read_csv("https://huggingface.co/spaces/varshitha22/Crop_Recommendation/resolve/main/Crop_Recommendation.csv")
13
 
14
+ st.dataframe(df) # Display the dataframe in Streamlit
15
+
16
# --- Outlier detection and handling ---
# `df` was already loaded (and displayed) earlier on this page, so the CSV is
# not downloaded a second time. Work on a copy so the replacements below do
# not mutate the frame object that was passed to the first st.dataframe call.
df = df.copy()

# Numerical columns that are inspected for outliers.
num_cols = ['Nitrogen', 'Phosphorus', 'Potassium', 'Temperature', 'Humidity', 'pH_Value', 'Rainfall']

# Title
st.markdown("<h2 style='text-align: center; color: #2E86C1;'>Outlier Detection and Handling</h2>", unsafe_allow_html=True)

# --- Boxplot Visualization ---
st.markdown("<h3 style='text-align: left; color: #D35400;'>Outlier Detection (Boxplots)</h3>", unsafe_allow_html=True)

fig, ax = plt.subplots(2, 4, figsize=(12, 6))
ax = ax.flatten()

for i, col in enumerate(num_cols):
    sns.boxplot(x=df[col], ax=ax[i], color="skyblue")
    ax[i].set_title(col)

# The 2x4 grid has more cells than there are columns; hide the leftover axes
# so no empty frame is drawn.
for unused_ax in ax[len(num_cols):]:
    unused_ax.set_visible(False)

fig.tight_layout()
st.pyplot(fig)
# Streamlit re-executes the script on every interaction; close the figure so
# Matplotlib does not keep one more open figure per rerun.
plt.close(fig)

# --- Outlier Handling ---
st.markdown("<h3 style='text-align: left; color: #28B463;'>Outlier Handling using IQR</h3>", unsafe_allow_html=True)

outlier_counts = {}

for col in num_cols:
    q1 = df[col].quantile(0.25)
    q3 = df[col].quantile(0.75)
    iqr = q3 - q1

    # Tukey fences: values more than 1.5 * IQR outside the quartiles.
    lower_bound = q1 - 1.5 * iqr
    upper_bound = q3 + 1.5 * iqr

    # Build the mask once and reuse it for both counting and replacement.
    is_outlier = (df[col] < lower_bound) | (df[col] > upper_bound)
    outlier_counts[col] = int(is_outlier.sum())

    # Replace outliers with the column mean. The mean is taken over all rows
    # of the column (outliers included), evaluated before the assignment.
    df[col] = np.where(is_outlier, df[col].mean(), df[col])

# Display Outlier Counts
st.write("Number of Outliers Detected:")
st.write(outlier_counts)

# Display Cleaned Data
st.markdown("<h3 style='text-align: left; color: #AF7AC5;'>Dataset After Handling Outliers</h3>", unsafe_allow_html=True)
st.dataframe(df)