# Crop_Recommendation / pages / Exploratory Data Analysis.py
# varshitha22's picture
# Update pages/Exploratory Data Analysis.py
# 312d909 verified
import streamlit as st
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Heading for the raw-data preview.
st.markdown(
    "<h3 style='text-align: left; color: #555; font-size: 24px;'>Data Frame</h3>",
    unsafe_allow_html=True
)

# Location of the crop-recommendation CSV used by every section of this page.
DATA_URL = "https://huggingface.co/spaces/varshitha22/Crop_Recommendation/resolve/main/Crop_Recommendation.csv"


@st.cache_data
def load_crop_data(url):
    """Download the CSV at *url* and return it as a pandas DataFrame.

    Wrapped in ``st.cache_data`` so the file is fetched once rather than on
    every script rerun. Per the Streamlit documentation, ``st.cache_data``
    hands back a fresh copy on each call, so later in-place edits to the
    returned frame do not touch the cached data.
    """
    return pd.read_csv(url)


# One load serves both the preview and the analysis below (the CSV was
# previously downloaded twice in a row).
df = load_crop_data(DATA_URL)
df1 = df.head()
st.dataframe(df1)  # Show the first rows of the dataset
# Numeric feature columns analysed on this page.
num_cols = ['Nitrogen', 'Phosphorus', 'Potassium', 'Temperature', 'Humidity', 'pH_Value', 'Rainfall']

# Section title and boxplot sub-heading.
st.markdown("<h2 style='text-align: left; color: #2E86C1;font-size: 24px;'>Outlier Detection and Handling</h2>", unsafe_allow_html=True)
st.markdown("<h3 style='text-align: left; color: #D35400;font-size: 24px;'>Outlier Detection (Boxplots)</h3>", unsafe_allow_html=True)

# One boxplot per numeric column, laid out on a 2x4 grid.
fig, ax = plt.subplots(2, 4, figsize=(12, 6))
ax = ax.flatten()
for axis, col in zip(ax, num_cols):
    sns.boxplot(x=df[col], ax=axis, color="skyblue")
    axis.set_title(col)

# The grid has more cells than there are columns; hide the leftover axes so
# no empty frame is drawn.
for spare_axis in ax[len(num_cols):]:
    spare_axis.set_visible(False)

fig.tight_layout()
st.pyplot(fig)
# pyplot keeps every figure registered until it is closed; close it once
# rendered so figures do not pile up across script reruns.
plt.close(fig)
# --- Outlier handling: IQR fences, out-of-range values replaced by the mean ---
st.markdown("<h3 style='text-align: left; color: #28B463;font-size: 24px;'>Outlier Handling using IQR</h3>", unsafe_allow_html=True)

# Column name -> number of values found outside the IQR fences.
outlier_counts = {}
for col in num_cols:
    values = df[col]
    q1 = values.quantile(0.25)
    q3 = values.quantile(0.75)
    iqr = q3 - q1
    low_fence = q1 - 1.5 * iqr
    high_fence = q3 + 1.5 * iqr

    # True where a value lies outside [low_fence, high_fence].
    is_outlier = (values < low_fence) | (values > high_fence)
    outlier_counts[col] = int(is_outlier.sum())

    # The mean is taken over the column as it stands (outliers included)
    # and written over the flagged values.
    df[col] = np.where(is_outlier, values.mean(), values)

# Report how many values were flagged per column.
st.write("Number of Outliers Detected:")
st.write(outlier_counts)
# Heading for the rationale section.
st.markdown(
    body="<h2 style='text-align: left; color: #2E86C1;font-size: 24px;'>Why Use the IQR Method?</h2>",
    unsafe_allow_html=True,
)

# Styled HTML box listing the reasons for choosing the IQR approach.
why_iqr_html = """
<style>
.why-text {
font-size: 18px; /* Decreased font size */
color: #333;
background-color: #f9f9f9;
padding: 10px;
border-radius: 10px;
}
</style>
<div class='why-text'>
1. Other methods like mean and standard deviation can be heavily influenced by extreme values.<br><br>
2. IQR focuses only on the middle 50% of data (between Q1 and Q3), making it less affected by extreme values.<br><br>
3. Other methods may remove outliers entirely, leading to data loss.<br><br>
4. Instead of dropping rows, the IQR method replaces outliers with the mean of the column, keeping the dataset size the same.<br><br>
5. This is useful when we don’t want to lose important information but still need to control extreme values.<br><br>
</div>
"""
st.markdown(why_iqr_html, unsafe_allow_html=True)
st.markdown(
    "<h2 style='text-align: left; color: #D35400;font-size: 24px;'>Crop Recommendation System - Feature Visualization</h2>",
    unsafe_allow_html=True,
)

# Labels offered in the drop-down, one per numeric feature.
visualizations = [
    "Nitrogen Requirement per Crop",
    "Phosphorus Requirement per Crop",
    "Potassium Requirement per Crop",
    "Temperature Distribution per Crop",
    "Humidity Distribution per Crop",
    "pH Value Distribution per Crop",
    "Rainfall Distribution per Crop",
]

# Drop-down (select box) holding the user's chosen visualization label.
viz_option = st.selectbox("Select a visualization:", visualizations)
def plot_feature(feature, title):
    """Render a two-panel figure summarising *feature* by crop.

    The left panel is a horizontal bar chart of the mean of *feature* for
    each crop; the right panel is a KDE computed over those per-crop means.
    Reads the module-level ``df``.

    Args:
        feature: Name of the numeric column in ``df`` to summarise.
        title: Display label of the selected visualization. Accepted for
            interface compatibility; it is not currently drawn on the figure.
    """
    avg_feature_per_crop = df.groupby("Crop")[feature].mean().reset_index()
    fig, axes = plt.subplots(1, 2, figsize=(10, 4))
    bar_ax, kde_ax = axes

    # Bar plot: one bar per crop, length = average feature value.
    sns.barplot(data=avg_feature_per_crop, x=feature, y="Crop", palette="rainbow", ax=bar_ax)
    bar_ax.set_xlabel(f"Average {feature} per Crop")
    bar_ax.set_ylabel("Crop")
    bar_ax.set_title(f"Average {feature} per Crop")

    # KDE plot: density of the per-crop averages (not of the raw rows).
    sns.kdeplot(data=avg_feature_per_crop, x=feature, fill=True, ax=kde_ax)
    kde_ax.set_xlabel(feature)
    kde_ax.set_ylabel("Density")
    kde_ax.set_title(f"Distribution of {feature}")

    st.pyplot(fig)
    # pyplot keeps figures registered until closed; release this one after
    # rendering so repeated calls do not accumulate open figures.
    plt.close(fig)
# Translate each drop-down label into the DataFrame column it stands for.
feature_mapping = {
    "Nitrogen Requirement per Crop": "Nitrogen",
    "Phosphorus Requirement per Crop": "Phosphorus",
    "Potassium Requirement per Crop": "Potassium",
    "Temperature Distribution per Crop": "Temperature",
    "Humidity Distribution per Crop": "Humidity",
    "pH Value Distribution per Crop": "pH_Value",
    "Rainfall Distribution per Crop": "Rainfall",
}

# Draw the plot for whichever label is currently selected.
selected_feature = feature_mapping[viz_option]
plot_feature(selected_feature, viz_option)
# Pie chart of how many rows each crop contributes to the dataset.
st.subheader("Proportion of Crops")
crop_counts = df['Crop'].value_counts()

# Pull the first (most frequent) slice slightly out of the pie.
explode = [0.1 if position == 0 else 0 for position, _ in enumerate(crop_counts)]

fig, ax = plt.subplots(figsize=(4, 2.5))
ax.pie(
    crop_counts,
    labels=crop_counts.index,
    autopct='%1.1f%%',
    startangle=100,
    explode=explode,
    colors=sns.color_palette("rainbow", len(crop_counts)),
    textprops={'fontsize': 4},
)
ax.set_title("Proportion of Crops", fontsize=3)
st.pyplot(fig)
# Close the figure once rendered so pyplot does not keep it alive across
# script reruns.
plt.close(fig)
# Section: crops whose rows sit at or above the mean for all three nutrients.
st.markdown(
    "<h2 style='text-align: left; color: #D35400;font-size: 24px;'>Crops that require above-average soil nutrients</h2>",
    unsafe_allow_html=True,
)

# Column means used as the "average" thresholds for N, P and K.
avg_nitrogen, avg_phosphorus, avg_potassium = (
    df[nutrient].mean() for nutrient in ('Nitrogen', 'Phosphorus', 'Potassium')
)
def plot_nutrient_crops(data, title):
    """Render a bar chart of how often each value occurs in *data*.

    Args:
        data: Object exposing ``value_counts()`` (callers in this file pass
            the ``'Crop'`` column of a filtered DataFrame).
        title: Text shown as the chart title.
    """
    crop_counts = data.value_counts()
    fig, ax = plt.subplots(figsize=(4, 2))
    crop_counts.plot(kind='bar', color='skyblue', ax=ax)

    # Small fonts throughout to suit the compact figure size.
    ax.set_xlabel('Crops', fontsize=4)
    ax.set_ylabel('Count', fontsize=4)
    ax.set_title(title, fontsize=4)
    ax.tick_params(axis='x', labelsize=4)
    ax.tick_params(axis='y', labelsize=4)

    st.pyplot(fig)
    # pyplot keeps figures registered until closed; release this one after
    # rendering so repeated calls do not accumulate open figures.
    plt.close(fig)
# Rows where nitrogen, phosphorus and potassium are all at or above their
# column means; only the crop label is kept.
nitrogen_high = df['Nitrogen'] >= avg_nitrogen
phosphorus_high = df['Phosphorus'] >= avg_phosphorus
potassium_high = df['Potassium'] >= avg_potassium
more_avg_of_soil_nutrients = df.loc[nitrogen_high & phosphorus_high & potassium_high, 'Crop']
plot_nutrient_crops(more_avg_of_soil_nutrients, "Crops Growing in Nutrient-Rich Soil")

# Written observations for the nutrient-rich chart.
st.markdown(body="<h2 style='color: #2E86C1; font-size: 24px;'>Insights:</h2>", unsafe_allow_html=True)
nutrient_rich_insights_html = """
<style>
.insights-box {
font-size: 19px;
color: #333;
background-color: #f9f9f9;
padding: 8px;
border-radius: 8px;
border-left: 4px solid #2E86C1;
}
</style>
<div class='insights-box'>
1. Banana is the most frequent crop, indicating it requires high soil nutrients.<br>
2. Rice, Papaya, and Jute also need good nutrients but less than Banana.<br>
3. Chickpea appears the least, suggesting it can grow in soils with relatively lower nutrients.<br>
</div>
"""
st.markdown(nutrient_rich_insights_html, unsafe_allow_html=True)
# Section: crops whose rows fall below the mean for all three nutrients.
st.markdown(
    "<h2 style='text-align: left; color: #D35400;font-size: 24px;'>Crops that require below-average soil nutrients</h2>",
    unsafe_allow_html=True,
)

# Rows where nitrogen, phosphorus and potassium are all strictly below their
# column means; only the crop label is kept.
nitrogen_low = df['Nitrogen'] < avg_nitrogen
phosphorus_low = df['Phosphorus'] < avg_phosphorus
potassium_low = df['Potassium'] < avg_potassium
less_avg_of_soil_nutrients = df.loc[nitrogen_low & phosphorus_low & potassium_low, 'Crop']
plot_nutrient_crops(less_avg_of_soil_nutrients, "Crops Growing in Nutrient-Poor Soil")

# Written observations for the nutrient-poor chart.
st.markdown(body="<h2 style='color: #D35400; font-size: 24px;'>Insights:</h2>", unsafe_allow_html=True)
nutrient_poor_insights_html = """
<style>
.insights-box {
font-size: 19px;
color: #333;
background-color: #f9f9f9;
padding: 8px;
border-radius: 8px;
border-left: 4px solid #2E86C1;
}
</style>
<div class='insights-box'>
1. Orange, Mango, and Coconut are the most frequent crops found in nutrient-poor soil.<br>
2. These crops are well-adapted to low-fertility conditions and can grow even when nutrient levels are below average.<br>
3. Unlike Banana and Rice, which need nutrient-rich soil to grow well, crops like Orange, Mango, and Coconut can survive with fewer nutrients. These fruit trees are naturally more resistant to dry conditions and can grow even in sandy or less fertile soils.<br>
</div>
"""
st.markdown(nutrient_poor_insights_html, unsafe_allow_html=True)

# Practical guidance derived from the two nutrient charts.
st.markdown(body="<h2 style='color: #2E86C1; font-size: 24px;'>How This Helps Farmers:</h2>", unsafe_allow_html=True)
st.markdown(body="<h2 style='color: #28B463; font-size: 22px;'>Choosing the Right Crops for Low-Fertility Soil:</h2>", unsafe_allow_html=True)
low_fertility_advice_html = """
<style>
.insights-box {
font-size: 18px;
color: #333;
background-color: #f9f9f9;
padding: 8px;
border-radius: 8px;
border-left: 4px solid #2E86C1;
}
</style>
<div class='insights-box'>
1. If the soil is not very rich in nutrients, farmers should avoid growing crops like Banana or Rice, which need a lot of nutrients to grow well.<br>
2. Instead, they can plant fruit trees like Orange, Mango, and Coconut, which can survive and produce good yields even in poor soil.<br>
</div>
"""
st.markdown(low_fertility_advice_html, unsafe_allow_html=True)

st.markdown(body="<h2 style='color: #28B463; font-size: 24px;'>Improving Soil Naturally with Legumes:</h2>", unsafe_allow_html=True)
legume_advice_html = """
<style>
.insights-box {
font-size: 18px;
color: #333;
background-color: #f9f9f9;
padding: 8px;
border-radius: 8px;
border-left: 4px solid #2E86C1;
}
</style>
<div class='insights-box'>
1. Leguminous crops like MungBean and MothBeans help the soil by adding nitrogen naturally.<br>
2. If farmers grow these crops in rotation with other crops, the soil will become more fertile over time, reducing the need for chemical fertilizers.<br>
</div>
"""
st.markdown(legume_advice_html, unsafe_allow_html=True)
st.markdown(
    "<h2 style='text-align: left; color: #D35400;font-size: 24px;'>Crops that require above-average climate factors</h2>",
    unsafe_allow_html=True,
)

# Column means used as the "average" thresholds for the climate-related columns.
avg_Temperature, avg_Humidity, avg_pH_Value, avg_Rainfall = (
    df[factor].mean() for factor in ('Temperature', 'Humidity', 'pH_Value', 'Rainfall')
)
def plot_climate_crops(data, title):
    """Render a bar chart of how often each value occurs in *data*.

    Args:
        data: Object exposing ``value_counts()`` (callers in this file pass
            the ``'Crop'`` column of a filtered DataFrame).
        title: Text shown as the chart title.
    """
    clim_crop_counts = data.value_counts()
    fig, ax = plt.subplots(figsize=(4, 2))
    clim_crop_counts.plot(kind='bar', color='skyblue', ax=ax)

    # Small fonts throughout to suit the compact figure size.
    ax.set_xlabel('Crops', fontsize=4)
    ax.set_ylabel('Count', fontsize=4)
    ax.set_title(title, fontsize=4)
    ax.tick_params(axis='x', labelsize=4)
    ax.tick_params(axis='y', labelsize=4)

    st.pyplot(fig)
    # pyplot keeps figures registered until closed; release this one after
    # rendering so repeated calls do not accumulate open figures.
    plt.close(fig)
# Rows where temperature, humidity, pH and rainfall are all at or above their
# column means; only the crop label is kept.
more_avg_of_Climate_Factors = df[
    (df['Temperature'] >= avg_Temperature) &
    (df['Humidity'] >= avg_Humidity) &
    (df['pH_Value'] >= avg_pH_Value) &
    (df['Rainfall'] >= avg_Rainfall)
]['Crop']

# The chart title names the "above-average" side explicitly; the companion
# below-average chart previously carried the very same title, which made the
# two plots indistinguishable.
plot_climate_crops(more_avg_of_Climate_Factors, "Crops Growing in Above-Average Climate Factors")
# Written observations for the above-average climate chart.
st.markdown(body="<h2 style='color: #2E86C1; font-size: 23px;'>Insights:</h2>", unsafe_allow_html=True)
above_average_climate_html = """
<style>
.insights-box {
font-size: 19px;
color: #333;
background-color: #f9f9f9;
padding: 8px;
border-radius: 8px;
border-left: 4px solid #2E86C1;
}
</style>
<div class='insights-box'>
1. Papaya and orange grow best in warm, humid, and rainy climates.<br>
2. Jute and rice do well in high rainfall areas.<br>
3. Maize and cotton grow the least, meaning they might prefer drier climates.<br>
</div>
"""
st.markdown(above_average_climate_html, unsafe_allow_html=True)
# Section: crops whose rows fall below the mean for every climate factor.
st.markdown("<h2 style='text-align: left; color: #D35400;font-size: 23px;'>Crops that require below-average climate factors</h2>", unsafe_allow_html=True)

# Rows where temperature, humidity, pH and rainfall are all strictly below
# their column means; only the crop label is kept.
less_avg_of_Climate_Factors = df[
    (df['Temperature'] < avg_Temperature) &
    (df['Humidity'] < avg_Humidity) &
    (df['pH_Value'] < avg_pH_Value) &
    (df['Rainfall'] < avg_Rainfall)
]['Crop']

# The chart title names the "below-average" side explicitly; it used to be
# identical to the title of the above-average chart.
plot_climate_crops(less_avg_of_Climate_Factors, "Crops Growing in Below-Average Climate Factors")
# Written observations for the below-average climate chart.
st.markdown(body="<h2 style='color: #D35400; font-size: 23px;'>Insights:</h2>", unsafe_allow_html=True)
below_average_climate_html = """
<style>
.insights-box {
font-size: 19px;
color: #333;
background-color: #f9f9f9;
padding: 8px;
border-radius: 8px;
border-left: 4px solid #2E86C1;
}
</style>
<div class='insights-box'>
1. Maize and Kidney Beans Grow Best : These crops can grow well even in dry and less humid conditions with low rainfall. They are strong and can survive in tough climates.<br>
2. Chickpea and Lentil Grow Okay : They can manage in dry conditions but need some care, like proper soil and water.<br>
3. Moth Beans and Pigeon Peas Struggle : These crops don’t grow well in dry weather and need more rain and better soil.<br>
</div>
"""
st.markdown(below_average_climate_html, unsafe_allow_html=True)
st.markdown("<h2 style='color: #D35400; font-size: 23px;'>proportion of crops Growing in rich soil Nutrients and climate factors</h2>", unsafe_allow_html=True)

# Column means for the nutrient and climate columns. The lower-case
# climate names defined here are also read by the poor-conditions filter
# further down the page.
avg_nitrogen = df['Nitrogen'].mean()
avg_phosphorus = df['Phosphorus'].mean()
avg_potassium = df['Potassium'].mean()
avg_temperature = df['Temperature'].mean()
avg_humidity = df['Humidity'].mean()
avg_pH_value = df['pH_Value'].mean()
avg_rainfall = df['Rainfall'].mean()

# Rows at or above the mean on all seven columns; only the crop label is kept.
Crop_recommendation = df[
    (df['Nitrogen'] >= avg_nitrogen) & (df['Phosphorus'] >= avg_phosphorus) &
    (df['Potassium'] >= avg_potassium) & (df['Temperature'] >= avg_temperature) &
    (df['Humidity'] >= avg_humidity) & (df['pH_Value'] >= avg_pH_value) &
    (df['Rainfall'] >= avg_rainfall)
]['Crop']
crop_counts = Crop_recommendation.value_counts()

# Pull the first (most frequent) slice slightly out of the pie.
explode = [0.1 if position == 0 else 0 for position, _ in enumerate(crop_counts)]

# Pie chart of the crops found under rich nutrient and climate conditions.
fig, ax = plt.subplots(figsize=(4, 2.5))
ax.pie(
    crop_counts,
    labels=crop_counts.index,
    autopct='%1.1f%%',
    startangle=140,
    explode=explode,
    colors=sns.color_palette("rainbow", len(crop_counts)),
    textprops={'fontsize': 4},
)
st.pyplot(fig)
# Close the figure once rendered so pyplot does not keep it alive across
# script reruns.
plt.close(fig)
# Written observation for the rich-conditions pie chart.
st.markdown(body="<h2 style='color: #D35400; font-size: 23px;'>Insights:</h2>", unsafe_allow_html=True)
rich_conditions_html = """
<style>
.insights-box {
font-size: 19px;
color: #333;
background-color: #f9f9f9;
padding: 8px;
border-radius: 8px;
border-left: 4px solid #2E86C1;
}
</style>
<div class='insights-box'>
1. If your land has high soil nutrients (Nitrogen, Phosphorus, Potassium) and favorable climate conditions (good temperature, humidity, pH, and rainfall), these are the best crops to grow
</div>
"""
st.markdown(rich_conditions_html, unsafe_allow_html=True)
st.markdown("<h2 style='color: #D35400; font-size: 23px;'>proportion of crops Growing in less soil Nutrients and climate factors</h2>", unsafe_allow_html=True)

# Rows strictly below the mean on all seven columns; only the crop label is kept.
Crop_recommendation1 = df[
    (df['Nitrogen'] < avg_nitrogen) & (df['Phosphorus'] < avg_phosphorus) &
    (df['Potassium'] < avg_potassium) & (df['Temperature'] < avg_temperature) &
    (df['Humidity'] < avg_humidity) & (df['pH_Value'] < avg_pH_value) &
    (df['Rainfall'] < avg_rainfall)
]['Crop']
crop_counts1 = Crop_recommendation1.value_counts()

# Pull the first (most frequent) slice slightly out of the pie.
explode = [0.1 if position == 0 else 0 for position, _ in enumerate(crop_counts1)]

# Pie chart of the crops found under poor nutrient and climate conditions.
fig, ax = plt.subplots(figsize=(4, 2.5))
ax.pie(
    crop_counts1,
    labels=crop_counts1.index,
    autopct='%1.1f%%',
    startangle=140,
    explode=explode,
    colors=sns.color_palette("rainbow", len(crop_counts1)),
    textprops={'fontsize': 4},
)
ax.set_aspect("equal")  # Keep the pie circular
st.pyplot(fig)
# Close the figure once rendered so pyplot does not keep it alive across
# script reruns.
plt.close(fig)
# Written observation for the poor-conditions pie chart.
st.markdown(body="<h2 style='color: #D35400; font-size: 23px;'>Insights:</h2>", unsafe_allow_html=True)
poor_conditions_html = """
<style>
.insights-box {
font-size: 19px;
color: #333;
background-color: #f9f9f9;
padding: 8px;
border-radius: 8px;
border-left: 4px solid #2E86C1;
}
</style>
<div class='insights-box'>
1. If your land has low soil nutrients (Nitrogen, Phosphorus, Potassium) and unfavorable climate conditions (low temperature, humidity, pH, and rainfall), Moth Beans is the best option.
</div>
"""
st.markdown(poor_conditions_html, unsafe_allow_html=True)

# Follow-up box giving the reasons behind the Moth Beans recommendation.
st.markdown(body="<h2 style='color: #D35400; font-size: 23px;'>Why MothBeans ?</h2>", unsafe_allow_html=True)
why_mothbeans_html = """
<style>
.insights-box {
font-size: 19px;
color: #333;
background-color: #f9f9f9;
padding: 8px;
border-radius: 8px;
border-left: 4px solid #2E86C1;
}
</style>
<div class='insights-box'>
1. It grows well in dry and poor soil where other crops struggle.<br>
2. Requires very little water and nutrients, making it cost-effective.<br>
3. Has a stable market demand.<br>
</div>
"""
st.markdown(why_mothbeans_html, unsafe_allow_html=True)
# Scatter of rainfall against crop, one colour per crop.
st.markdown("<h2 style='color: #2E86C1; font-size: 24px;'> Rainfall vs. Crop</h2>", unsafe_allow_html=True)
fig, ax = plt.subplots(figsize=(4, 2.5))
sns.scatterplot(data=df, x="Rainfall", y="Crop", hue="Crop", palette="rainbow", legend=False, ax=ax, s=10)

# Small fonts to suit the compact figure size.
ax.set_xlabel("Rainfall (mm)", fontsize=6)
ax.set_ylabel("Crop", fontsize=6)
ax.tick_params(axis='x', labelsize=4)
ax.tick_params(axis='y', labelsize=5)

st.pyplot(fig)
# Close the figure once rendered so pyplot does not keep it alive across
# script reruns.
plt.close(fig)
# Scatter of temperature against humidity over all rows.
st.markdown("<h2 style='color: #2E86C1; font-size: 24px;'> Temperature vs. Humidity</h2>", unsafe_allow_html=True)
fig, ax = plt.subplots(figsize=(4, 2.5))
sns.scatterplot(data=df, x="Temperature", y="Humidity", alpha=0.6, color="blue", ax=ax, s=10)

# Small fonts to suit the compact figure size.
ax.set_xlabel("Temperature (°C)", fontsize=6)
ax.set_ylabel("Humidity (%)", fontsize=6)
ax.tick_params(axis='x', labelsize=5)
ax.tick_params(axis='y', labelsize=5)

st.pyplot(fig)
# Close the figure once rendered so pyplot does not keep it alive across
# script reruns.
plt.close(fig)
# Heatmap of pairwise correlations between the seven numeric columns.
st.markdown("<h2 style='color: #2E86C1; font-size: 24px;'> Correlation Heatmap of Soil & Climate Factors</h2>", unsafe_allow_html=True)
corr_matrix = df[['Nitrogen', 'Phosphorus', 'Potassium', 'Temperature', 'Humidity', 'pH_Value', 'Rainfall']].corr()

fig, ax = plt.subplots(figsize=(4, 2.5))
heatmap = sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", ax=ax, annot_kws={"size": 5})

# Small fonts for the tick labels and the colour bar to suit the compact figure.
ax.tick_params(axis='x', labelsize=5)
ax.tick_params(axis='y', labelsize=5)
cbar = heatmap.collections[0].colorbar
cbar.ax.tick_params(labelsize=4)

st.pyplot(fig)
# Close the figure once rendered so pyplot does not keep it alive across
# script reruns.
plt.close(fig)