# Streamlit page for the crop-recommendation dataset: previews the data,
# counts and replaces IQR outliers, and plots per-crop feature summaries.
import streamlit as st
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

DATA_URL = "https://huggingface.co/spaces/varshitha22/Crop_Recommendation/resolve/main/Crop_Recommendation.csv"

st.markdown(
    """

Data Frame

""",
    unsafe_allow_html=True,
)

# Load the dataset a single time; the preview and the analysis share it.
df = pd.read_csv(DATA_URL)
df1 = df.head()
st.dataframe(df1)  # Display the first rows in Streamlit

# Define numerical columns
num_cols = ['Nitrogen', 'Phosphorus', 'Potassium', 'Temperature', 'Humidity', 'pH_Value', 'Rainfall']

# Title
st.markdown(
    """

Outlier Detection and Handling

""",
    unsafe_allow_html=True,
)

# --- Boxplot Visualization ---
st.markdown(
    """

Outlier Detection (Boxplots)

""",
    unsafe_allow_html=True,
)

fig, ax = plt.subplots(2, 4, figsize=(12, 6))
ax = ax.flatten()
for i, col in enumerate(num_cols):
    sns.boxplot(x=df[col], ax=ax[i], color="skyblue")
    ax[i].set_title(col)
# The 2x4 grid has more cells than there are columns; hide the spare ones
# so no empty frame is drawn.
for spare_axis in ax[len(num_cols):]:
    spare_axis.set_visible(False)
plt.tight_layout()
st.pyplot(fig)
# Streamlit re-runs the script on every interaction; close the figure so
# matplotlib does not keep accumulating open figures.
plt.close(fig)

# --- Outlier Handling ---
st.markdown(
    """

Outlier Handling using IQR

""",
    unsafe_allow_html=True,
)

outlier_counts = {}
for col in num_cols:
    Q1 = df[col].quantile(0.25)
    Q3 = df[col].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR

    # Build the mask once and reuse it for counting and for replacement.
    is_outlier = (df[col] < lower_bound) | (df[col] > upper_bound)
    outlier_counts[col] = int(is_outlier.sum())

    # Replace outliers with the column mean (taken before replacement).
    df[col] = np.where(is_outlier, df[col].mean(), df[col])

# Display Outlier Counts
st.write("Number of Outliers Detected:")
st.write(outlier_counts)

# Section title
st.markdown(
    """

Why Use the IQR Method?

""",
    unsafe_allow_html=True,
)

# Explanation
st.markdown("""

1. Other methods like mean and standard deviation can be heavily influenced by extreme values.

2. IQR focuses only on the middle 50% of data (between Q1 and Q3), making it less affected by extreme values.

3. Other methods may remove outliers entirely, leading to data loss.

4. Instead of dropping rows, the IQR method replaces outliers with the mean of the column, keeping the dataset size the same.

5. This is useful when we don’t want to lose important information but still need to control extreme values.

""", unsafe_allow_html=True)

st.markdown(
    """

Crop Recommendation System - Feature Visualization

""",
    unsafe_allow_html=True,
)

# Visualization label -> dataframe column it plots.
feature_mapping = {
    "Nitrogen Requirement per Crop": "Nitrogen",
    "Phosphorus Requirement per Crop": "Phosphorus",
    "Potassium Requirement per Crop": "Potassium",
    "Temperature Distribution per Crop": "Temperature",
    "Humidity Distribution per Crop": "Humidity",
    "pH Value Distribution per Crop": "pH_Value",
    "Rainfall Distribution per Crop": "Rainfall",
}

# List of available visualizations, derived from the mapping so the two
# cannot drift apart.
visualizations = list(feature_mapping)

# Select box for visualization selection
viz_option = st.selectbox(
    "Select a visualization:",
    options=visualizations,
)


# Function to plot feature distributions per crop
def plot_feature(feature, title):
    """Render a bar plot and a KDE plot of the per-crop mean of ``feature``.

    Args:
        feature: Name of a numeric column of the module-level ``df``.
        title: Label of the chosen visualization. Accepted for call
            compatibility; it is not drawn on the figure.
    """
    avg_feature_per_crop = df.groupby("Crop")[feature].mean().reset_index()

    fig, axes = plt.subplots(1, 2, figsize=(10, 4))

    # Bar plot
    sns.barplot(data=avg_feature_per_crop, x=feature, y="Crop", palette="rainbow", ax=axes[0])
    axes[0].set_xlabel(f"Average {feature} per Crop")
    axes[0].set_ylabel("Crop")
    axes[0].set_title(f"Average {feature} per Crop")

    # KDE plot (computed over the per-crop averages, not the raw rows)
    sns.kdeplot(data=avg_feature_per_crop, x=feature, fill=True, ax=axes[1])
    axes[1].set_xlabel(feature)
    axes[1].set_ylabel("Density")
    axes[1].set_title(f"Distribution of {feature}")

    st.pyplot(fig)
    plt.close(fig)  # release the figure once Streamlit has rendered it


# Display selected plot
plot_feature(feature_mapping[viz_option], viz_option)

st.subheader("Proportion of Crops")

crop_counts = df['Crop'].value_counts()
# Pull only the first (largest) wedge out of the pie.
explode = [0.1 if i == 0 else 0 for i in range(len(crop_counts))]

fig, ax = plt.subplots(figsize=(4, 2.5))
ax.pie(
    crop_counts,
    labels=crop_counts.index,
    autopct='%1.1f%%',
    startangle=100,
    explode=explode,
    colors=sns.color_palette("rainbow", len(crop_counts)),
    textprops={'fontsize': 4},  # small labels to fit the small figure
)
ax.set_title("Proportion of Crops", fontsize=3)
st.pyplot(fig)
plt.close(fig)
# Above-Average Nutrient Requirement Crops st.markdown("

Crops that require above-average soil nutrients

", unsafe_allow_html=True) # Calculate average nutrient levels avg_nitrogen = df['Nitrogen'].mean() avg_phosphorus = df['Phosphorus'].mean() avg_potassium = df['Potassium'].mean() # Function to plot bar charts def plot_nutrient_crops(data, title): crop_counts = data.value_counts() fig, ax = plt.subplots(figsize=(4, 2)) # Adjusted plot size for better display crop_counts.plot(kind='bar', color='skyblue', ax=ax) ax.set_xlabel('Crops', fontsize=4) ax.set_ylabel('Count', fontsize=4) ax.set_title(title, fontsize=4) ax.tick_params(axis='x', labelsize=4) # Adjust x-axis label size ax.tick_params(axis='y', labelsize=4) # Adjust y-axis label size st.pyplot(fig) # Plot: Crops in Nutrient-Rich Soil more_avg_of_soil_nutrients = df[ (df['Nitrogen'] >= avg_nitrogen) & (df['Phosphorus'] >= avg_phosphorus) & (df['Potassium'] >= avg_potassium) ]['Crop'] plot_nutrient_crops(more_avg_of_soil_nutrients, "Crops Growing in Nutrient-Rich Soil") # Insights for Nutrient-Rich Crops st.markdown("