Spaces:

hydraadra112
/

Crop_Yield_Estimates

Sleeping

App Files Files Community

hydraadra112 committed on Feb 16, 2025

Commit

fbd7328

1 Parent(s): e4fb332

Added data viz and details about the data

Browse files

Files changed (5) hide show

.gitignore +1 -2
app.py +107 -31
model.ipynb +16 -16
requirements.txt +2 -2
yield_df.csv +0 -0

.gitignore CHANGED Viewed

@@ -1,3 +1,2 @@
 .venv
-.venv/
-yield_df.csv


1	.venv
2	+ .venv/

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import streamlit as st
 import pandas as pd
 import joblib
 models = ["Linear Regression", "XGBoost", "Random Forests Regressor"]
@@ -71,50 +72,125 @@ def get_input_data(df: pd.core.frame.DataFrame):
     "avg_temp": [avg_temp]
     }), (item, area)
 def main():
     st.title("Crop Yield Predictor")
-    st.caption("The science of training machines to learn and produce models for future predictions is widely used, and not for nothing. Agriculture plays a critical role in the global economy. With the continuing expansion of the human population understanding worldwide crop yield is central to addressing food security challenges and reducing the impacts of climate change. Crop yield prediction is an important agricultural problem. The Agricultural yield primarily depends on weather conditions (rain, temperature, etc), pesticides and accurate information about history of crop yield is an important thing for making decisions related to agricultural risk management and future predictions.")
     df = load_dataset('./yield_df.csv')
     df = df.drop("Unnamed: 0", axis=1)
-    st.dataframe(df, height=300, width=900)
-    st.divider()
-    input_data, (item, area) = get_input_data(df)
-    selected_model = st.selectbox("Which model do you want to use?",
-                                  tuple(models),
-                                  placeholder="Select your model"
-                                  )
-    with st.expander(f"Click to see performance of {selected_model}"):
-        if selected_model == models[0]:
-            st.image("./plots/lr_plot.png", caption="Linear Regression Plot")
-        elif selected_model == models[1]:
-            st.image("./plots/xgb_plot.png", caption="XG Boost Plot")
-        elif selected_model == models[2]:
-            st.image("./plots/rf_plot.png", caption="Random Forests Regressor Plot")
-    if st.button("Predict yield!"):
         col1, col2 = st.columns(2)
-        col1.metric("Area", area, border=True)
-        col2.metric("Item", item, border=True)
-        col3, col4, col5 = st.columns(3)
-        col3.metric("Average Rainfall", input_data['average_rain_fall_mm_per_year'], border=True)
-        col4.metric("Pestiscide Usage (Tonne)", input_data['pesticides_tonnes'], border=True)
-        col5.metric("Average Temperature (Celcius)", input_data['avg_temp'], border=True)
-        model, scaler = load_model(selected_model)
-        input_scaled = scaler.transform(input_data)
-        pred = model.predict(input_scaled)
-        st.header(f"Predicted Crop Yield: **{int(pred[0])}**")
 if __name__ == "__main__":
     main()

 import streamlit as st
 import pandas as pd
 import joblib
+import plotly.express as px
 models = ["Linear Regression", "XGBoost", "Random Forests Regressor"]
     "avg_temp": [avg_temp]
     }), (item, area)
+def plot_map(countries: pd.Series) -> None:
+    """
+    Plots the world map and highlights the countries that are frequent.
+    Args:
+        countries (pd.Series): A pandas series of the countries
+    """
+    country_counts = countries.value_counts().reset_index()
+    country_counts.columns = ['country', 'count']
+    # Create a choropleth map
+    fig = px.choropleth(
+        country_counts,
+        locations='country',
+        locationmode='country names',
+        color='count',
+        hover_name='country',
+        color_continuous_scale='Blues',
+        title='Countries in the Dataset'
+    )
+    # Display in Streamlit
+    st.plotly_chart(fig)
 def main():
     st.title("Crop Yield Predictor")
+    tab1, tab2, tab3 = st.tabs(["About the Data", "Data Viz", "Model Inference"])
     df = load_dataset('./yield_df.csv')
     df = df.drop("Unnamed: 0", axis=1)
+    with tab1:
+        st.caption("The science of training machines to learn and produce models for future predictions is widely used, and not for nothing. Agriculture plays a critical role in the global economy. With the continuing expansion of the human population understanding worldwide crop yield is central to addressing food security challenges and reducing the impacts of climate change.")
+        st.caption(" Crop yield prediction is an important agricultural problem. The Agricultural yield primarily depends on weather conditions (rain, temperature, etc), pesticides and accurate information about history of crop yield is an important thing for making decisions related to agricultural risk management and future predictions.")
+        st.dataframe(df, height=300, width=900)
         col1, col2 = st.columns(2)
+        col1.caption("**Area**: Geographic region or country where the crop is cultivated, serving as a key factor in yield variations due to climate, soil, and regional practices.")
+        col2.caption("**Item**: Type of crop grown (e.g., wheat, rice), essential for modeling yield patterns and crop-specific responses to environmental factors.")
+        col1.caption("**Year**: Time of harvest, helping analyze yield trends, seasonal patterns, and the impact of climate change over time.")
+        col2.caption("**hg/ha_yield**: Crop yield per hectare (hectograms per hectare), the target variable indicating agricultural productivity for each crop and region.")
+        col1.caption("**average_rain_fall_mm_per_year**: Annual rainfall measured in millimeters, a critical environmental factor influencing crop growth and yield.")
+        col2.caption("**pesticides_tonnes**: Total pesticides applied (in tonnes), providing insight into pest control measures and their impact on crop yield.")
+        col1.caption("**avg_temp**: Average annual temperature (°C), a vital climate factor affecting crop growth cycles, maturity rates, and overall yield.")
+    st.divider()
+    with tab2:
+        plot_map(df['Area'])
+        st.caption("The world map plot above showcases each country and its frequency in the dataset.")
+        st.divider()
+        x = st.selectbox("Choose X for plotting.", tuple(df.columns))
+        y = st.selectbox("Choose Y for plotting.", tuple(df.drop(x, axis=1).columns))
+        plot = st.selectbox("Select type of plot.", ("Scatter", "Bar", "Line"))
+        if st.button("Plot X and Y!"):
+            if plot == "Scatter":
+                st.scatter_chart(
+                    data=df,
+                    x=x,
+                    y=y,
+                    size='hg/ha_yield'
+                )
+            elif plot == "Bar":
+                st.bar_chart(
+                    data=df,
+                    x=x,
+                    y=y
+                )
+            elif plot == "Line":
+                st.line_chart(
+                    data=df,
+                    x=x,
+                    y=y
+                    )
+    with tab3:
+        input_data, (item, area) = get_input_data(df)
+        selected_model = st.selectbox("Which model do you want to use?",
+                                    tuple(models),
+                                    placeholder="Select your model"
+                                    )
+        with st.expander(f"Click to see performance of {selected_model}"):
+            if selected_model == models[0]:
+                st.image("./plots/lr_plot.png", caption="Linear Regression Plot")
+            elif selected_model == models[1]:
+                st.image("./plots/xgb_plot.png", caption="XG Boost Plot")
+            elif selected_model == models[2]:
+                st.image("./plots/rf_plot.png", caption="Random Forests Regressor Plot")
+        if st.button("Predict yield!"):
+            col1, col2 = st.columns(2)
+            col1.metric("Area", area, border=True)
+            col2.metric("Item", item, border=True)
+            col3, col4, col5 = st.columns(3)
+            col3.metric("Average Rainfall", input_data['average_rain_fall_mm_per_year'], border=True)
+            col4.metric("Pestiscide Usage (Tonne)", input_data['pesticides_tonnes'], border=True)
+            col5.metric("Average Temperature (Celcius)", input_data['avg_temp'], border=True)
+            model, scaler = load_model(selected_model)
+            input_scaled = scaler.transform(input_data)
+            pred = model.predict(input_scaled)
+            st.header(f"Predicted Crop Yield: **{int(pred[0])}**")
 if __name__ == "__main__":
     main()

model.ipynb CHANGED Viewed

@@ -20,7 +20,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 33,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -40,7 +40,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 34,
    "metadata": {},
    "outputs": [
     {
@@ -215,7 +215,7 @@
        "9                         1485.0              121.0     15.36  "
       ]
      },
-     "execution_count": 34,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -227,7 +227,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 35,
    "metadata": {},
    "outputs": [
     {
@@ -258,7 +258,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
    "metadata": {},
    "outputs": [
     {
@@ -269,7 +269,7 @@
        "      dtype='object')"
       ]
      },
-     "execution_count": 36,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -280,7 +280,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 37,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -304,7 +304,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 38,
    "metadata": {},
    "outputs": [
     {
@@ -350,7 +350,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 39,
    "metadata": {},
    "outputs": [
     {
@@ -381,7 +381,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 40,
    "metadata": {},
    "outputs": [
     {
@@ -425,7 +425,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 41,
    "metadata": {},
    "outputs": [
     {
@@ -478,7 +478,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 42,
    "metadata": {},
    "outputs": [
     {
@@ -588,7 +588,7 @@
        "4                         1485.0              121.0     16.37  "
       ]
      },
-     "execution_count": 42,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -599,7 +599,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 43,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -608,7 +608,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 44,
    "metadata": {},
    "outputs": [
     {
@@ -619,7 +619,7 @@
        "      dtype='object')"
       ]
      },
-     "execution_count": 44,
      "metadata": {},
      "output_type": "execute_result"
     }

   },
   {
    "cell_type": "code",
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
        "9                         1485.0              121.0     15.36  "
       ]
      },
+     "execution_count": 2,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
   },
   {
    "cell_type": "code",
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
        "      dtype='object')"
       ]
      },
+     "execution_count": 4,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
   },
   {
    "cell_type": "code",
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
   },
   {
    "cell_type": "code",
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
   },
   {
    "cell_type": "code",
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
   },
   {
    "cell_type": "code",
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
        "4                         1485.0              121.0     16.37  "
       ]
      },
+     "execution_count": 10,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
+   "execution_count": 11,
    "metadata": {},
    "outputs": [],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
        "      dtype='object')"
       ]
      },
+     "execution_count": 12,
      "metadata": {},
      "output_type": "execute_result"
     }

requirements.txt CHANGED Viewed

@@ -1,5 +1,5 @@
 pandas==2.2.3
 streamlit==1.42.0
 scikit-learn==1.6.1
-kagglehub==0.3.7
-matplotlib==3.10.0

 pandas==2.2.3
 streamlit==1.42.0
 scikit-learn==1.6.1
+matplotlib==3.10.0
+plotly

yield_df.csv ADDED Viewed

The diff for this file is too large to render. See raw diff