Spaces:
Sleeping
Sleeping
Commit
·
fbd7328
1
Parent(s):
e4fb332
Added data viz and details about the data
Browse files- .gitignore +1 -2
- app.py +107 -31
- model.ipynb +16 -16
- requirements.txt +2 -2
- yield_df.csv +0 -0
.gitignore
CHANGED
|
@@ -1,3 +1,2 @@
|
|
| 1 |
.venv
|
| 2 |
-
.venv/
|
| 3 |
-
yield_df.csv
|
|
|
|
| 1 |
.venv
|
| 2 |
+
.venv/
|
|
|
app.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
import pandas as pd
|
| 3 |
import joblib
|
|
|
|
| 4 |
|
| 5 |
models = ["Linear Regression", "XGBoost", "Random Forests Regressor"]
|
| 6 |
|
|
@@ -71,50 +72,125 @@ def get_input_data(df: pd.core.frame.DataFrame):
|
|
| 71 |
"avg_temp": [avg_temp]
|
| 72 |
}), (item, area)
|
| 73 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
def main():
|
| 75 |
st.title("Crop Yield Predictor")
|
| 76 |
-
|
|
|
|
| 77 |
|
| 78 |
df = load_dataset('./yield_df.csv')
|
| 79 |
df = df.drop("Unnamed: 0", axis=1)
|
| 80 |
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
selected_model = st.selectbox("Which model do you want to use?",
|
| 88 |
-
tuple(models),
|
| 89 |
-
placeholder="Select your model"
|
| 90 |
-
)
|
| 91 |
-
|
| 92 |
-
with st.expander(f"Click to see performance of {selected_model}"):
|
| 93 |
-
if selected_model == models[0]:
|
| 94 |
-
st.image("./plots/lr_plot.png", caption="Linear Regression Plot")
|
| 95 |
-
elif selected_model == models[1]:
|
| 96 |
-
st.image("./plots/xgb_plot.png", caption="XG Boost Plot")
|
| 97 |
-
elif selected_model == models[2]:
|
| 98 |
-
st.image("./plots/rf_plot.png", caption="Random Forests Regressor Plot")
|
| 99 |
-
|
| 100 |
-
if st.button("Predict yield!"):
|
| 101 |
|
| 102 |
col1, col2 = st.columns(2)
|
| 103 |
-
col1.metric("Area", area, border=True)
|
| 104 |
-
col2.metric("Item", item, border=True)
|
| 105 |
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
col4.metric("Pestiscide Usage (Tonne)", input_data['pesticides_tonnes'], border=True)
|
| 109 |
-
col5.metric("Average Temperature (Celcius)", input_data['avg_temp'], border=True)
|
| 110 |
|
| 111 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
-
|
| 114 |
|
| 115 |
-
|
|
|
|
| 116 |
|
| 117 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
|
| 119 |
if __name__ == "__main__":
|
| 120 |
main()
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import pandas as pd
|
| 3 |
import joblib
|
| 4 |
+
import plotly.express as px
|
| 5 |
|
| 6 |
models = ["Linear Regression", "XGBoost", "Random Forests Regressor"]
|
| 7 |
|
|
|
|
| 72 |
"avg_temp": [avg_temp]
|
| 73 |
}), (item, area)
|
| 74 |
|
| 75 |
+
def plot_map(countries: pd.Series) -> None:
    """
    Render a world choropleth shaded by how often each country occurs.

    Args:
        countries (pd.Series): A pandas series of country names.
    """

    # Tally occurrences and expose them as a two-column frame
    # ('country', 'count') that plotly can consume directly.
    frequency = (
        countries.value_counts()
        .rename_axis('country')
        .reset_index(name='count')
    )

    # Countries are matched by their plain names; darker blue means
    # the country appears more often.
    choropleth_options = dict(
        locations='country',
        locationmode='country names',
        color='count',
        hover_name='country',
        color_continuous_scale='Blues',
        title='Countries in the Dataset',
    )
    figure = px.choropleth(frequency, **choropleth_options)

    # Hand the finished figure to Streamlit for display.
    st.plotly_chart(figure)
|
| 99 |
+
|
| 100 |
def _render_about_tab(df: pd.DataFrame) -> None:
    """Show the dataset introduction, the raw table and a column glossary."""
    st.caption("The science of training machines to learn and produce models for future predictions is widely used, and not for nothing. Agriculture plays a critical role in the global economy. With the continuing expansion of the human population understanding worldwide crop yield is central to addressing food security challenges and reducing the impacts of climate change.")
    st.caption(" Crop yield prediction is an important agricultural problem. The Agricultural yield primarily depends on weather conditions (rain, temperature, etc), pesticides and accurate information about history of crop yield is an important thing for making decisions related to agricultural risk management and future predictions.")

    st.dataframe(df, height=300, width=900)

    column_notes = (
        "**Area**: Geographic region or country where the crop is cultivated, serving as a key factor in yield variations due to climate, soil, and regional practices.",
        "**Item**: Type of crop grown (e.g., wheat, rice), essential for modeling yield patterns and crop-specific responses to environmental factors.",
        "**Year**: Time of harvest, helping analyze yield trends, seasonal patterns, and the impact of climate change over time.",
        "**hg/ha_yield**: Crop yield per hectare (hectograms per hectare), the target variable indicating agricultural productivity for each crop and region.",
        "**average_rain_fall_mm_per_year**: Annual rainfall measured in millimeters, a critical environmental factor influencing crop growth and yield.",
        "**pesticides_tonnes**: Total pesticides applied (in tonnes), providing insight into pest control measures and their impact on crop yield.",
        "**avg_temp**: Average annual temperature (°C), a vital climate factor affecting crop growth cycles, maturity rates, and overall yield.",
    )

    # Alternate the notes between the two columns: left, right, left, ...
    layout = st.columns(2)
    for position, note in enumerate(column_notes):
        layout[position % 2].caption(note)

    st.divider()


def _render_viz_tab(df: pd.DataFrame) -> None:
    """Show the country-frequency map and a user-configurable X/Y chart."""
    plot_map(df['Area'])
    st.caption("The world map plot above showcases each country and its frequency in the dataset.")

    st.divider()

    x = st.selectbox("Choose X for plotting.", tuple(df.columns))
    # The column already chosen for X is not offered again for Y.
    y = st.selectbox("Choose Y for plotting.", tuple(df.drop(x, axis=1).columns))

    plot = st.selectbox("Select type of plot.", ("Scatter", "Bar", "Line"))

    if st.button("Plot X and Y!"):
        if plot == "Scatter":
            # Marker size follows the yield column.
            st.scatter_chart(data=df, x=x, y=y, size='hg/ha_yield')
        elif plot == "Bar":
            st.bar_chart(data=df, x=x, y=y)
        elif plot == "Line":
            st.line_chart(data=df, x=x, y=y)


def _render_inference_tab(df: pd.DataFrame) -> None:
    """Collect the inputs, let the user pick a model and show its prediction."""
    # get_input_data returns the feature frame plus the chosen (item, area).
    input_data, (item, area) = get_input_data(df)

    selected_model = st.selectbox("Which model do you want to use?",
                                  tuple(models),
                                  placeholder="Select your model"
                                  )

    # Performance plot (path, caption) for each entry of `models`, in order.
    performance_plots = dict(zip(models, (
        ("./plots/lr_plot.png", "Linear Regression Plot"),
        ("./plots/xgb_plot.png", "XG Boost Plot"),
        ("./plots/rf_plot.png", "Random Forests Regressor Plot"),
    )))

    with st.expander(f"Click to see performance of {selected_model}"):
        if selected_model in performance_plots:
            image_path, image_caption = performance_plots[selected_model]
            st.image(image_path, caption=image_caption)

    if st.button("Predict yield!"):

        col1, col2 = st.columns(2)
        col1.metric("Area", area, border=True)
        col2.metric("Item", item, border=True)

        col3, col4, col5 = st.columns(3)
        col3.metric("Average Rainfall", input_data['average_rain_fall_mm_per_year'], border=True)
        col4.metric("Pesticide Usage (Tonne)", input_data['pesticides_tonnes'], border=True)
        col5.metric("Average Temperature (Celsius)", input_data['avg_temp'], border=True)

        # NOTE(review): load_model is defined elsewhere in this file; it is
        # used here as returning a (model, scaler) pair - confirm.
        model, scaler = load_model(selected_model)

        input_scaled = scaler.transform(input_data)

        pred = model.predict(input_scaled)

        st.header(f"Predicted Crop Yield: **{int(pred[0])}**")


def main():
    """Build the Crop Yield Predictor page: title, dataset and three tabs.

    The dataset is read from './yield_df.csv' through load_dataset. The
    "Unnamed: 0" column is dropped when the CSV contains it; a CSV written
    without that column no longer raises a KeyError.
    """
    st.title("Crop Yield Predictor")

    tab1, tab2, tab3 = st.tabs(["About the Data", "Data Viz", "Model Inference"])

    df = load_dataset('./yield_df.csv')
    df = df.drop(columns="Unnamed: 0", errors="ignore")

    with tab1:
        _render_about_tab(df)

    with tab2:
        _render_viz_tab(df)

    with tab3:
        _render_inference_tab(df)
|
| 194 |
|
| 195 |
if __name__ == "__main__":
|
| 196 |
main()
|
model.ipynb
CHANGED
|
@@ -20,7 +20,7 @@
|
|
| 20 |
},
|
| 21 |
{
|
| 22 |
"cell_type": "code",
|
| 23 |
-
"execution_count":
|
| 24 |
"metadata": {},
|
| 25 |
"outputs": [],
|
| 26 |
"source": [
|
|
@@ -40,7 +40,7 @@
|
|
| 40 |
},
|
| 41 |
{
|
| 42 |
"cell_type": "code",
|
| 43 |
-
"execution_count":
|
| 44 |
"metadata": {},
|
| 45 |
"outputs": [
|
| 46 |
{
|
|
@@ -215,7 +215,7 @@
|
|
| 215 |
"9 1485.0 121.0 15.36 "
|
| 216 |
]
|
| 217 |
},
|
| 218 |
-
"execution_count":
|
| 219 |
"metadata": {},
|
| 220 |
"output_type": "execute_result"
|
| 221 |
}
|
|
@@ -227,7 +227,7 @@
|
|
| 227 |
},
|
| 228 |
{
|
| 229 |
"cell_type": "code",
|
| 230 |
-
"execution_count":
|
| 231 |
"metadata": {},
|
| 232 |
"outputs": [
|
| 233 |
{
|
|
@@ -258,7 +258,7 @@
|
|
| 258 |
},
|
| 259 |
{
|
| 260 |
"cell_type": "code",
|
| 261 |
-
"execution_count":
|
| 262 |
"metadata": {},
|
| 263 |
"outputs": [
|
| 264 |
{
|
|
@@ -269,7 +269,7 @@
|
|
| 269 |
" dtype='object')"
|
| 270 |
]
|
| 271 |
},
|
| 272 |
-
"execution_count":
|
| 273 |
"metadata": {},
|
| 274 |
"output_type": "execute_result"
|
| 275 |
}
|
|
@@ -280,7 +280,7 @@
|
|
| 280 |
},
|
| 281 |
{
|
| 282 |
"cell_type": "code",
|
| 283 |
-
"execution_count":
|
| 284 |
"metadata": {},
|
| 285 |
"outputs": [],
|
| 286 |
"source": [
|
|
@@ -304,7 +304,7 @@
|
|
| 304 |
},
|
| 305 |
{
|
| 306 |
"cell_type": "code",
|
| 307 |
-
"execution_count":
|
| 308 |
"metadata": {},
|
| 309 |
"outputs": [
|
| 310 |
{
|
|
@@ -350,7 +350,7 @@
|
|
| 350 |
},
|
| 351 |
{
|
| 352 |
"cell_type": "code",
|
| 353 |
-
"execution_count":
|
| 354 |
"metadata": {},
|
| 355 |
"outputs": [
|
| 356 |
{
|
|
@@ -381,7 +381,7 @@
|
|
| 381 |
},
|
| 382 |
{
|
| 383 |
"cell_type": "code",
|
| 384 |
-
"execution_count":
|
| 385 |
"metadata": {},
|
| 386 |
"outputs": [
|
| 387 |
{
|
|
@@ -425,7 +425,7 @@
|
|
| 425 |
},
|
| 426 |
{
|
| 427 |
"cell_type": "code",
|
| 428 |
-
"execution_count":
|
| 429 |
"metadata": {},
|
| 430 |
"outputs": [
|
| 431 |
{
|
|
@@ -478,7 +478,7 @@
|
|
| 478 |
},
|
| 479 |
{
|
| 480 |
"cell_type": "code",
|
| 481 |
-
"execution_count":
|
| 482 |
"metadata": {},
|
| 483 |
"outputs": [
|
| 484 |
{
|
|
@@ -588,7 +588,7 @@
|
|
| 588 |
"4 1485.0 121.0 16.37 "
|
| 589 |
]
|
| 590 |
},
|
| 591 |
-
"execution_count":
|
| 592 |
"metadata": {},
|
| 593 |
"output_type": "execute_result"
|
| 594 |
}
|
|
@@ -599,7 +599,7 @@
|
|
| 599 |
},
|
| 600 |
{
|
| 601 |
"cell_type": "code",
|
| 602 |
-
"execution_count":
|
| 603 |
"metadata": {},
|
| 604 |
"outputs": [],
|
| 605 |
"source": [
|
|
@@ -608,7 +608,7 @@
|
|
| 608 |
},
|
| 609 |
{
|
| 610 |
"cell_type": "code",
|
| 611 |
-
"execution_count":
|
| 612 |
"metadata": {},
|
| 613 |
"outputs": [
|
| 614 |
{
|
|
@@ -619,7 +619,7 @@
|
|
| 619 |
" dtype='object')"
|
| 620 |
]
|
| 621 |
},
|
| 622 |
-
"execution_count":
|
| 623 |
"metadata": {},
|
| 624 |
"output_type": "execute_result"
|
| 625 |
}
|
|
|
|
| 20 |
},
|
| 21 |
{
|
| 22 |
"cell_type": "code",
|
| 23 |
+
"execution_count": 1,
|
| 24 |
"metadata": {},
|
| 25 |
"outputs": [],
|
| 26 |
"source": [
|
|
|
|
| 40 |
},
|
| 41 |
{
|
| 42 |
"cell_type": "code",
|
| 43 |
+
"execution_count": 2,
|
| 44 |
"metadata": {},
|
| 45 |
"outputs": [
|
| 46 |
{
|
|
|
|
| 215 |
"9 1485.0 121.0 15.36 "
|
| 216 |
]
|
| 217 |
},
|
| 218 |
+
"execution_count": 2,
|
| 219 |
"metadata": {},
|
| 220 |
"output_type": "execute_result"
|
| 221 |
}
|
|
|
|
| 227 |
},
|
| 228 |
{
|
| 229 |
"cell_type": "code",
|
| 230 |
+
"execution_count": 3,
|
| 231 |
"metadata": {},
|
| 232 |
"outputs": [
|
| 233 |
{
|
|
|
|
| 258 |
},
|
| 259 |
{
|
| 260 |
"cell_type": "code",
|
| 261 |
+
"execution_count": 4,
|
| 262 |
"metadata": {},
|
| 263 |
"outputs": [
|
| 264 |
{
|
|
|
|
| 269 |
" dtype='object')"
|
| 270 |
]
|
| 271 |
},
|
| 272 |
+
"execution_count": 4,
|
| 273 |
"metadata": {},
|
| 274 |
"output_type": "execute_result"
|
| 275 |
}
|
|
|
|
| 280 |
},
|
| 281 |
{
|
| 282 |
"cell_type": "code",
|
| 283 |
+
"execution_count": 5,
|
| 284 |
"metadata": {},
|
| 285 |
"outputs": [],
|
| 286 |
"source": [
|
|
|
|
| 304 |
},
|
| 305 |
{
|
| 306 |
"cell_type": "code",
|
| 307 |
+
"execution_count": 6,
|
| 308 |
"metadata": {},
|
| 309 |
"outputs": [
|
| 310 |
{
|
|
|
|
| 350 |
},
|
| 351 |
{
|
| 352 |
"cell_type": "code",
|
| 353 |
+
"execution_count": 7,
|
| 354 |
"metadata": {},
|
| 355 |
"outputs": [
|
| 356 |
{
|
|
|
|
| 381 |
},
|
| 382 |
{
|
| 383 |
"cell_type": "code",
|
| 384 |
+
"execution_count": 8,
|
| 385 |
"metadata": {},
|
| 386 |
"outputs": [
|
| 387 |
{
|
|
|
|
| 425 |
},
|
| 426 |
{
|
| 427 |
"cell_type": "code",
|
| 428 |
+
"execution_count": 9,
|
| 429 |
"metadata": {},
|
| 430 |
"outputs": [
|
| 431 |
{
|
|
|
|
| 478 |
},
|
| 479 |
{
|
| 480 |
"cell_type": "code",
|
| 481 |
+
"execution_count": 10,
|
| 482 |
"metadata": {},
|
| 483 |
"outputs": [
|
| 484 |
{
|
|
|
|
| 588 |
"4 1485.0 121.0 16.37 "
|
| 589 |
]
|
| 590 |
},
|
| 591 |
+
"execution_count": 10,
|
| 592 |
"metadata": {},
|
| 593 |
"output_type": "execute_result"
|
| 594 |
}
|
|
|
|
| 599 |
},
|
| 600 |
{
|
| 601 |
"cell_type": "code",
|
| 602 |
+
"execution_count": 11,
|
| 603 |
"metadata": {},
|
| 604 |
"outputs": [],
|
| 605 |
"source": [
|
|
|
|
| 608 |
},
|
| 609 |
{
|
| 610 |
"cell_type": "code",
|
| 611 |
+
"execution_count": 12,
|
| 612 |
"metadata": {},
|
| 613 |
"outputs": [
|
| 614 |
{
|
|
|
|
| 619 |
" dtype='object')"
|
| 620 |
]
|
| 621 |
},
|
| 622 |
+
"execution_count": 12,
|
| 623 |
"metadata": {},
|
| 624 |
"output_type": "execute_result"
|
| 625 |
}
|
requirements.txt
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
pandas==2.2.3
|
| 2 |
streamlit==1.42.0
|
| 3 |
scikit-learn==1.6.1
|
| 4 |
-
|
| 5 |
-
|
|
|
|
| 1 |
pandas==2.2.3
|
| 2 |
streamlit==1.42.0
|
| 3 |
scikit-learn==1.6.1
|
| 4 |
+
matplotlib==3.10.0
|
| 5 |
+
plotly
|
yield_df.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|