# coffee-yield-forecast / predict_yield.py
# Zsirak's picture
# Upload 6 files
# ed237a9 verified
import pandas as pd
import numpy as np
import joblib
import os
# Both bundled artifacts live next to this file, so resolve them relative to
# the module's own directory rather than the current working directory.
_MODULE_DIR = os.path.dirname(__file__)

# Fitted estimator restored with joblib; predict_yield() calls its .predict().
rf_model = joblib.load(os.path.join(_MODULE_DIR, 'random_forest_model.joblib'))

# Historical table; it is filtered on its 'year' column below and in
# get_features().
data_path = os.path.join(_MODULE_DIR, 'sidama_data_2020_2024_combined.csv')
df_merged = pd.read_csv(data_path)

# Columns of df_merged handed to the model, in the order they are passed.
features = [
    'ndvi_mean_mean_flowering',
    'ndvi_mean_mean_fruit',
    'ndvi_mean_max_season',
    'soil_moisture_mean_mean_season',
    'Rainfall_mean_fruit',
    'Humidity_mean_season',
    'elevation_mean',
]

# Feature values of the first row whose year is 2024. get_features() falls
# back to these for any year that has no row in df_merged.
features_2024 = df_merged.loc[df_merged['year'] == 2024, features].iloc[0].to_dict()
def get_features(year):
    """Return the model input features to use for ``year``.

    The ``year`` column of the module-level ``df_merged`` frame is searched
    for ``year``. If it is present, the values of the ``features`` columns
    from the first matching row are returned. Otherwise (for example, a
    future season with no data yet) a copy of ``features_2024`` is returned.

    Args:
        year: Value looked up in ``df_merged['year']``.

    Returns:
        dict mapping each name in ``features`` to its value.
    """
    known_years = df_merged['year'].values

    # No row for this year: reuse the 2024 feature values. A copy is returned
    # so callers cannot mutate the shared module-level dict.
    if year not in known_years:
        return features_2024.copy()

    rows_for_year = df_merged.loc[df_merged['year'] == year, features]
    return rows_for_year.iloc[0].to_dict()
def predict_yield(year, hectares, *, z_score=1.96, error_kg_per_ha=173.0):
    """Predict coffee yield for ``year`` over ``hectares`` of land.

    The feature values returned by ``get_features(year)`` are passed to the
    module-level ``rf_model`` to obtain a per-hectare prediction, which is
    then scaled by ``hectares``. An interval of
    ``prediction +/- z_score * error_kg_per_ha`` is reported per hectare and
    for the whole area.

    Args:
        year: Year to predict for; forwarded to ``get_features``.
        hectares: Area to scale the per-hectare prediction by. Must not be
            negative.
        z_score: Multiplier applied to ``error_kg_per_ha``. The default 1.96
            is the value this module has always used for the interval it
            labels as 95%.
        error_kg_per_ha: Per-hectare error magnitude used for the interval.
            NOTE(review): the default 173.0 was a hard-coded literal,
            presumably the model's validation error -- confirm its origin.

    Returns:
        dict with the keys ``year``, ``hectares``,
        ``predicted_yield_per_ha_kg``, ``total_predicted_yield_kg``,
        ``confidence_interval_per_ha_lower``,
        ``confidence_interval_per_ha_upper``,
        ``total_confidence_interval_lower`` and
        ``total_confidence_interval_upper``.

    Raises:
        ValueError: If ``hectares`` is negative.
    """
    # A negative area would flip the total interval (lower > upper).
    if hectares < 0:
        raise ValueError(f"hectares must be non-negative, got {hectares!r}")

    feature_values = get_features(year)

    # Pass the dict itself so every value is matched to its column by name;
    # building the row from .values() silently relied on the dict's iteration
    # order being identical to ``features``.
    input_data = pd.DataFrame([feature_values], columns=features)

    # Convert to a plain float so the result dict carries no NumPy scalars.
    yield_per_ha = float(rf_model.predict(input_data)[0])
    total_yield_kg = yield_per_ha * hectares

    margin = z_score * error_kg_per_ha
    # A yield cannot be negative, so the lower bound is floored at zero.
    lower_bound = max(0.0, yield_per_ha - margin)
    upper_bound = yield_per_ha + margin

    return {
        'year': year,
        'hectares': hectares,
        'predicted_yield_per_ha_kg': yield_per_ha,
        'total_predicted_yield_kg': total_yield_kg,
        'confidence_interval_per_ha_lower': lower_bound,
        'confidence_interval_per_ha_upper': upper_bound,
        'total_confidence_interval_lower': lower_bound * hectares,
        'total_confidence_interval_upper': upper_bound * hectares,
    }
# Public API: the only name exported by ``from <this module> import *``.
__all__ = ['predict_yield']

if __name__ == "__main__":
    # Demo run when executed as a script: forecast 2.5 hectares for 2025.
    year = 2025
    hectares = 2.5
    result = predict_yield(year, hectares)

    print(f"Predicted coffee yield for {result['year']} (per hectare): {result['predicted_yield_per_ha_kg']:.2f} kg/ha")
    print(f"Total predicted yield for {result['hectares']} hectares: {result['total_predicted_yield_kg']:.2f} kg")
    # The two bounds were previously printed back to back with nothing between
    # them, producing one unreadable run of digits; separate them explicitly.
    print(f"95% Confidence Interval (per hectare): {result['confidence_interval_per_ha_lower']:.2f} to {result['confidence_interval_per_ha_upper']:.2f} kg/ha")
    print(f"95% Confidence Interval (total): {result['total_confidence_interval_lower']:.2f} to {result['total_confidence_interval_upper']:.2f} kg")