"""Predict coffee yield per hectare and in total for a given year and area.

At import time this module loads a serialized model
(``random_forest_model.joblib``) and a CSV of historical feature values
(``sidama_data_2020_2024_combined.csv``), both expected to sit next to this
file. ``predict_yield`` is the public entry point.
"""
import os

import joblib
import numpy as np
import pandas as pd

# Directory containing this module; the model and the CSV are resolved
# relative to it so the module works regardless of the current directory.
_BASE_DIR = os.path.dirname(__file__)

# Load the trained model
rf_model = joblib.load(os.path.join(_BASE_DIR, 'random_forest_model.joblib'))

# Load historical data
data_path = os.path.join(_BASE_DIR, 'sidama_data_2020_2024_combined.csv')
df_merged = pd.read_csv(data_path)

# Define features (column names in the CSV, in the order fed to the model)
features = [
    'ndvi_mean_mean_flowering',
    'ndvi_mean_mean_fruit',
    'ndvi_mean_max_season',
    'soil_moisture_mean_mean_season',
    'Rainfall_mean_fruit',
    'Humidity_mean_season',
    'elevation_mean',
]

# Two-sided 95% quantile of the standard normal distribution.
Z_SCORE_95 = 1.96

# Per-hectare spread used to build the interval around a prediction.
# NOTE(review): presumably the model's prediction error (e.g. RMSE) in
# kg/ha from training/validation -- confirm the origin of this value.
YIELD_STD_ERROR_KG_PER_HA = 173.0

# Get the feature values for the most recent year (2024); used as the
# fallback for any year that has no row in the historical data.
features_2024 = df_merged[df_merged['year'] == 2024][features].iloc[0].to_dict()


def get_features(year):
    """Return the model feature values to use for *year*.

    Args:
        year: Year to look up in the ``year`` column of ``df_merged``.

    Returns:
        A ``dict`` mapping each name in ``features`` to its value. If the
        historical data (2020-2024) has at least one row for *year*, the
        values of the first such row are returned. Otherwise (future years,
        or any other year absent from the data) a copy of the 2024 values
        is returned.
    """
    # Filter once and reuse the result instead of scanning the frame twice.
    rows_for_year = df_merged.loc[df_merged['year'] == year, features]
    if not rows_for_year.empty:
        return rows_for_year.iloc[0].to_dict()
    # Copy so callers cannot mutate the shared module-level fallback.
    return features_2024.copy()


def predict_yield(year, hectares):
    """Predict yield for *year* over an area of *hectares*.

    Args:
        year: Year whose feature values are fed to the model (see
            ``get_features`` for the fallback used when the year is not in
            the historical data).
        hectares: Area to scale the per-hectare prediction by. Must be
            non-negative.

    Returns:
        A ``dict`` with the inputs (``year``, ``hectares``), the predicted
        yield per hectare and in total (kg), and the lower/upper bounds of
        the interval per hectare and in total. The interval is the
        prediction plus or minus
        ``Z_SCORE_95 * YIELD_STD_ERROR_KG_PER_HA``.

    Raises:
        ValueError: If *hectares* is negative. A negative area would
            otherwise silently yield a negative total and an inverted
            (lower > upper) total interval.
    """
    if hectares < 0:
        raise ValueError(f"hectares must be non-negative, got {hectares!r}")

    # Get feature values for the year
    feature_values = get_features(year)

    # Build the single-row input from the dict itself so columns are matched
    # to ``features`` by name rather than by dict iteration order.
    input_data = pd.DataFrame([feature_values], columns=features)

    # Predict yield per hectare
    yield_per_ha = rf_model.predict(input_data)[0]

    # Calculate total yield
    total_yield_kg = yield_per_ha * hectares

    # Compute confidence interval (per hectare)
    margin = Z_SCORE_95 * YIELD_STD_ERROR_KG_PER_HA
    lower_bound = yield_per_ha - margin
    upper_bound = yield_per_ha + margin

    # Compute total yield confidence interval
    total_lower_bound = lower_bound * hectares
    total_upper_bound = upper_bound * hectares

    return {
        'year': year,
        'hectares': hectares,
        'predicted_yield_per_ha_kg': yield_per_ha,
        'total_predicted_yield_kg': total_yield_kg,
        'confidence_interval_per_ha_lower': lower_bound,
        'confidence_interval_per_ha_upper': upper_bound,
        'total_confidence_interval_lower': total_lower_bound,
        'total_confidence_interval_upper': total_upper_bound,
    }


# Make sure function is exposed at module level
__all__ = ['predict_yield']


def main():
    """Print an example prediction for 2025 over 2.5 hectares."""
    year = 2025
    hectares = 2.5
    result = predict_yield(year, hectares)
    # The range separator is written as the \u2013 escape (en dash) so the
    # source stays ASCII; the raw character had been corrupted to U+FFFD.
    print(f"Predicted coffee yield for {result['year']} (per hectare): {result['predicted_yield_per_ha_kg']:.2f} kg/ha")
    print(f"Total predicted yield for {result['hectares']} hectares: {result['total_predicted_yield_kg']:.2f} kg")
    print(f"95% Confidence Interval (per hectare): {result['confidence_interval_per_ha_lower']:.2f} \u2013 {result['confidence_interval_per_ha_upper']:.2f} kg/ha")
    print(f"95% Confidence Interval (total): {result['total_confidence_interval_lower']:.2f} \u2013 {result['total_confidence_interval_upper']:.2f} kg")


if __name__ == "__main__":
    main()