Spaces:
Runtime error
Runtime error
File size: 2,979 Bytes
ed237a9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
import pandas as pd
import numpy as np
import joblib
import os
# Directory holding this module; the model and CSV files are resolved
# relative to it so loading does not depend on the working directory.
_MODULE_DIR = os.path.dirname(__file__)

# Deserialize the trained model once, at import time.
rf_model = joblib.load(os.path.join(_MODULE_DIR, 'random_forest_model.joblib'))

# Read the historical dataset that supplies per-year feature values.
data_path = os.path.join(_MODULE_DIR, 'sidama_data_2020_2024_combined.csv')
df_merged = pd.read_csv(data_path)

# Columns passed to the model, in the order used to build prediction input.
features = [
    'ndvi_mean_mean_flowering',
    'ndvi_mean_mean_fruit',
    'ndvi_mean_max_season',
    'soil_moisture_mean_mean_season',
    'Rainfall_mean_fruit',
    'Humidity_mean_season',
    'elevation_mean',
]

# Feature values of the first row whose year is 2024; reused as the
# fallback for years that are absent from the dataset.
_rows_2024 = df_merged.loc[df_merged['year'] == 2024, features]
features_2024 = _rows_2024.iloc[0].to_dict()
def get_features(year):
    """Return the model feature values to use for ``year`` as a dict.

    If ``year`` appears in the ``'year'`` column of ``df_merged``, the
    values come from the first matching row. Otherwise (e.g. a future
    year) a copy of the 2024 feature values is returned.

    Args:
        year: Year to look up in the historical data.

    Returns:
        A dict mapping each name in ``features`` to its value.
    """
    known_years = df_merged['year'].values
    if year not in known_years:
        # No data for this year: fall back to the 2024 values. A copy is
        # returned so callers cannot mutate the shared module-level dict.
        return features_2024.copy()

    rows_for_year = df_merged.loc[df_merged['year'] == year, features]
    return rows_for_year.iloc[0].to_dict()
def predict_yield(year, hectares, *, error_std=173.0, z_score=1.96):
    """Predict per-hectare and total yield for ``year`` over ``hectares``.

    Args:
        year: Year to predict for; forwarded to ``get_features`` to pick
            the feature values fed to the model.
        hectares: Area used to scale the per-hectare prediction and its
            interval bounds to totals.
        error_std: Per-hectare spread used for the interval half-width.
            NOTE(review): 173.0 is presumably the model's prediction
            error in kg/ha -- confirm where this value comes from.
        z_score: Multiplier applied to ``error_std``. The default 1.96 is
            the two-sided 95% value of the standard normal distribution.

    Returns:
        A dict with the inputs (``year``, ``hectares``), the predicted
        yield per hectare and in total, and the lower/upper interval
        bounds per hectare and in total.
    """
    feature_values = get_features(year)

    # Look every feature up by name so the row always lines up with the
    # `features` column order instead of relying on the dict's own order.
    input_row = [feature_values[name] for name in features]
    input_data = pd.DataFrame([input_row], columns=features)

    # The model returns one prediction per input row; there is one row.
    yield_per_ha = rf_model.predict(input_data)[0]
    total_yield_kg = yield_per_ha * hectares

    # Symmetric interval around the per-hectare prediction.
    margin = z_score * error_std
    lower_bound = yield_per_ha - margin
    upper_bound = yield_per_ha + margin

    # Scale the per-hectare bounds to the requested area.
    total_lower_bound = lower_bound * hectares
    total_upper_bound = upper_bound * hectares

    return {
        'year': year,
        'hectares': hectares,
        'predicted_yield_per_ha_kg': yield_per_ha,
        'total_predicted_yield_kg': total_yield_kg,
        'confidence_interval_per_ha_lower': lower_bound,
        'confidence_interval_per_ha_upper': upper_bound,
        'total_confidence_interval_lower': total_lower_bound,
        'total_confidence_interval_upper': total_upper_bound,
    }
# Public API of this module: `from <module> import *` exports only
# `predict_yield`. Other module-level names remain importable explicitly.
__all__ = ['predict_yield']
if __name__ == "__main__":
    # Demo run when executed as a script: predict for a year outside the
    # historical data (so the 2024 fallback features are used) and print
    # the per-hectare and total results with their interval bounds.
    year = 2025
    hectares = 2.5
    result = predict_yield(year, hectares)
    print(f"Predicted coffee yield for {result['year']} (per hectare): {result['predicted_yield_per_ha_kg']:.2f} kg/ha")
    print(f"Total predicted yield for {result['hectares']} hectares: {result['total_predicted_yield_kg']:.2f} kg")
    # The range separator is an en dash; the original character had been
    # lost to an encoding error and showed up as U+FFFD.
    print(f"95% Confidence Interval (per hectare): {result['confidence_interval_per_ha_lower']:.2f} – {result['confidence_interval_per_ha_upper']:.2f} kg/ha")
    print(f"95% Confidence Interval (total): {result['total_confidence_interval_lower']:.2f} – {result['total_confidence_interval_upper']:.2f} kg")
|