# TerraForm_AI / app.py
# UnknownKeek@2005
# Commit 5
# e00bfb4
from fastapi import FastAPI, Query, HTTPException
from pydantic import BaseModel
import numpy as np
import pandas as pd
import xgboost as xgb
import joblib
import pickle
import os
from typing import List, Optional
# The FastAPI application object; every route below is registered on it.
app = FastAPI(
    version="1.0.0",
    title="TerraForm AI",
    description="AI model for predicting locations suitable for afforestation",
)
# Create a models directory if it doesn't exist
os.makedirs('/tmp/models', exist_ok=True)

# Define model paths using the temporary directory
MODEL_PATH = '/tmp/models/model.pkl'
SCALER_PATH = '/tmp/models/scaler.pkl'

# Global state read by the request handlers; each entry stays None until it
# has been loaded (here) or created (in the startup event).
data = None
model = None
scaler = None

# Columns of data.csv used as model inputs. The /predict handler builds its
# input row in this same order.
features = ['Average Annual Rainfall (inches)', 'Soil Suitability (0 to 1)',
            'Wildlife Benefit Potential (0 to 1)', 'Population',
            'Area available for afforestation (acres)', 'Lack of tree cover']


def _load_pickle_if_present(path):
    """Return the object pickled at *path*, or None if it is missing or unreadable.

    Failures are printed rather than raised so that importing this module
    never fails because of a bad artifact.
    """
    if not os.path.exists(path):
        return None
    try:
        # NOTE(review): unpickling executes code embedded in the file, and
        # /tmp is typically writable by other local users - confirm this
        # location is trusted in the deployment environment.
        with open(path, 'rb') as f:
            return pickle.load(f)
    except Exception as e:
        print(f"Error loading data or model: {e}")
        return None


# Try to load the data but don't fail if it's not available; the startup
# event handles whatever is still missing. Each artifact is loaded
# independently so that one unreadable file does not prevent the others
# from being loaded.
try:
    if os.path.exists('data.csv'):
        data = pd.read_csv('data.csv')
except Exception as e:
    print(f"Error loading data or model: {e}")

model = _load_pickle_if_present(MODEL_PATH)
scaler = _load_pickle_if_present(SCALER_PATH)
# Request body of POST /predict: one value per model feature for a single
# candidate location. The handler maps these fields, in this order, onto the
# columns listed in `features`.
class LocationInput(BaseModel):
    # Average annual rainfall, in inches.
    rainfall: float
    # Soil suitability rating (0 to 1).
    soil_suitability: float
    # Wildlife benefit potential (0 to 1).
    wildlife_potential: float
    # Population count.
    population: float
    # Area available for afforestation, in acres.
    area: float
    # Measure of lack of tree cover (0 to 1).
    lack_of_tree_cover: float
# Response body of POST /predict.
class LocationResponse(BaseModel):
    # "Good" when the model's predicted class is 1, otherwise "Not Good".
    suitability: str
    # Second column (index 1) of the model's predict_proba output for the input.
    probability: float
@app.get("/")
def root():
    # Minimal liveness response that points clients at the interactive docs.
    greeting = "TerraForm AI API is running. Use /docs to explore the API."
    return {"message": greeting}
@app.post("/predict", response_model=LocationResponse)
async def predict_suitability(location: LocationInput):
    """
    Predict the suitability of a location for afforestation based on provided parameters.

    - **rainfall**: Average annual rainfall in inches
    - **soil_suitability**: Soil suitability rating (0 to 1)
    - **wildlife_potential**: Wildlife benefit potential (0 to 1)
    - **population**: Population count
    - **area**: Area available for afforestation in acres
    - **lack_of_tree_cover**: Measure of lack of tree cover (0 to 1)

    Responds with 503 while no model is loaded and with 500 if the prediction itself fails.
    """
    # Same 503 contract as /locations/{state}. The check sits outside the try
    # block so the broad handler below cannot re-wrap it as a 500.
    if model is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    try:
        # Create feature array for the new location as a DataFrame with proper column names
        new_location = pd.DataFrame([[
            location.rainfall,
            location.soil_suitability,
            location.wildlife_potential,
            location.population,
            location.area,
            location.lack_of_tree_cover,
        ]], columns=features)

        # Apply scaling if scaler is available
        if scaler is not None:
            new_location_scaled = scaler.transform(new_location)
        else:
            new_location_scaled = new_location.values

        # Make prediction: class label plus the probability of class 1
        prediction = model.predict(new_location_scaled)[0]
        probability = model.predict_proba(new_location_scaled)[0][1]
        suitability = "Good" if prediction == 1 else "Not Good"

        return LocationResponse(
            suitability=suitability,
            probability=float(probability),  # plain float for serialisation
        )
    except Exception as e:
        import traceback
        print(f"Error in prediction: {str(e)}")
        print(traceback.format_exc())
        raise HTTPException(status_code=500, detail=f"Prediction error: {str(e)}") from e
@app.get("/locations/{state}", response_model=List[dict])
async def get_state_locations(state: str):
    """
    Get suitable locations for afforestation in the specified state.
    """
    # This endpoint needs both the dataset and the classifier.
    if model is None or data is None:
        raise HTTPException(status_code=503, detail="Data or model not loaded")

    # Copy the matching rows so the columns added below never reach the shared frame.
    rows = data[data["State"] == state].copy()
    if rows.empty:
        return []

    # Model input: scaled when a scaler is available, raw values otherwise.
    inputs = rows[features]
    model_input = inputs.values if scaler is None else scaler.transform(inputs)

    # Attach the predicted class and the class-1 probability to every row.
    rows["Prediction"] = model.predict(model_input)
    rows["Probability"] = model.predict_proba(model_input)[:, 1]

    # Keep only rows predicted as class 1.
    suitable = rows[rows["Prediction"] == 1]
    if suitable.empty:
        return []

    # Highest probability first, one {"City", "Probability"} record per row.
    ranked = suitable[["City", "Probability"]].sort_values(
        by="Probability", ascending=False
    )
    return ranked.to_dict(orient="records")
@app.get("/states", response_model=List[str])
async def get_states():
    """
    Get a list of all available states in the dataset.
    """
    if data is None:
        raise HTTPException(status_code=503, detail="Data not loaded")

    # Rows without a state hold NaN (a float). Left in, sorted() can raise
    # TypeError when comparing float with str, and NaN is not a valid
    # List[str] item, so such rows are dropped before sorting.
    states = data["State"].dropna().unique()
    return sorted(states.tolist())
def _min_max_normalize(column):
    """Rescale *column* linearly to [0, 1]; a constant column maps to all zeros."""
    low = column.min()
    span = column.max() - low
    if span == 0:
        # Avoid 0/0 -> NaN: a NaN score never exceeds the threshold, so every
        # training label would silently become 0.
        return pd.Series(0.0, index=column.index)
    return (column - low) / span


def _label_training_data(frame):
    """Return the 0/1 training target derived from the weighted afforestation score.

    *frame* is only read; no helper columns are written back to it.
    """
    rainfall = _min_max_normalize(frame['Average Annual Rainfall (inches)'])
    population = _min_max_normalize(frame['Population'])
    area = _min_max_normalize(frame['Area available for afforestation (acres)'])

    score = (
        0.3 * rainfall +
        0.35 * frame['Soil Suitability (0 to 1)'] +
        0.1 * frame['Wildlife Benefit Potential (0 to 1)'] -
        0.08 * np.sqrt(population) +
        0.07 * area +
        0.1 * frame['Lack of tree cover']
    )

    # Define threshold
    raw_threshold = 0.5
    return (score > raw_threshold).astype(int).rename("good_for_afforestation")


def _train_model(frame):
    """Fit a MinMaxScaler and an XGBoost classifier on *frame*; return (model, scaler)."""
    # Imported lazily: scikit-learn is only needed when training actually runs.
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import MinMaxScaler

    print(f"Data shape: {frame.shape}")
    print("Preparing data for model training...")
    y = _label_training_data(frame)

    print("Splitting data for training...")
    X = frame[features]
    # Only the 80% training part is used. The split and its seed are kept so
    # the fitted model is the same as before.
    X_train, _, y_train, _ = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    print("Scaling data...")
    fitted_scaler = MinMaxScaler()
    X_train_scaled = fitted_scaler.fit_transform(X_train)

    print("Training model...")
    classifier = xgb.XGBClassifier(
        max_depth=4,
        learning_rate=0.1,
        n_estimators=100,
        subsample=0.9,
        objective="binary:logistic",
        random_state=42
    )
    classifier.fit(X_train_scaled, y_train)
    return classifier, fitted_scaler


def _save_artifacts(classifier, fitted_scaler):
    """Pickle the classifier to MODEL_PATH and the scaler to SCALER_PATH."""
    print(f"Saving model to {MODEL_PATH} and scaler to {SCALER_PATH}...")
    with open(MODEL_PATH, 'wb') as f:
        pickle.dump(classifier, f)
    with open(SCALER_PATH, 'wb') as f:
        pickle.dump(fitted_scaler, f)
    print("Model and scaler saved successfully!")


def _load_artifact(path, label):
    """Unpickle *path* and return the object, or None (after logging) on failure."""
    try:
        # NOTE(review): unpickling executes code embedded in the file - confirm
        # the artifact directory is trusted.
        with open(path, 'rb') as f:
            loaded = pickle.load(f)
    except Exception as e:
        print(f"Failed to load {label}: {e}")
        return None
    print(f"Successfully loaded existing {label} from {path}")
    return loaded


# NOTE(review): recent FastAPI releases deprecate on_event in favour of
# lifespan handlers - confirm against the pinned FastAPI version.
@app.on_event("startup")
async def startup_event():
    """Make sure the dataset, model and scaler are available before serving.

    Loads data.csv if needed, then loads the pickled model and scaler, or
    trains and saves new ones when the files are missing or cannot be read.
    Every failure is printed and ends startup early instead of raising, so
    the application still starts and the handlers report what is unavailable.
    """
    global data, model, scaler
    print("Starting up TerraForm AI application...")

    # Check if data file exists
    if not os.path.exists('data.csv'):
        print("Error: data.csv file not found!")
        return

    # Load data if not loaded yet
    if data is None:
        try:
            data = pd.read_csv('data.csv')
            print("Successfully loaded data.csv")
        except Exception as e:
            print(f"Failed to load data.csv: {e}")
            return

    artifacts_on_disk = os.path.exists(MODEL_PATH) and os.path.exists(SCALER_PATH)
    if artifacts_on_disk:
        # Load model and scaler if they exist but weren't loaded
        if model is None:
            model = _load_artifact(MODEL_PATH, "model")
        if scaler is None:
            scaler = _load_artifact(SCALER_PATH, "scaler")

    # Train when a file is missing, and also when an existing file could not
    # be unpickled: a model without its scaler would be fed unscaled inputs.
    if not artifacts_on_disk or model is None or scaler is None:
        print("Training model and creating necessary files...")
        try:
            # Assign the globals before saving so a failed write still leaves
            # a usable in-memory model.
            model, scaler = _train_model(data)
            _save_artifacts(model, scaler)
        except Exception as e:
            print(f"Error during model training: {e}")
            import traceback
            traceback.print_exc()
            return

    print("Application startup completed successfully!")