"""TerraForm AI: FastAPI service that scores locations for afforestation.

The module loads ``data.csv`` and pickled model/scaler artifacts when they are
available, trains and saves them on application startup when they are not, and
exposes HTTP endpoints for single predictions and per-state lookups.
"""

from fastapi import FastAPI, Query, HTTPException
from pydantic import BaseModel
import numpy as np
import pandas as pd
import xgboost as xgb
import joblib
import pickle
import os
import traceback
from typing import List, Optional

app = FastAPI(
    title="TerraForm AI",
    description="AI model for predicting locations suitable for afforestation",
    version="1.0.0"
)

# Create a models directory if it doesn't exist
os.makedirs('/tmp/models', exist_ok=True)

# Define model paths using the temporary directory
MODEL_PATH = '/tmp/models/model.pkl'
SCALER_PATH = '/tmp/models/scaler.pkl'

# Global variables (populated at import time and/or by the startup event)
data = None
model = None
scaler = None

# Column names of data.csv used as model inputs. The order matters: /predict
# builds its single-row frame in exactly this order.
features = [
    'Average Annual Rainfall (inches)',
    'Soil Suitability (0 to 1)',
    'Wildlife Benefit Potential (0 to 1)',
    'Population',
    'Area available for afforestation (acres)',
    'Lack of tree cover',
]


def _load_pickle(path: str):
    """Return the object unpickled from the file at *path*."""
    # NOTE(security): unpickling executes arbitrary code embedded in the file,
    # and /tmp is commonly writable by other local users. Only run this where
    # the model directory is trusted.
    with open(path, 'rb') as f:
        return pickle.load(f)


def _min_max_normalize(series: pd.Series) -> pd.Series:
    """Rescale *series* to the range [0, 1].

    A column whose values are all equal has a zero range; it is mapped to all
    zeros instead of dividing by zero, which would yield NaN for every row.
    """
    lowest = series.min()
    span = series.max() - lowest
    if span == 0:
        return pd.Series(0.0, index=series.index)
    return (series - lowest) / span


def _train_model(frame: pd.DataFrame):
    """Derive labels from *frame*, then fit and return ``(classifier, scaler)``.

    The helper columns ('Normalized Rainfall', 'Normalized Population',
    'Normalized Area', 'afforestation_score', 'good_for_afforestation') are
    added to *frame* in place. Nothing is written to module globals or to
    disk here, so a failure part-way through leaves the caller's state alone.
    """
    # Importing necessary packages (kept local so they are only required when
    # training actually happens)
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import MinMaxScaler

    # Data preparation
    print(f"Data shape: {frame.shape}")
    print("Preparing data for model training...")

    frame['Normalized Rainfall'] = _min_max_normalize(
        frame['Average Annual Rainfall (inches)']
    )
    frame['Normalized Population'] = _min_max_normalize(frame['Population'])
    frame['Normalized Area'] = _min_max_normalize(
        frame['Area available for afforestation (acres)']
    )

    # Weighted heuristic score; population is the only term that lowers it.
    frame['afforestation_score'] = (
        0.3 * frame['Normalized Rainfall'] +
        0.35 * frame['Soil Suitability (0 to 1)'] +
        0.1 * frame['Wildlife Benefit Potential (0 to 1)'] -
        0.08 * np.sqrt(frame['Normalized Population']) +
        0.07 * frame['Normalized Area'] +
        0.1 * frame['Lack of tree cover']
    )

    # Define threshold
    raw_threshold = 0.5
    frame["good_for_afforestation"] = (
        frame["afforestation_score"] > raw_threshold
    ).astype(int)

    print("Splitting data for training...")
    # Select features for modeling
    X = frame[features]
    y = frame['good_for_afforestation']

    # Split data. The held-out 20% is not used further, but the split is kept
    # so the model is fitted on the same 80% subset as before.
    X_train, _, y_train, _ = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    print("Scaling data...")
    # Create and fit the scaler
    fitted_scaler = MinMaxScaler()
    X_train_scaled = fitted_scaler.fit_transform(X_train)

    print("Training model...")
    # Create and train the model
    classifier = xgb.XGBClassifier(
        max_depth=4,
        learning_rate=0.1,
        n_estimators=100,
        subsample=0.9,
        objective="binary:logistic",
        random_state=42
    )
    classifier.fit(X_train_scaled, y_train)

    return classifier, fitted_scaler


# Try to load the data but don't fail if it's not available
# We'll handle this in the startup event
try:
    if os.path.exists('data.csv'):
        data = pd.read_csv('data.csv')

    if os.path.exists(MODEL_PATH):
        model = _load_pickle(MODEL_PATH)

    if os.path.exists(SCALER_PATH):
        scaler = _load_pickle(SCALER_PATH)
except Exception as e:
    # Best-effort load: startup_event retries or trains from scratch.
    print(f"Error loading data or model: {e}")


class LocationInput(BaseModel):
    # Request body for /predict; see the endpoint docstring for field meanings.
    rainfall: float
    soil_suitability: float
    wildlife_potential: float
    population: float
    area: float
    lack_of_tree_cover: float


class LocationResponse(BaseModel):
    # "Good" or "Not Good"
    suitability: str
    # Model probability of the positive ("Good") class
    probability: float


@app.get("/")
def root():
    return {
        "message": "TerraForm AI API is running. Use /docs to explore the API."
    }


@app.post("/predict", response_model=LocationResponse)
async def predict_suitability(location: LocationInput):
    """
    Predict the suitability of a location for afforestation based on provided parameters.

    - **rainfall**: Average annual rainfall in inches
    - **soil_suitability**: Soil suitability rating (0 to 1)
    - **wildlife_potential**: Wildlife benefit potential (0 to 1)
    - **population**: Population count
    - **area**: Area available for afforestation in acres
    - **lack_of_tree_cover**: Measure of lack of tree cover (0 to 1)
    """
    # Report a missing model as 503, like /locations and /states do. This check
    # sits outside the try block so the blanket handler below cannot turn it
    # into a 500 carrying an AttributeError message.
    if model is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    try:
        # Create feature array for the new location as a DataFrame with proper column names
        new_location = pd.DataFrame([[
            location.rainfall,
            location.soil_suitability,
            location.wildlife_potential,
            location.population,
            location.area,
            location.lack_of_tree_cover
        ]], columns=features)

        # Apply scaling if scaler is available
        if scaler is not None:
            new_location_scaled = scaler.transform(new_location)
        else:
            new_location_scaled = new_location.values

        # Make prediction
        prediction = model.predict(new_location_scaled)[0]
        probability = model.predict_proba(new_location_scaled)[0][1]

        suitability = "Good" if prediction == 1 else "Not Good"

        return LocationResponse(
            suitability=suitability,
            probability=float(probability)
        )
    except Exception as e:
        print(f"Error in prediction: {str(e)}")
        print(traceback.format_exc())
        raise HTTPException(
            status_code=500, detail=f"Prediction error: {str(e)}"
        ) from e


@app.get("/locations/{state}", response_model=List[dict])
async def get_state_locations(state: str):
    """
    Get suitable locations for afforestation in the specified state.
    """
    if data is None or model is None:
        raise HTTPException(status_code=503, detail="Data or model not loaded")

    # Exact (case-sensitive) match on the "State" column; copy so the added
    # prediction columns never touch the shared frame.
    state_data = data[data["State"] == state].copy()

    if state_data.empty:
        return []

    # Prepare features for prediction
    X_state = state_data[features]

    # Apply scaling if needed
    if scaler is not None:
        X_state_scaled = scaler.transform(X_state)
    else:
        X_state_scaled = X_state.values

    # Predict suitability
    predictions = model.predict(X_state_scaled)
    probabilities = model.predict_proba(X_state_scaled)[:, 1]

    # Add predictions to the state data
    state_data["Prediction"] = predictions
    state_data["Probability"] = probabilities

    # Filter for good locations (Prediction == 1)
    good_locations = state_data[state_data["Prediction"] == 1]

    if good_locations.empty:
        return []

    # Return the relevant data, most probable first
    return good_locations[["City", "Probability"]].sort_values(
        by="Probability", ascending=False
    ).to_dict(orient="records")


@app.get("/states", response_model=List[str])
async def get_states():
    """
    Get a list of all available states in the dataset.
    """
    if data is None:
        raise HTTPException(status_code=503, detail="Data not loaded")

    return sorted(data["State"].unique().tolist())


@app.on_event("startup")
async def startup_event():
    """Train and save the model if it doesn't exist"""
    global data, model, scaler

    print("Starting up TerraForm AI application...")

    # Check if data file exists
    if not os.path.exists('data.csv'):
        print("Error: data.csv file not found!")
        return

    # Load data if not loaded yet
    if data is None:
        try:
            data = pd.read_csv('data.csv')
            print("Successfully loaded data.csv")
        except Exception as e:
            print(f"Failed to load data.csv: {e}")
            return

    # Check if model and scaler need to be created
    if not os.path.exists(MODEL_PATH) or not os.path.exists(SCALER_PATH):
        print("Training model and creating necessary files...")

        try:
            # Publish to the globals only after training has fully succeeded,
            # so a failed fit can never leave an unfitted classifier behind
            # that would pass the endpoints' `model is None` checks.
            model, scaler = _train_model(data)

            print(f"Saving model to {MODEL_PATH} and scaler to {SCALER_PATH}...")
            # Save the model and scaler to the temporary directory
            with open(MODEL_PATH, 'wb') as f:
                pickle.dump(model, f)

            with open(SCALER_PATH, 'wb') as f:
                pickle.dump(scaler, f)

            print("Model and scaler saved successfully!")

        except Exception as e:
            print(f"Error during model training: {e}")
            traceback.print_exc()
            return
    else:
        # Load model and scaler if they exist but weren't loaded
        if model is None:
            try:
                model = _load_pickle(MODEL_PATH)
                print(f"Successfully loaded existing model from {MODEL_PATH}")
            except Exception as e:
                print(f"Failed to load model: {e}")

        if scaler is None:
            try:
                scaler = _load_pickle(SCALER_PATH)
                print(f"Successfully loaded existing scaler from {SCALER_PATH}")
            except Exception as e:
                print(f"Failed to load scaler: {e}")

    print("Application startup completed successfully!")