# Spaces: Running (page-scrape residue, not part of the program)
| from fastapi import FastAPI, Query, HTTPException | |
| from pydantic import BaseModel | |
| import numpy as np | |
| import pandas as pd | |
| import xgboost as xgb | |
| import joblib | |
| import pickle | |
| import os | |
| from typing import List, Optional | |
# The ASGI application object; the keyword arguments below are the metadata
# handed to FastAPI (title, description, version).
app = FastAPI(
    title="TerraForm AI",
    description="AI model for predicting locations suitable for afforestation",
    version="1.0.0",
)
# Pickled artifacts are kept under /tmp/models; the directory is created at
# import time so later writes never hit a missing parent directory.
_MODELS_DIR = '/tmp/models'
os.makedirs(_MODELS_DIR, exist_ok=True)

MODEL_PATH = _MODELS_DIR + '/model.pkl'
SCALER_PATH = _MODELS_DIR + '/scaler.pkl'

# Module-level state read by the request handlers. Each name stays None until
# the corresponding artifact has been loaded (here) or built (at startup).
data = None
model = None
scaler = None

# Dataset columns used as model inputs, in the order used for both training
# and prediction.
features = [
    'Average Annual Rainfall (inches)',
    'Soil Suitability (0 to 1)',
    'Wildlife Benefit Potential (0 to 1)',
    'Population',
    'Area available for afforestation (acres)',
    'Lack of tree cover',
]


def _unpickle_if_present(path):
    # Return the object pickled at *path*, or None when the file is absent.
    # NOTE(review): pickle executes arbitrary code on load; this is only safe
    # while /tmp/models is writable by this application alone - confirm.
    if not os.path.exists(path):
        return None
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Best-effort load: anything missing or unreadable is left as None and is
# dealt with later by the startup routine, so a failure here must not abort
# the import. The three loads share one try block on purpose - the first
# failure skips the remaining loads.
try:
    if os.path.exists('data.csv'):
        data = pd.read_csv('data.csv')
    model = _unpickle_if_present(MODEL_PATH)
    scaler = _unpickle_if_present(SCALER_PATH)
except Exception as e:
    print(f"Error loading data or model: {e}")
class LocationInput(BaseModel):
    # Request payload describing one candidate site. The fields are consumed
    # in this order when the prediction handler builds its feature row, so
    # the order here mirrors the module-level `features` list.
    rainfall: float  # average annual rainfall, in inches
    soil_suitability: float  # soil suitability rating (0 to 1)
    wildlife_potential: float  # wildlife benefit potential (0 to 1)
    population: float  # population count
    area: float  # area available for afforestation, in acres
    lack_of_tree_cover: float  # measure of lack of tree cover (0 to 1)
class LocationResponse(BaseModel):
    # Result of a single-location prediction.
    suitability: str  # "Good" or "Not Good", as set by the prediction handler
    probability: float  # value taken from column 1 of the model's predict_proba
def root():
    # Landing payload: a fixed status message pointing the caller at /docs.
    # NOTE(review): no route decorator is visible in this view of the file -
    # confirm how this handler is registered.
    greeting = "TerraForm AI API is running. Use /docs to explore the API."
    return {"message": greeting}
async def predict_suitability(location: LocationInput):
    """
    Predict the suitability of a location for afforestation based on provided parameters.
    - **rainfall**: Average annual rainfall in inches
    - **soil_suitability**: Soil suitability rating (0 to 1)
    - **wildlife_potential**: Wildlife benefit potential (0 to 1)
    - **population**: Population count
    - **area**: Area available for afforestation in acres
    - **lack_of_tree_cover**: Measure of lack of tree cover (0 to 1)
    """
    # Returns a LocationResponse. Raises HTTPException 503 when no model is
    # loaded and HTTPException 500 when inference itself fails.
    #
    # The availability check sits outside the try block on purpose: inside it,
    # the broad `except Exception` below would catch the 503 and re-wrap it as
    # a 500. Without the check, a missing model surfaced as a 500 with a
    # "'NoneType' object has no attribute 'predict'" message, unlike the state
    # listing handler, which answers 503 for the same condition.
    if model is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    try:
        # Single-row DataFrame with the training column names, so a fitted
        # scaler receives the same column layout it was fitted on.
        new_location = pd.DataFrame(
            [[
                location.rainfall,
                location.soil_suitability,
                location.wildlife_potential,
                location.population,
                location.area,
                location.lack_of_tree_cover,
            ]],
            columns=features,
        )

        # Apply scaling if a scaler is available; otherwise pass raw values.
        # NOTE(review): the startup routine always trains on scaled inputs, so
        # the unscaled fallback presumably yields unreliable predictions -
        # confirm whether it should be an error instead.
        if scaler is not None:
            new_location_scaled = scaler.transform(new_location)
        else:
            new_location_scaled = new_location.values

        prediction = model.predict(new_location_scaled)[0]
        probability = model.predict_proba(new_location_scaled)[0][1]
        suitability = "Good" if prediction == 1 else "Not Good"

        return LocationResponse(
            suitability=suitability,
            # Cast to a plain float so the response does not carry a NumPy scalar.
            probability=float(probability),
        )
    except Exception as e:
        import traceback

        print(f"Error in prediction: {str(e)}")
        print(traceback.format_exc())
        # Chain the cause so the original failure stays attached to the 500.
        raise HTTPException(status_code=500, detail=f"Prediction error: {str(e)}") from e
async def get_state_locations(state: str):
    """
    Get suitable locations for afforestation in the specified state.
    """
    # Returns a list of {"City", "Probability"} records, highest probability
    # first; an empty list when the state has no rows or no row is predicted
    # as class 1. Raises HTTPException 503 when data or model is missing.
    if model is None or data is None:
        raise HTTPException(status_code=503, detail="Data or model not loaded")

    # Work on a copy so the prediction columns added below never touch the
    # shared module-level DataFrame.
    rows = data.loc[data["State"] == state].copy()
    if rows.empty:
        return []

    # Scale the feature columns when a scaler is loaded, else use raw values.
    feature_frame = rows[features]
    model_input = feature_frame.values if scaler is None else scaler.transform(feature_frame)

    rows["Prediction"] = model.predict(model_input)
    rows["Probability"] = model.predict_proba(model_input)[:, 1]

    # Keep only rows predicted as class 1, reduced to the two reported columns.
    suitable = rows.loc[rows["Prediction"] == 1, ["City", "Probability"]]
    if suitable.empty:
        return []

    ranked = suitable.sort_values(by="Probability", ascending=False)
    return ranked.to_dict(orient="records")
async def get_states():
    """
    Get a list of all available states in the dataset.
    """
    # Returns the distinct values of the "State" column in ascending order.
    # Raises HTTPException 503 while the dataset has not been loaded.
    if data is None:
        raise HTTPException(status_code=503, detail="Data not loaded")

    state_names = data["State"].unique().tolist()
    state_names.sort()
    return state_names
def _min_max_normalize(series):
    """Rescale *series* linearly to [0, 1]; a constant series maps to all zeros."""
    lowest = series.min()
    span = series.max() - lowest
    if span == 0:
        # A zero range would give 0/0 -> NaN, which silently turns every
        # score into NaN and therefore every label into 0.
        return pd.Series(0.0, index=series.index)
    return (series - lowest) / span


async def startup_event():
    """Load the dataset, then train and save the model and scaler, or load them from disk.

    Steps:
      1. Return early when ``data.csv`` is missing or cannot be read.
      2. When either pickle file is missing, derive labels from a weighted
         score, fit a ``MinMaxScaler`` and an ``XGBClassifier`` on the
         training split, publish them to the module globals and pickle them
         to ``MODEL_PATH`` / ``SCALER_PATH``.
      3. Otherwise load whichever of model/scaler is not yet in memory.

    Failures are printed, never raised; the function always returns ``None``.
    """
    global data, model, scaler
    print("Starting up TerraForm AI application...")

    # Without the dataset there is nothing to train on or to serve.
    if not os.path.exists('data.csv'):
        print("Error: data.csv file not found!")
        return

    # Load data if the module-level attempt did not already succeed.
    if data is None:
        try:
            data = pd.read_csv('data.csv')
            print("Successfully loaded data.csv")
        except Exception as e:
            print(f"Failed to load data.csv: {e}")
            return

    if not os.path.exists(MODEL_PATH) or not os.path.exists(SCALER_PATH):
        print("Training model and creating necessary files...")
        try:
            # Imported here so scikit-learn is only needed when training runs.
            from sklearn.model_selection import train_test_split
            from sklearn.preprocessing import MinMaxScaler

            print(f"Data shape: {data.shape}")
            print("Preparing data for model training...")
            # These helper columns are added to the shared DataFrame itself.
            data['Normalized Rainfall'] = _min_max_normalize(
                data['Average Annual Rainfall (inches)']
            )
            data['Normalized Population'] = _min_max_normalize(data['Population'])
            data['Normalized Area'] = _min_max_normalize(
                data['Area available for afforestation (acres)']
            )

            # Weighted score used only to derive the training labels;
            # population is the single term that lowers the score.
            data['afforestation_score'] = (
                0.3 * data['Normalized Rainfall'] +
                0.35 * data['Soil Suitability (0 to 1)'] +
                0.1 * data['Wildlife Benefit Potential (0 to 1)'] -
                0.08 * np.sqrt(data['Normalized Population']) +
                0.07 * data['Normalized Area'] +
                0.1 * data['Lack of tree cover']
            )

            # Rows scoring above the threshold are labelled 1, the rest 0.
            raw_threshold = 0.5
            data["good_for_afforestation"] = (
                data["afforestation_score"] > raw_threshold
            ).astype(int)

            print("Splitting data for training...")
            X = data[features]
            y = data['good_for_afforestation']

            # The held-out 20% is not used below; the split is kept so the
            # training rows (and thus the fitted model) stay the same.
            X_train, _X_test, y_train, _y_test = train_test_split(
                X, y, test_size=0.2, random_state=42
            )

            print("Scaling data...")
            fitted_scaler = MinMaxScaler()
            X_train_scaled = fitted_scaler.fit_transform(X_train)

            print("Training model...")
            trained_model = xgb.XGBClassifier(
                max_depth=4,
                learning_rate=0.1,
                n_estimators=100,
                subsample=0.9,
                objective="binary:logistic",
                random_state=42
            )
            trained_model.fit(X_train_scaled, y_train)

            # Publish only after fitting succeeded, so a training failure can
            # never leave an unfitted classifier visible to request handlers.
            # Publishing before the save keeps the in-memory model usable
            # even if writing the pickle files fails.
            model, scaler = trained_model, fitted_scaler

            print(f"Saving model to {MODEL_PATH} and scaler to {SCALER_PATH}...")
            with open(MODEL_PATH, 'wb') as f:
                pickle.dump(model, f)
            with open(SCALER_PATH, 'wb') as f:
                pickle.dump(scaler, f)
            print("Model and scaler saved successfully!")
        except Exception as e:
            print(f"Error during model training: {e}")
            import traceback
            traceback.print_exc()
            return
    else:
        # Both files exist: load whichever object is not in memory yet.
        # NOTE(review): pickle executes arbitrary code on load; this is only
        # safe while the model directory is writable by this app alone.
        if model is None:
            try:
                with open(MODEL_PATH, 'rb') as f:
                    model = pickle.load(f)
                print(f"Successfully loaded existing model from {MODEL_PATH}")
            except Exception as e:
                print(f"Failed to load model: {e}")
        if scaler is None:
            try:
                with open(SCALER_PATH, 'rb') as f:
                    scaler = pickle.load(f)
                print(f"Successfully loaded existing scaler from {SCALER_PATH}")
            except Exception as e:
                print(f"Failed to load scaler: {e}")

    # Report success only when both artifacts really are available.
    if model is None or scaler is None:
        print("Application startup finished, but the model or scaler is not loaded.")
    else:
        print("Application startup completed successfully!")