Spaces:

Mojo-Maniac
/

TerraForm_AI

Running

UnknownKeek@2005

Commit 5

e00bfb4 about 1 year ago

9.76 kB

	from fastapi import FastAPI, Query, HTTPException
	from pydantic import BaseModel
	import numpy as np
	import pandas as pd
	import xgboost as xgb
	import joblib
	import pickle
	import os
	from typing import List, Optional

	# ASGI application object. title, description and version are handed to
	# FastAPI as the API's metadata.
	app = FastAPI(
	    title="TerraForm AI",
	    description="AI model for predicting locations suitable for afforestation",
	    version="1.0.0",
	)

	# Where the pickled classifier and scaler are stored (temporary directory).
	MODEL_PATH = '/tmp/models/model.pkl'
	SCALER_PATH = '/tmp/models/scaler.pkl'

	# Make sure the directory holding both artifacts exists before anything
	# tries to read from or write to it.
	os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)

	# Module-level state shared by the endpoints; each stays None until the
	# corresponding resource has been loaded or trained.
	data = None
	model = None
	scaler = None

	# Input columns used for scaling, training and prediction. Must remain a
	# list: it is used directly as a DataFrame column selector.
	features = [
	    'Average Annual Rainfall (inches)',
	    'Soil Suitability (0 to 1)',
	    'Wildlife Benefit Potential (0 to 1)',
	    'Population',
	    'Area available for afforestation (acres)',
	    'Lack of tree cover',
	]

	# Best-effort eager load at import time; whatever is still missing afterwards
	# is handled by the startup event. Each artifact is loaded independently so
	# that one unreadable file does not prevent the remaining ones from loading.
	#
	# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file.
	# MODEL_PATH and SCALER_PATH live under /tmp, so only run this service where
	# that directory cannot be written to by untrusted users.
	def _load_if_present(path, loader):
	    """Return ``loader(path)`` when *path* exists.

	    Returns ``None`` when the file is absent or when *loader* raises; in the
	    latter case the error is printed rather than propagated, keeping the
	    import of this module from failing.
	    """
	    if not os.path.exists(path):
	        return None
	    try:
	        return loader(path)
	    except Exception as e:
	        print(f"Error loading data or model: {e}")
	        return None


	def _unpickle(path):
	    """Deserialize and return the object pickled in the file at *path*."""
	    with open(path, 'rb') as f:
	        return pickle.load(f)


	data = _load_if_present('data.csv', pd.read_csv)
	model = _load_if_present(MODEL_PATH, _unpickle)
	scaler = _load_if_present(SCALER_PATH, _unpickle)

	class LocationInput(BaseModel):
	    # Request body of POST /predict. The fields are listed in the same order
	    # in which predict_suitability places them into the feature row.
	    rainfall: float            # average annual rainfall, in inches
	    soil_suitability: float    # soil suitability rating (0 to 1)
	    wildlife_potential: float  # wildlife benefit potential (0 to 1)
	    population: float          # population count
	    area: float                # area available for afforestation, in acres
	    lack_of_tree_cover: float  # measure of lack of tree cover (0 to 1)

	class LocationResponse(BaseModel):
	    # Response body of POST /predict.
	    suitability: str    # "Good" when the predicted class is 1, else "Not Good"
	    probability: float  # predict_proba value at class index 1

	@app.get("/")
	def root():
	    # Landing endpoint: a static message that points callers at /docs.
	    greeting = "TerraForm AI API is running. Use /docs to explore the API."
	    return {"message": greeting}

	@app.post("/predict", response_model=LocationResponse)
	async def predict_suitability(location: LocationInput):
	    """
	    Predict the suitability of a location for afforestation based on provided parameters.

	    - rainfall: Average annual rainfall in inches
	    - soil_suitability: Soil suitability rating (0 to 1)
	    - wildlife_potential: Wildlife benefit potential (0 to 1)
	    - population: Population count
	    - area: Area available for afforestation in acres
	    - lack_of_tree_cover: Measure of lack of tree cover (0 to 1)
	    """
	    # Report a missing model as 503, like the other endpoints do, instead of
	    # letting model.predict fail on None and surface as a 500. This check sits
	    # outside the try block on purpose: HTTPException derives from Exception,
	    # so the handler below would otherwise rewrap it as a 500.
	    if model is None:
	        raise HTTPException(status_code=503, detail="Model not loaded")

	    try:
	        # Single-row DataFrame whose columns carry the names in `features`,
	        # with the values in the matching order.
	        new_location = pd.DataFrame([[
	            location.rainfall,
	            location.soil_suitability,
	            location.wildlife_potential,
	            location.population,
	            location.area,
	            location.lack_of_tree_cover,
	        ]], columns=features)

	        # Apply scaling if a scaler is available; otherwise pass raw values.
	        if scaler is not None:
	            new_location_scaled = scaler.transform(new_location)
	        else:
	            new_location_scaled = new_location.values

	        # Predicted class plus the probability reported for class index 1.
	        prediction = model.predict(new_location_scaled)[0]
	        probability = model.predict_proba(new_location_scaled)[0][1]

	        suitability = "Good" if prediction == 1 else "Not Good"

	        return LocationResponse(
	            suitability=suitability,
	            # float() converts the array element into a plain Python float.
	            probability=float(probability),
	        )
	    except Exception as e:
	        import traceback
	        print(f"Error in prediction: {str(e)}")
	        print(traceback.format_exc())
	        raise HTTPException(status_code=500, detail=f"Prediction error: {str(e)}") from e

	@app.get("/locations/{state}", response_model=List[dict])
	async def get_state_locations(state: str):
	    """
	    Get suitable locations for afforestation in the specified state.
	    """
	    resources_ready = data is not None and model is not None
	    if not resources_ready:
	        raise HTTPException(status_code=503, detail="Data or model not loaded")

	    # Work on a copy of the matching rows so the shared frame is not modified.
	    rows = data.loc[data["State"] == state].copy()
	    if rows.empty:
	        return []

	    # Feature matrix for the model, scaled only when a scaler is available.
	    inputs = rows[features]
	    model_input = inputs.values if scaler is None else scaler.transform(inputs)

	    # Attach the predicted class and the class-index-1 probability per row.
	    rows["Prediction"] = model.predict(model_input)
	    rows["Probability"] = model.predict_proba(model_input)[:, 1]

	    # Keep only rows predicted as class 1, reduced to the reported columns.
	    recommended = rows.loc[rows["Prediction"] == 1, ["City", "Probability"]]
	    if recommended.empty:
	        return []

	    # Highest probability first.
	    ranked = recommended.sort_values(by="Probability", ascending=False)
	    return ranked.to_dict(orient="records")

	@app.get("/states", response_model=List[str])
	async def get_states():
	    """
	    Get a list of all available states in the dataset.
	    """
	    if data is None:
	        raise HTTPException(status_code=503, detail="Data not loaded")

	    # Drop missing values before sorting: a NaN among the state names would
	    # make sorted() raise TypeError when it compares a float with a str.
	    state_names = data["State"].dropna().unique().tolist()
	    return sorted(state_names)

	def _rescale_unit_interval(column):
	    """Min-max rescale *column* onto the range 0..1.

	    A column whose values are all equal has zero range; it is mapped to all
	    zeros instead of dividing zero by zero.
	    """
	    low = column.min()
	    spread = column.max() - low
	    if spread == 0:
	        # 0/0 would give NaN; a NaN score never exceeds the threshold, so
	        # every row would silently be labelled 0.
	        return column * 0.0
	    return (column - low) / spread


	def _label_training_frame(frame):
	    """Return a copy of *frame* with the heuristic score and 0/1 target added.

	    Working on a copy keeps the helper columns out of the DataFrame that the
	    endpoints read from.
	    """
	    labelled = frame.copy()

	    labelled['Normalized Rainfall'] = _rescale_unit_interval(
	        labelled['Average Annual Rainfall (inches)'])
	    labelled['Normalized Population'] = _rescale_unit_interval(
	        labelled['Population'])
	    labelled['Normalized Area'] = _rescale_unit_interval(
	        labelled['Area available for afforestation (acres)'])

	    # Weighted sum of the inputs; population is the only term subtracted.
	    labelled['afforestation_score'] = (
	        0.3 * labelled['Normalized Rainfall'] +
	        0.35 * labelled['Soil Suitability (0 to 1)'] +
	        0.1 * labelled['Wildlife Benefit Potential (0 to 1)'] -
	        0.08 * np.sqrt(labelled['Normalized Population']) +
	        0.07 * labelled['Normalized Area'] +
	        0.1 * labelled['Lack of tree cover']
	    )

	    # Define threshold: scores strictly above it become the positive class.
	    raw_threshold = 0.5
	    labelled["good_for_afforestation"] = (
	        labelled["afforestation_score"] > raw_threshold
	    ).astype(int)
	    return labelled


	def _fit_model_and_scaler(frame):
	    """Fit a MinMaxScaler and an XGBClassifier on *frame*.

	    Returns the pair ``(classifier, fitted_scaler)``. Any exception, including
	    a failed scikit-learn import, propagates to the caller.
	    """
	    # Imported here so that an ImportError is raised inside the caller's
	    # try/except rather than at module import.
	    from sklearn.model_selection import train_test_split
	    from sklearn.preprocessing import MinMaxScaler

	    print(f"Data shape: {frame.shape}")
	    print("Preparing data for model training...")
	    labelled = _label_training_frame(frame)

	    print("Splitting data for training...")
	    X = labelled[features]
	    y = labelled['good_for_afforestation']

	    # Same 80/20 split and seed as before, so the rows used for fitting do
	    # not change. The held-out part is not used and is discarded.
	    X_train, _, y_train, _ = train_test_split(
	        X, y, test_size=0.2, random_state=42
	    )

	    print("Scaling data...")
	    fitted_scaler = MinMaxScaler()
	    X_train_scaled = fitted_scaler.fit_transform(X_train)

	    print("Training model...")
	    classifier = xgb.XGBClassifier(
	        max_depth=4,
	        learning_rate=0.1,
	        n_estimators=100,
	        subsample=0.9,
	        objective="binary:logistic",
	        random_state=42
	    )
	    classifier.fit(X_train_scaled, y_train)
	    return classifier, fitted_scaler


	def _save_model_and_scaler(fitted_model, fitted_scaler):
	    """Pickle *fitted_model* to MODEL_PATH and *fitted_scaler* to SCALER_PATH."""
	    print(f"Saving model to {MODEL_PATH} and scaler to {SCALER_PATH}...")
	    with open(MODEL_PATH, 'wb') as f:
	        pickle.dump(fitted_model, f)
	    with open(SCALER_PATH, 'wb') as f:
	        pickle.dump(fitted_scaler, f)
	    print("Model and scaler saved successfully!")


	def _load_saved_artifact(path, label):
	    """Unpickle the file at *path*; print the outcome and return None on failure.

	    *label* ("model" or "scaler") is only used in the printed messages.
	    SECURITY NOTE: pickle.load can execute code embedded in the file, so the
	    file must come from a trusted source.
	    """
	    try:
	        with open(path, 'rb') as f:
	            loaded = pickle.load(f)
	    except Exception as e:
	        print(f"Failed to load {label}: {e}")
	        return None
	    print(f"Successfully loaded existing {label} from {path}")
	    return loaded


	# NOTE(review): newer FastAPI releases deprecate on_event in favour of
	# lifespan handlers -- confirm against the FastAPI version in use.
	@app.on_event("startup")
	async def startup_event():
	    """Prepare the module-level data, model and scaler when the app starts.

	    Loads data.csv if it has not been loaded yet. Then, if either pickle file
	    is missing, trains a new model and scaler and saves both; otherwise loads
	    whichever of the two is still None. Every failure is printed and ends the
	    handler early instead of raising, so the application still starts.
	    """
	    global data, model, scaler

	    print("Starting up TerraForm AI application...")

	    # Check if data file exists
	    if not os.path.exists('data.csv'):
	        print("Error: data.csv file not found!")
	        return

	    # Load data if not loaded yet
	    if data is None:
	        try:
	            data = pd.read_csv('data.csv')
	            print("Successfully loaded data.csv")
	        except Exception as e:
	            print(f"Failed to load data.csv: {e}")
	            return

	    # Check if model and scaler need to be created
	    if not os.path.exists(MODEL_PATH) or not os.path.exists(SCALER_PATH):
	        print("Training model and creating necessary files...")
	        try:
	            # The globals are assigned before saving, so a failed write still
	            # leaves a usable in-memory model and scaler.
	            model, scaler = _fit_model_and_scaler(data)
	            _save_model_and_scaler(model, scaler)
	        except Exception as e:
	            print(f"Error during model training: {e}")
	            import traceback
	            traceback.print_exc()
	            return
	    else:
	        # Load model and scaler if they exist but weren't loaded
	        if model is None:
	            model = _load_saved_artifact(MODEL_PATH, "model")
	        if scaler is None:
	            scaler = _load_saved_artifact(SCALER_PATH, "scaler")

	    # Only claim success when both artifacts are actually available.
	    if model is None or scaler is None:
	        print("Application startup finished, but the model or scaler is not loaded.")
	    else:
	        print("Application startup completed successfully!")