Spaces:

chenhaoq87
/

PreharvestRiskModel

Paused

App Files Files Community

chenhaoq87 committed on Jan 28

Commit

d5c82d8

verified ·

1 Parent(s): 096b4be

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +487 -0

app.py ADDED Viewed

	@@ -0,0 +1,487 @@

+"""
+E.coli Preharvest Risk Model - FastAPI Inference Application
+This API provides endpoints for making predictions on E.coli contamination risk
+using the trained machine learning model.
+"""
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel, Field
+from typing import List, Dict, Optional
+import joblib
+import pandas as pd
+import numpy as np
+import json
+import os
+from pathlib import Path
+import logging
+# Setup logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Initialize FastAPI app
+app = FastAPI(
+    title="E.coli Preharvest Risk Prediction API",
+    description="API for predicting E.coli contamination risk in preharvest produce",
+    version="1.0.0"
+)
+# Global variables for model artifacts
+MODEL = None
+PREPROCESSOR = None
+FEATURE_NAMES = None
+MODEL_METRICS = None
+MODEL_COMPARISON = None
+class PredictionInput(BaseModel):
+    """Input schema for prediction requests."""
+    org_conv_kiptraq: str = Field(..., description="Organic or Conventional")
+    acres_kiptraq: float = Field(..., description="Farm size in acres")
+    lat: float = Field(..., description="Latitude")
+    lon: float = Field(..., description="Longitude")
+    season: str = Field(..., description="Season (Spring, Summer, Fall, Winter)")
+    # Day 0 weather features
+    temperature_avg_d0: float
+    temperature_max_d0: float
+    temperature_min_d0: float
+    humidity_avg_d0: float
+    humidity_max_d0: float
+    humidity_min_d0: float
+    wind_speed_avg_d0: float
+    wind_speed_max_d0: float
+    wind_speed_min_d0: float
+    wind_run_avg_d0: float
+    wind_run_max_d0: float
+    wind_run_min_d0: float
+    wind_chill_avg_d0: float
+    wind_chill_max_d0: float
+    wind_chill_min_d0: float
+    heat_index_avg_d0: float
+    heat_index_max_d0: float
+    heat_index_min_d0: float
+    thw_index_avg_d0: float
+    thw_index_max_d0: float
+    thw_index_min_d0: float
+    rain_avg_d0: float
+    rain_max_d0: float
+    rain_min_d0: float
+    rain_rate_avg_d0: float
+    rain_rate_max_d0: float
+    rain_rate_min_d0: float
+    solar_radiation_max_d0: float
+    solar_radiation_min_d0: float
+    ET_avg_d0: float
+    ET_max_d0: float
+    ET_min_d0: float
+    heating_degree_days_avg_d0: float
+    heating_degree_days_max_d0: float
+    heating_degree_days_min_d0: float
+    cooling_degree_days_avg_d0: float
+    cooling_degree_days_max_d0: float
+    cooling_degree_days_min_d0: float
+    wind_direction_mode_d0: str
+    # Day 1 before weather features
+    temperature_avg_d1_before: float
+    temperature_max_d1_before: float
+    temperature_min_d1_before: float
+    humidity_avg_d1_before: float
+    humidity_max_d1_before: float
+    humidity_min_d1_before: float
+    wind_speed_avg_d1_before: float
+    wind_speed_max_d1_before: float
+    wind_speed_min_d1_before: float
+    wind_run_avg_d1_before: float
+    wind_run_max_d1_before: float
+    wind_run_min_d1_before: float
+    wind_chill_avg_d1_before: float
+    wind_chill_max_d1_before: float
+    wind_chill_min_d1_before: float
+    heat_index_avg_d1_before: float
+    heat_index_max_d1_before: float
+    heat_index_min_d1_before: float
+    thw_index_avg_d1_before: float
+    thw_index_max_d1_before: float
+    thw_index_min_d1_before: float
+    rain_avg_d1_before: float
+    rain_max_d1_before: float
+    rain_min_d1_before: float
+    rain_rate_avg_d1_before: float
+    rain_rate_max_d1_before: float
+    rain_rate_min_d1_before: float
+    solar_radiation_max_d1_before: float
+    solar_radiation_min_d1_before: float
+    ET_avg_d1_before: float
+    ET_max_d1_before: float
+    ET_min_d1_before: float
+    heating_degree_days_avg_d1_before: float
+    heating_degree_days_max_d1_before: float
+    heating_degree_days_min_d1_before: float
+    cooling_degree_days_avg_d1_before: float
+    cooling_degree_days_max_d1_before: float
+    cooling_degree_days_min_d1_before: float
+    wind_direction_mode_d1_before: str
+    # Day 3 before weather features
+    temperature_avg_d3_before: float
+    temperature_max_d3_before: float
+    temperature_min_d3_before: float
+    humidity_avg_d3_before: float
+    humidity_max_d3_before: float
+    humidity_min_d3_before: float
+    wind_speed_avg_d3_before: float
+    wind_speed_max_d3_before: float
+    wind_speed_min_d3_before: float
+    wind_run_avg_d3_before: float
+    wind_run_max_d3_before: float
+    wind_run_min_d3_before: float
+    wind_chill_avg_d3_before: float
+    wind_chill_max_d3_before: float
+    wind_chill_min_d3_before: float
+    heat_index_avg_d3_before: float
+    heat_index_max_d3_before: float
+    heat_index_min_d3_before: float
+    thw_index_avg_d3_before: float
+    thw_index_max_d3_before: float
+    thw_index_min_d3_before: float
+    rain_avg_d3_before: float
+    rain_max_d3_before: float
+    rain_min_d3_before: float
+    rain_rate_avg_d3_before: float
+    rain_rate_max_d3_before: float
+    rain_rate_min_d3_before: float
+    solar_radiation_max_d3_before: float
+    solar_radiation_min_d3_before: float
+    ET_avg_d3_before: float
+    ET_max_d3_before: float
+    ET_min_d3_before: float
+    heating_degree_days_avg_d3_before: float
+    heating_degree_days_max_d3_before: float
+    heating_degree_days_min_d3_before: float
+    cooling_degree_days_avg_d3_before: float
+    cooling_degree_days_max_d3_before: float
+    cooling_degree_days_min_d3_before: float
+    wind_direction_mode_d3_before: str
+    # Day 7 before weather features
+    temperature_avg_d7_before: float
+    temperature_max_d7_before: float
+    temperature_min_d7_before: float
+    humidity_avg_d7_before: float
+    humidity_max_d7_before: float
+    humidity_min_d7_before: float
+    wind_speed_avg_d7_before: float
+    wind_speed_max_d7_before: float
+    wind_speed_min_d7_before: float
+    wind_run_avg_d7_before: float
+    wind_run_max_d7_before: float
+    wind_run_min_d7_before: float
+    wind_chill_avg_d7_before: float
+    wind_chill_max_d7_before: float
+    wind_chill_min_d7_before: float
+    heat_index_avg_d7_before: float
+    heat_index_max_d7_before: float
+    heat_index_min_d7_before: float
+    thw_index_avg_d7_before: float
+    thw_index_max_d7_before: float
+    thw_index_min_d7_before: float
+    rain_avg_d7_before: float
+    rain_max_d7_before: float
+    rain_min_d7_before: float
+    rain_rate_avg_d7_before: float
+    rain_rate_max_d7_before: float
+    rain_rate_min_d7_before: float
+    solar_radiation_max_d7_before: float
+    solar_radiation_min_d7_before: float
+    ET_avg_d7_before: float
+    ET_max_d7_before: float
+    ET_min_d7_before: float
+    heating_degree_days_avg_d7_before: float
+    heating_degree_days_max_d7_before: float
+    heating_degree_days_min_d7_before: float
+    cooling_degree_days_avg_d7_before: float
+    cooling_degree_days_max_d7_before: float
+    cooling_degree_days_min_d7_before: float
+    wind_direction_mode_d7_before: str
+    class Config:
+        schema_extra = {
+            "example": {
+                "org_conv_kiptraq": "Conventional",
+                "acres_kiptraq": 10.0,
+                "lat": 36.5,
+                "lon": -121.5,
+                "season": "Fall",
+                "temperature_avg_d0": 70.0,
+                "temperature_max_d0": 85.0,
+                "temperature_min_d0": 55.0,
+                "humidity_avg_d0": 65.0,
+                "humidity_max_d0": 85.0,
+                "humidity_min_d0": 45.0,
+                "wind_speed_avg_d0": 5.0,
+                "wind_speed_max_d0": 12.0,
+                "wind_speed_min_d0": 0.0,
+                "wind_run_avg_d0": 1.2,
+                "wind_run_max_d0": 3.0,
+                "wind_run_min_d0": 0.0,
+                "wind_chill_avg_d0": 68.0,
+                "wind_chill_max_d0": 85.0,
+                "wind_chill_min_d0": 55.0,
+                "heat_index_avg_d0": 70.0,
+                "heat_index_max_d0": 85.0,
+                "heat_index_min_d0": 55.0,
+                "thw_index_avg_d0": 68.0,
+                "thw_index_max_d0": 85.0,
+                "thw_index_min_d0": 55.0,
+                "rain_avg_d0": 0.0,
+                "rain_max_d0": 0.0,
+                "rain_min_d0": 0.0,
+                "rain_rate_avg_d0": 0.0,
+                "rain_rate_max_d0": 0.0,
+                "rain_rate_min_d0": 0.0,
+                "solar_radiation_max_d0": 850.0,
+                "solar_radiation_min_d0": 0.0,
+                "ET_avg_d0": 0.15,
+                "ET_max_d0": 0.25,
+                "ET_min_d0": 0.0,
+                "heating_degree_days_avg_d0": 0.0,
+                "heating_degree_days_max_d0": 0.0,
+                "heating_degree_days_min_d0": 0.0,
+                "cooling_degree_days_avg_d0": 5.0,
+                "cooling_degree_days_max_d0": 20.0,
+                "cooling_degree_days_min_d0": 0.0,
+                "wind_direction_mode_d0": "W",
+                # Similar pattern for d1, d3, d7
+                # (abbreviated for brevity)
+            }
+        }
+class PredictionOutput(BaseModel):
+    """Output schema for prediction responses."""
+    prediction: str = Field(..., description="Predicted class: 'Positive' or 'Negative'")
+    probability_positive: float = Field(..., description="Probability of E.coli positive")
+    probability_negative: float = Field(..., description="Probability of E.coli negative")
+    risk_level: str = Field(..., description="Risk level: 'Low', 'Medium', or 'High'")
+class ModelInfo(BaseModel):
+    """Model information schema."""
+    # Response model of GET /model_info; get_model_info() fills every field
+    # from the loaded MODEL_METRICS dictionary.
+    # 'winning_algorithm' entry, or 'unknown' when the entry is absent
+    algorithm: str
+    # 'training_date' entry, or 'unknown' when the entry is absent
+    training_date: str
+    # 'metrics' entry, or an empty dict when the entry is absent
+    metrics: Dict
+    # 'top_features' entry, or an empty dict when the entry is absent
+    top_features: Dict
+@app.on_event("startup")
+async def load_model_artifacts():
+    """Load model artifacts on application startup."""
+    global MODEL, PREPROCESSOR, FEATURE_NAMES, MODEL_METRICS, MODEL_COMPARISON
+    model_dir = Path("model")
+    try:
+        # Load model
+        model_path = model_dir / "best_model.joblib"
+        MODEL = joblib.load(model_path)
+        logger.info(f"Loaded model from {model_path}")
+        # Load preprocessor
+        preprocessor_path = model_dir / "preprocessor.joblib"
+        PREPROCESSOR = joblib.load(preprocessor_path)
+        logger.info(f"Loaded preprocessor from {preprocessor_path}")
+        # Load feature names
+        feature_names_path = model_dir / "feature_names.json"
+        with open(feature_names_path, 'r') as f:
+            FEATURE_NAMES = json.load(f)
+        logger.info(f"Loaded {len(FEATURE_NAMES)} feature names")
+        # Load model metrics
+        metrics_path = model_dir / "model_metrics.json"
+        with open(metrics_path, 'r') as f:
+            MODEL_METRICS = json.load(f)
+        logger.info(f"Loaded model metrics for {MODEL_METRICS.get('winning_algorithm', 'unknown')}")
+        # Load model comparison
+        comparison_path = model_dir / "model_comparison.json"
+        with open(comparison_path, 'r') as f:
+            MODEL_COMPARISON = json.load(f)
+        logger.info(f"Loaded comparison for {len(MODEL_COMPARISON)} models")
+        logger.info("All model artifacts loaded successfully!")
+    except Exception as e:
+        logger.error(f"Error loading model artifacts: {e}")
+        raise
+def preprocess_input(input_data: PredictionInput) -> pd.DataFrame:
+    """
+    Convert input data to DataFrame with proper format for prediction.
+    Args:
+        input_data: Pydantic model with input features
+    Returns:
+        pd.DataFrame: Preprocessed features
+    """
+    # Convert to dictionary
+    data_dict = input_data.dict()
+    # Create DataFrame
+    df = pd.DataFrame([data_dict])
+    # Apply one-hot encoding for categorical variables (same as training)
+    categorical_cols = ['org_conv_kiptraq', 'season',
+                       'wind_direction_mode_d0', 'wind_direction_mode_d1_before',
+                       'wind_direction_mode_d3_before', 'wind_direction_mode_d7_before']
+    df = pd.get_dummies(df, columns=categorical_cols, drop_first=False)
+    # Align columns with training data
+    # Add missing columns with 0 values
+    for col in FEATURE_NAMES:
+        if col not in df.columns:
+            df[col] = 0
+    # Keep only columns that were in training
+    df = df[FEATURE_NAMES]
+    return df
+@app.get("/")
+async def root():
+    """Root endpoint with API information."""
+    return {
+        "message": "E.coli Preharvest Risk Prediction API",
+        "version": "1.0.0",
+        "endpoints": {
+            "/predict": "POST - Single prediction",
+            "/predict_batch": "POST - Batch predictions",
+            "/model_info": "GET - Model information and metrics",
+            "/health": "GET - Health check"
+        }
+    }
+@app.get("/health")
+async def health_check():
+    """Health check endpoint."""
+    if MODEL is None:
+        raise HTTPException(status_code=503, detail="Model not loaded")
+    return {
+        "status": "healthy",
+        "model_loaded": MODEL is not None,
+        "algorithm": MODEL_METRICS.get('winning_algorithm', 'unknown') if MODEL_METRICS else 'unknown'
+    }
+@app.get("/model_info", response_model=ModelInfo)
+async def get_model_info():
+    """Get model information and performance metrics."""
+    if MODEL_METRICS is None:
+        raise HTTPException(status_code=503, detail="Model metrics not loaded")
+    return ModelInfo(
+        algorithm=MODEL_METRICS.get('winning_algorithm', 'unknown'),
+        training_date=MODEL_METRICS.get('training_date', 'unknown'),
+        metrics=MODEL_METRICS.get('metrics', {}),
+        top_features=MODEL_METRICS.get('top_features', {})
+    )
+@app.get("/model_comparison")
+async def get_model_comparison():
+    """Get comparison results for all trained models."""
+    if MODEL_COMPARISON is None:
+        raise HTTPException(status_code=503, detail="Model comparison not loaded")
+    return {
+        "comparison": MODEL_COMPARISON,
+        "winner": MODEL_METRICS.get('winning_algorithm', 'unknown') if MODEL_METRICS else 'unknown'
+    }
+@app.post("/predict", response_model=PredictionOutput)
+async def predict(input_data: PredictionInput):
+    """
+    Make a single prediction.
+    Args:
+        input_data: Input features for prediction
+    Returns:
+        PredictionOutput: Prediction result with probabilities
+    """
+    if MODEL is None:
+        raise HTTPException(status_code=503, detail="Model not loaded")
+    try:
+        # Preprocess input
+        df = preprocess_input(input_data)
+        # Make prediction
+        prediction = MODEL.predict(df)[0]
+        probabilities = MODEL.predict_proba(df)[0]
+        # Get probability for each class
+        # Classes are in order: ['Negative', 'Positive']
+        prob_negative = float(probabilities[0])
+        prob_positive = float(probabilities[1])
+        # Determine risk level
+        if prob_positive < 0.3:
+            risk_level = "Low"
+        elif prob_positive < 0.7:
+            risk_level = "Medium"
+        else:
+            risk_level = "High"
+        return PredictionOutput(
+            prediction=prediction,
+            probability_positive=prob_positive,
+            probability_negative=prob_negative,
+            risk_level=risk_level
+        )
+    except Exception as e:
+        logger.error(f"Prediction error: {e}")
+        raise HTTPException(status_code=500, detail=f"Prediction failed: {str(e)}")
+@app.post("/predict_batch")
+async def predict_batch(input_data: List[PredictionInput]):
+    """
+    Make batch predictions.
+    Args:
+        input_data: List of input features for prediction
+    Returns:
+        List of prediction results
+    """
+    if MODEL is None:
+        raise HTTPException(status_code=503, detail="Model not loaded")
+    try:
+        results = []
+        for data in input_data:
+            result = await predict(data)
+            results.append(result.dict())
+        return {"predictions": results, "count": len(results)}
+    except Exception as e:
+        logger.error(f"Batch prediction error: {e}")
+        raise HTTPException(status_code=500, detail=f"Batch prediction failed: {str(e)}")
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)