"""FastAPI service exposing an XGBoost text-classification model.

Each request's features are the TF-IDF vector of the cleaned text
concatenated with a single raw-text-length column.
"""

import re

from fastapi import FastAPI, HTTPException
import joblib
import numpy as np
import pandas as pd
from pydantic import BaseModel
from xgboost import XGBClassifier
import xgboost as xgb

# Load XGBoost model with error handling — fail fast at startup so a
# missing/corrupt model file is caught before any request is served.
try:
    model = XGBClassifier()
    model.load_model("xgboost_model.json")
except Exception as e:
    raise RuntimeError(f"Error loading model: {str(e)}")

# Load TF-IDF vectorizer with error handling
try:
    vectorizer = joblib.load("vectorizer.joblib")
except Exception as e:
    raise RuntimeError(f"Error loading vectorizer: {str(e)}")

# Initialize FastAPI
app = FastAPI()


class TextInput(BaseModel):
    """Request body for /predict/: the raw text to classify."""
    text: str


def _text_cleaning(text):
    """Lowercase, strip, and drop every character except a-z, 0-9, whitespace.

    BUGFIX: the original called ``str.replace(pattern, "", regex=True)``.
    Python's ``str.replace`` takes no ``regex`` keyword (that signature is
    pandas ``Series.replace``), so every request raised ``TypeError``.
    ``re.sub`` performs the intended regex substitution.
    """
    return re.sub(r"[^a-z0-9\s]", "", text.lower().strip())


@app.post("/predict/")
def predict(data: TextInput):
    """Classify ``data.text``.

    Returns a dict with the integer predicted label and the probability of
    the positive class rounded to 4 decimals. Raises HTTP 400 on empty
    input and HTTP 500 on transformation/prediction failures.
    """
    test_text = data.text.strip()
    if not test_text:
        raise HTTPException(status_code=400, detail="Input text cannot be empty.")

    # Preprocess text
    cleaned_text = _text_cleaning(test_text)

    # TF-IDF transformation
    try:
        test_tfidf = vectorizer.transform([cleaned_text])
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"TF-IDF transformation failed: {str(e)}")

    # Length of the *raw* (pre-cleaning) text as an extra feature.
    # NOTE(review): assumes training also used raw length — TODO confirm.
    test_text_length = np.array([[len(test_text)]], dtype=np.float32)

    # Densify the sparse TF-IDF row and append the length column.
    test_features = np.hstack([test_tfidf.toarray(), test_text_length])

    # Make prediction
    try:
        prediction = model.predict(test_features)[0]
        prediction_proba = model.predict_proba(test_features)[:, 1][0]
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Prediction failed: {str(e)}")

    return {
        "prediction": int(prediction),
        "prediction_probability": round(float(prediction_proba), 4)
    }


@app.get("/")
def home():
    """Health/landing endpoint confirming the service is running."""
    return {"message": "XGBoost Text Classification API is live!"}