# Source: Hugging Face Space by arnab12345678 — "Update app.py" (commit 1e2aaf2, verified)
import re

import joblib
import numpy as np
import pandas as pd
import xgboost as xgb
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from xgboost import XGBClassifier
# --- Startup: artifacts are loaded once at import time so every request reuses them ---

# Load XGBoost model with error handling
try:
    model = XGBClassifier()
    # JSON format produced by Booster.save_model / XGBClassifier.save_model.
    model.load_model("xgboost_model.json")
except Exception as e:
    # Fail fast at startup rather than serving requests without a model.
    raise RuntimeError(f"Error loading model: {str(e)}")
# Load TF-IDF vectorizer with error handling
try:
    # Presumably a fitted sklearn TfidfVectorizer — TODO confirm against training code.
    vectorizer = joblib.load("vectorizer.joblib")
except Exception as e:
    raise RuntimeError(f"Error loading vectorizer: {str(e)}")
# Initialize FastAPI
app = FastAPI()
# Define request model
class TextInput(BaseModel):
    """Request body for POST /predict/: a single free-text field to classify."""

    # Raw input string; emptiness is validated inside the endpoint, not here.
    text: str
# Text cleaning function
def _text_cleaning(text):
return text.lower().strip().replace(r"[^a-z0-9\s]", "", regex=True)
@app.post("/predict/")
def predict(data: TextInput):
    """Classify the submitted text; return the label and its positive-class probability.

    Raises HTTPException 400 for empty input and 500 when vectorization or
    inference fails.
    """
    raw_text = data.text.strip()
    if not raw_text:
        raise HTTPException(status_code=400, detail="Input text cannot be empty.")

    # Normalize the text before vectorizing it.
    cleaned = _text_cleaning(raw_text)

    # TF-IDF transformation
    try:
        tfidf_features = vectorizer.transform([cleaned])
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"TF-IDF transformation failed: {str(e)}")

    # The model expects one extra column: the length of the raw (uncleaned) text.
    length_feature = np.array([[len(raw_text)]], dtype=np.float32)
    combined = np.hstack([tfidf_features.toarray(), length_feature])

    # Run inference.
    try:
        label = model.predict(combined)[0]
        positive_proba = model.predict_proba(combined)[:, 1][0]
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Prediction failed: {str(e)}")

    return {
        "prediction": int(label),
        "prediction_probability": round(float(positive_proba), 4)
    }
@app.get("/")
def home():
    """Health-check endpoint confirming the service is up."""
    return {"message": "XGBoost Text Classification API is live!"}