# Residual page header captured with this file (not code):
#   "shreyankisiri's picture" / "Update app.py" / "f6af31f verified"
from fastapi import FastAPI
from pydantic import BaseModel
import joblib
import nltk
import os
import numpy as np
# Make the bundled NLTK corpora discoverable: export the location through the
# environment and also register it on NLTK's in-process search path.
nltk_data_path = "/app/nltk_data"
os.environ["NLTK_DATA"] = nltk_data_path
nltk.data.path.append(nltk_data_path)

# Deserialize the three artifacts used by the /predict handler, in order:
# the model, the TF-IDF vectorizer, and the label encoder. Paths are relative,
# so they resolve against the process working directory.
# NOTE(review): joblib.load unpickles arbitrary objects — only ship trusted files.
model, tfidf_vectorizer, le = (
    joblib.load(artifact_path)
    for artifact_path in (
        "model.joblib",
        "tfidf_vectorizer.joblib",
        "labelencoder.joblib",
    )
)
# Request body schema for the POST /predict endpoint.
# (Documented with comments rather than a docstring so the generated schema
# description stays exactly as it was.)
class TextInput(BaseModel):
    # Raw text to classify; the handler reads it as `input.text`.
    text: str
# FastAPI application instance; the /predict route below is registered on it.
app = FastAPI()
# A prediction is reported without a caveat only when the most likely class
# has a probability strictly greater than this value.
_CONFIDENCE_THRESHOLD = 0.5


@app.post("/predict")
def predict(input: TextInput):
    """Classify the submitted text and return the predicted category name.

    The text is cleaned with ``preprocess_text``, vectorized with the loaded
    TF-IDF vectorizer, and classified by the loaded model; the predicted label
    is decoded back to its category name with the label encoder.

    Args:
        input: Request body holding the raw text to classify.

    Returns:
        The category name when the highest class probability exceeds 0.5,
        otherwise the same name prefixed with a note that the result is
        ambiguous.
    """
    processed_text = preprocess_text(input.text)
    # The sparse TF-IDF row is densified before inference, as before.
    # NOTE(review): presumably the model needs dense input — confirm.
    text_tfidf = tfidf_vectorizer.transform([processed_text]).toarray()

    y_pred = model.predict(text_tfidf)[0]
    category_name = le.inverse_transform([y_pred])[0]

    # Compare the top class probability with the threshold directly. The
    # previous form took the max of the boolean mask `probabilities > 0.5`,
    # which yields the same answer but hides the intent.
    top_probability = np.max(model.predict_proba(text_tfidf)[0])
    if top_probability > _CONFIDENCE_THRESHOLD:
        return category_name

    # The misspelling is kept verbatim: this string is part of the response
    # payload and clients may match on it.
    return "Quite ambigous but maybe " + category_name
# Lazily built (lemmatizer, stop-word set) pair shared by every call, so the
# stop-word corpus is read once instead of on each request.
_TEXT_RESOURCES = None


def _get_text_resources():
    """Return the shared lemmatizer and English stop-word set, building them once."""
    global _TEXT_RESOURCES
    if _TEXT_RESOURCES is None:
        from nltk.corpus import stopwords
        from nltk.stem import WordNetLemmatizer

        # Assigned as one tuple so callers never observe a half-built pair.
        _TEXT_RESOURCES = (
            WordNetLemmatizer(),
            frozenset(stopwords.words('english')),
        )
    return _TEXT_RESOURCES


def preprocess_text(text: str) -> str:
    """Normalize raw text into a space-separated string of lemmatized words.

    Every character that is not an ASCII letter is replaced by a space, the
    result is lower-cased and split on whitespace, English stop words are
    dropped, and each remaining word is lemmatized.

    Args:
        text: Raw input text.

    Returns:
        The surviving lemmas joined by single spaces; an empty string when
        the input contains no alphabetic words.
    """
    import re

    tokens = re.sub(r'[^a-zA-Z]', ' ', text).lower().split()
    if not tokens:
        # Nothing to filter or lemmatize, so skip touching the NLTK corpora.
        return ''

    lemmatizer, stop_words = _get_text_resources()
    return ' '.join(
        lemmatizer.lemmatize(token)
        for token in tokens
        if token not in stop_words
    )