from fastapi import FastAPI
from pydantic import BaseModel
import joblib
import nltk
import os
import numpy as np

# Make the pre-downloaded NLTK corpora (wordnet, stopwords) discoverable
# inside the container image.
nltk_data_path = "/app/nltk_data"
os.environ["NLTK_DATA"] = nltk_data_path
nltk.data.path.append(nltk_data_path)

# Load the trained classifier and its companion transformers once at startup
# so each request only pays for inference, not deserialization.
model = joblib.load("model.joblib")
tfidf_vectorizer = joblib.load("tfidf_vectorizer.joblib")
le = joblib.load("labelencoder.joblib")


class TextInput(BaseModel):
    """Request body for POST /predict: the raw text to classify."""

    text: str


app = FastAPI()


def preprocess_text(text: str) -> str:
    """Normalize *text* for TF-IDF: keep letters only, lowercase, drop
    English stopwords, and lemmatize each remaining word.

    Must mirror the preprocessing used when the vectorizer was fitted —
    TODO(review): confirm against the training pipeline.
    """
    import re
    from nltk.stem import WordNetLemmatizer
    from nltk.corpus import stopwords

    lemmatizer = WordNetLemmatizer()
    stop_words = set(stopwords.words('english'))
    # Raw string for the regex (avoids invalid-escape pitfalls); replace
    # every non-letter with a space before lowercasing.
    text = re.sub(r'[^a-zA-Z]', ' ', text).lower()
    words = [lemmatizer.lemmatize(word) for word in text.split()
             if word not in stop_words]
    return ' '.join(words)


@app.post("/predict")
def predict(input: TextInput):
    """Classify the posted text and return its category name.

    When the classifier's best class probability does not exceed 0.5, the
    category is prefixed with a low-confidence disclaimer.
    """
    processed_text = preprocess_text(input.text)
    text_tfidf = tfidf_vectorizer.transform([processed_text]).toarray()
    y_pred = model.predict(text_tfidf)[0]
    category_name = le.inverse_transform([y_pred])[0]
    probabilities = model.predict_proba(text_tfidf)[0]
    # FIX: original wrote `np.max(probabilities > 0.5)` — the max of a
    # boolean array. It only worked by coincidence (truthy iff any element
    # > 0.5). Compare the top probability itself, explicitly.
    if float(np.max(probabilities)) > 0.5:
        return category_name
    # FIX: corrected typo "ambigous" in the user-facing message.
    return "Quite ambiguous but maybe " + category_name