# FastAPI text-classification service (TF-IDF features + joblib-persisted model).
# (Removed non-code residue scraped from the Hugging Face Spaces page header.)
import os

import joblib
import nltk
import numpy as np
from fastapi import FastAPI
from pydantic import BaseModel

# Point NLTK at the corpora bundled into the container image so that
# stopwords/wordnet lookups work without a runtime download.
nltk_data_path = "/app/nltk_data"
os.environ["NLTK_DATA"] = nltk_data_path
nltk.data.path.append(nltk_data_path)

# Deserialize the fitted artifacts once at startup: the classifier, the
# TF-IDF vectorizer it was trained on, and the label encoder for class names.
model = joblib.load("model.joblib")
tfidf_vectorizer = joblib.load("tfidf_vectorizer.joblib")
le = joblib.load("labelencoder.joblib")
class TextInput(BaseModel):
    """Request body for the classifier: the raw text to categorize."""

    text: str
app = FastAPI()


# NOTE(review): the route decorator was missing in the original, so the handler
# was never registered with FastAPI — confirm "/predict" is the intended path.
@app.post("/predict")
def predict(input: TextInput):
    """Classify ``input.text`` and return the predicted category name.

    The text is normalized by ``preprocess_text``, vectorized with the
    fitted TF-IDF vectorizer, and classified by the loaded model. When the
    model's highest class probability exceeds 0.5 the bare category name is
    returned; otherwise the answer is prefixed with a hedge.
    """
    processed_text = preprocess_text(input.text)
    text_tfidf = tfidf_vectorizer.transform([processed_text]).toarray()
    y_pred = model.predict(text_tfidf)[0]
    category_name = le.inverse_transform([y_pred])[0]
    probabilities = model.predict_proba(text_tfidf)[0]
    # Bug fix: the original wrote np.max(probabilities > 0.5) — the max of a
    # boolean mask. Compare the max probability itself against the threshold.
    if np.max(probabilities) > 0.5:
        return category_name
    # Typo fix in the user-facing message: "ambigous" -> "ambiguous".
    return "Quite ambiguous but maybe " + category_name
def preprocess_text(text):
    """Normalize *text* for TF-IDF vectorization.

    Keeps alphabetic characters only, lowercases, drops English stopwords,
    and lemmatizes each remaining token. Returns the tokens re-joined with
    single spaces.
    """
    import re
    from nltk.corpus import stopwords
    from nltk.stem import WordNetLemmatizer

    lemmatizer = WordNetLemmatizer()
    stop_words = set(stopwords.words('english'))
    # Replace every non-letter with a space, then lowercase the whole string.
    cleaned = re.sub('[^a-zA-Z]', ' ', text).lower()
    kept = []
    for token in cleaned.split():
        # Stopword filtering happens on the surface form, before lemmatizing.
        if token not in stop_words:
            kept.append(lemmatizer.lemmatize(token))
    return ' '.join(kept)