Spaces:
Sleeping
Sleeping
| import os | |
| import sys | |
| import re | |
| import string | |
| import joblib | |
| import pandas as pd | |
| import numpy as np | |
| from nltk.corpus import stopwords | |
| from nltk.stem import WordNetLemmatizer | |
| import nltk | |
| from glob import glob | |
| # Add the root directory to sys.path | |
| sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) | |
| from logging_config.logger_config import get_logger | |
| # Download necessary NLTK data files | |
| nltk.download('stopwords') | |
| nltk.download('wordnet') | |
| # Get the logger | |
| logger = get_logger(__name__) | |
| # Custom Preprocessor Class | |
# Custom Preprocessor Class
class TextPreprocessor:
    """Clean raw text for model input.

    Pipeline: lowercase -> strip punctuation -> strip digits ->
    whitespace tokenize -> drop English stopwords -> WordNet-lemmatize ->
    rejoin into a single space-separated string.
    """

    # Compiled once at class definition time instead of rebuilding the
    # pattern on every preprocess_text() call.
    _PUNCT_RE = re.compile(f'[{re.escape(string.punctuation)}]')
    _DIGIT_RE = re.compile(r'\d+')

    def __init__(self):
        # Requires the NLTK 'stopwords' and 'wordnet' corpora, which the
        # module downloads at import time.
        self.stop_words = set(stopwords.words('english'))
        self.lemmatizer = WordNetLemmatizer()
        logger.info("TextPreprocessor initialized.")

    def preprocess_text(self, text):
        """Return the cleaned form of *text*.

        Args:
            text: Raw input string.

        Returns:
            A lowercase string with punctuation, digits, and English
            stopwords removed and remaining words lemmatized.
        """
        # Lazy %-style args so the log message is only formatted when the
        # INFO level is actually enabled.
        logger.info("Original text: %s", text)
        # Lowercase the text
        text = text.lower()
        logger.info("Lowercased text: %s", text)
        # Remove punctuation
        text = self._PUNCT_RE.sub('', text)
        logger.info("Text after punctuation removal: %s", text)
        # Remove numbers
        text = self._DIGIT_RE.sub('', text)
        logger.info("Text after number removal: %s", text)
        # Tokenize the text (plain whitespace split)
        words = text.split()
        logger.info("Tokenized text: %s", words)
        # Remove stopwords and apply lemmatization
        words = [self.lemmatizer.lemmatize(word) for word in words if word not in self.stop_words]
        logger.info("Text after stopword removal and lemmatization: %s", words)
        # Join words back into a single string
        cleaned_text = ' '.join(words)
        logger.info("Cleaned text: %s", cleaned_text)
        return cleaned_text
def get_latest_model_path(models_dir='./models'):
    """Return the path of the highest-versioned model file in *models_dir*.

    Model files are expected to match ``model_v<N>.joblib``.  Selection is
    by the integer version embedded in the filename rather than by ctime:
    ctime is a metadata-change timestamp that resets when files are
    copied, restored, or deployed, so it does not reliably identify the
    newest model version.

    Args:
        models_dir: Directory to search (default ``./models``).

    Returns:
        Path to the model file with the largest version number.

    Raises:
        FileNotFoundError: If no matching model files exist.
    """
    model_files = glob(os.path.join(models_dir, 'model_v*.joblib'))
    if not model_files:
        logger.error("No model files found in the models directory.")
        raise FileNotFoundError("No model files found in the models directory.")

    def _version(path):
        # Extract the numeric version; files that somehow match the glob
        # but not this stricter pattern sort first (-1).
        match = re.search(r'model_v(\d+)\.joblib$', os.path.basename(path))
        return int(match.group(1)) if match else -1

    latest_model_file = max(model_files, key=_version)
    logger.info(f"Latest model file found: {latest_model_file}")
    return latest_model_file
def load_model(models_dir='./models'):
    """Locate and deserialize the latest saved model.

    Args:
        models_dir: Directory searched for ``model_v*.joblib`` files.
            Optional; the default preserves the previous hard-coded
            behavior, so existing ``load_model()`` callers are unaffected.

    Returns:
        The object stored in the newest model file (via joblib).

    Raises:
        FileNotFoundError: Propagated from get_latest_model_path when no
            model file exists.
    """
    model_path = get_latest_model_path(models_dir)
    logger.info(f"Loading model from {model_path}")
    return joblib.load(model_path)
def predict(text, model):
    """Preprocess *text* and return the model's prediction for it.

    Args:
        text: Raw input string to classify.
        model: A fitted estimator exposing ``predict(list_of_str)``.

    Returns:
        The first element of the model's prediction for the cleaned text.
    """
    # Reuse one TextPreprocessor across calls: building the stopword set
    # and lemmatizer is relatively expensive and yields the same object
    # every time, so constructing it per prediction is wasted work.
    preprocessor = getattr(predict, '_preprocessor', None)
    if preprocessor is None:
        preprocessor = predict._preprocessor = TextPreprocessor()
    # Preprocess the input text
    logger.info("Preprocessing input text...")
    cleaned_text = preprocessor.preprocess_text(text)
    # Make a prediction
    logger.info("Making prediction...")
    prediction = model.predict([cleaned_text])
    logger.info(f"Prediction: {prediction}")
    return prediction[0]
if __name__ == "__main__":
    # Demo entry point: load the newest model and classify one sentence.
    sample_text = "I love programming in Python."
    latest_model = load_model()
    result = predict(sample_text, latest_model)
    print(f"Prediction: {result}")