Spaces:
Sleeping
Sleeping
| import pickle | |
| import logging | |
| import os | |
| from typing import Tuple, Any | |
| from pathlib import Path | |
| class SentimentModel: | |
| def __init__(self): | |
| self.count_vectorizer = None | |
| self.tfidf_transformer = None | |
| self.classifier = None | |
| self._load_models() | |
| def _load_models(self) -> None: | |
| """Load all required ML models from pickle files.""" | |
| try: | |
| # Get model path from environment or use default relative path | |
| default_path = str(Path(__file__).parent / 'Models') | |
| model_path = os.getenv('MODEL_PATH', default_path) | |
| logging.info(f"Loading models from: {model_path}") | |
| # Ensure the directory exists | |
| if not os.path.exists(model_path): | |
| raise FileNotFoundError(f"Model directory not found at: {model_path}") | |
| model_files = { | |
| 'count_vectorizer': 'count_vect.pkl', | |
| 'tfidf_transformer': 'transformer.pkl', | |
| 'classifier': 'Text_LR.pkl' | |
| } | |
| for model_name, filename in model_files.items(): | |
| file_path = os.path.join(model_path, filename) | |
| if not os.path.exists(file_path): | |
| raise FileNotFoundError(f"Model file not found: {file_path}") | |
| with open(file_path, 'rb') as f: | |
| setattr(self, model_name, pickle.load(f)) | |
| logging.info(f"Successfully loaded {model_name}") | |
| except FileNotFoundError as e: | |
| logging.error(f"Model file not found: {str(e)}") | |
| raise | |
| except Exception as e: | |
| logging.error(f"Error loading models: {str(e)}") | |
| raise | |
| def predict(self, text: str) -> Tuple[str, float]: | |
| """ | |
| Predict sentiment for given text using the ML pipeline. | |
| Args: | |
| text: Input text for sentiment analysis | |
| Returns: | |
| Tuple containing sentiment label and confidence score | |
| """ | |
| try: | |
| if not all([self.count_vectorizer, self.tfidf_transformer, self.classifier]): | |
| raise RuntimeError("Models not properly initialized") | |
| # Transform text using CountVectorizer | |
| count_features = self.count_vectorizer.transform([text]) | |
| logging.debug(f"Count features shape: {count_features.shape}") | |
| # Apply TF-IDF transformation | |
| tfidf_features = self.tfidf_transformer.transform(count_features) | |
| logging.debug(f"TF-IDF features shape: {tfidf_features.shape}") | |
| # Get prediction probabilities | |
| probabilities = self.classifier.predict_proba(tfidf_features)[0] | |
| logging.debug(f"Raw prediction probabilities: {probabilities}") | |
| # Find the class with highest probability | |
| max_prob_idx = probabilities.argmax() | |
| confidence = probabilities[max_prob_idx] | |
| # Map the prediction index to sentiment | |
| # Class 2 (index 2) appears to be positive sentiment based on the logs | |
| sentiment = "positive" if max_prob_idx == 2 else "negative" | |
| logging.info(f"Prediction for text: '{text[:50]}...' -> {sentiment} (confidence: {confidence:.2f})") | |
| logging.debug(f"Probabilities - Positive: {confidence:.3f}") | |
| return sentiment, float(confidence) | |
| except Exception as e: | |
| logging.error(f"Prediction error: {str(e)}") | |
| raise |