Spaces:

AOUNZakaria
/

Sentiment_Analysis

Sleeping

App Files Files Community

Sentiment_Analysis / models.py

AOUNZakaria

Upload 10 files

77b0c9c verified 10 months ago

raw

history blame contribute delete

3.46 kB

	import pickle
	import logging
	import os
	from typing import Tuple, Any
	from pathlib import Path

	class SentimentModel:
		"""Sentiment classifier built from three pickled pipeline stages.

		The stages are applied in order by :meth:`predict`: a count vectorizer,
		a TF-IDF transformer, and a classifier exposing ``predict_proba``.
		All three are loaded from disk when the instance is created.
		"""

		# Maps each instance attribute to the pickle file that populates it.
		_MODEL_FILES = {
			'count_vectorizer': 'count_vect.pkl',
			'tfidf_transformer': 'transformer.pkl',
			'classifier': 'Text_LR.pkl',
		}

		# Column of ``predict_proba`` output that is reported as "positive".
		# NOTE(review): index 2 was chosen from observed logs, not from the
		# classifier's ``classes_``; every other class (including any neutral
		# class, if one exists) is reported as "negative" -- confirm against
		# the trained model.
		POSITIVE_CLASS_INDEX = 2

		def __init__(self):
			"""Create the model and immediately load all stages from disk.

			Raises:
				FileNotFoundError: If the model directory or a model file is missing.
				Exception: Any other loading error, re-raised after being logged.
			"""
			self.count_vectorizer = None
			self.tfidf_transformer = None
			self.classifier = None
			self._load_models()

		def _load_models(self) -> None:
			"""Load all required ML models from pickle files.

			The directory comes from the ``MODEL_PATH`` environment variable and
			falls back to a ``Models`` folder located next to this file.

			Raises:
				FileNotFoundError: If the directory or any expected file is missing.
				Exception: Any error raised while opening or unpickling a file;
					it is logged and re-raised unchanged.
			"""
			try:
				# Get model path from environment or use default relative path
				default_path = str(Path(__file__).parent / 'Models')
				model_path = os.getenv('MODEL_PATH', default_path)
				logging.info(f"Loading models from: {model_path}")

				# Must be a directory, not merely an existing path.
				if not os.path.isdir(model_path):
					raise FileNotFoundError(f"Model directory not found at: {model_path}")

				for model_name, filename in self._MODEL_FILES.items():
					file_path = os.path.join(model_path, filename)
					if not os.path.exists(file_path):
						raise FileNotFoundError(f"Model file not found: {file_path}")

					# SECURITY: unpickling can execute arbitrary code. MODEL_PATH
					# must only ever point at trusted, locally produced files.
					with open(file_path, 'rb') as f:
						setattr(self, model_name, pickle.load(f))
					logging.info(f"Successfully loaded {model_name}")

			except FileNotFoundError as e:
				logging.error(f"Model file not found: {str(e)}")
				raise
			except Exception as e:
				logging.error(f"Error loading models: {str(e)}")
				raise

		def predict(self, text: str) -> Tuple[str, float]:
			"""Predict sentiment for the given text using the ML pipeline.

			Args:
				text: Input text for sentiment analysis.

			Returns:
				Tuple of the sentiment label (``"positive"`` or ``"negative"``)
				and the probability of the highest-scoring class as a float.

			Raises:
				RuntimeError: If any pipeline stage has not been loaded.
				Exception: Any error raised by a pipeline stage; it is logged
					and re-raised unchanged.
			"""
			try:
				stages = (self.count_vectorizer, self.tfidf_transformer, self.classifier)
				# Compare against None explicitly: a loaded stage may define
				# __len__/__bool__ and evaluate falsy while being perfectly usable.
				if any(stage is None for stage in stages):
					raise RuntimeError("Models not properly initialized")

				# Transform text using CountVectorizer
				count_features = self.count_vectorizer.transform([text])
				logging.debug(f"Count features shape: {count_features.shape}")

				# Apply TF-IDF transformation
				tfidf_features = self.tfidf_transformer.transform(count_features)
				logging.debug(f"TF-IDF features shape: {tfidf_features.shape}")

				# Get prediction probabilities for the single input row
				probabilities = self.classifier.predict_proba(tfidf_features)[0]
				logging.debug(f"Raw prediction probabilities: {probabilities}")

				# Find the class with highest probability
				max_prob_idx = probabilities.argmax()
				confidence = probabilities[max_prob_idx]

				# Map the prediction index to sentiment (see POSITIVE_CLASS_INDEX)
				positive_idx = self.POSITIVE_CLASS_INDEX
				sentiment = "positive" if max_prob_idx == positive_idx else "negative"

				logging.info(f"Prediction for text: '{text[:50]}...' -> {sentiment} (confidence: {confidence:.2f})")
				# Report the positive class's own probability, not the winner's,
				# and only when the classifier actually has that column.
				if len(probabilities) > positive_idx:
					logging.debug(f"Probabilities - Positive: {probabilities[positive_idx]:.3f}")

				return sentiment, float(confidence)

			except Exception as e:
				logging.error(f"Prediction error: {str(e)}")
				raise