# app/services/sentiment_service.py
import os
from typing import Any, Dict, List, Optional

import numpy as np
import torch
from scipy.special import softmax
from transformers import AutoConfig, AutoModelForSequenceClassification, AutoTokenizer

from app.core.config import settings


class SentimentService:
"""
A service for loading the sentiment analysis model and performing predictions.
"""

    def __init__(self) -> None:
"""
Initialize the service by loading the sentiment analysis model and tokenizer.
"""
# Select device (GPU if available, otherwise CPU)
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if self.device.type == "cuda":
print(
f"GPU found: {torch.cuda.get_device_name(0)}. Loading model onto GPU."
)
else:
print("GPU not found. Loading model onto CPU.")
# Load model, tokenizer, and config (for id2label mapping)
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
MODEL_DIR = os.path.join(
BASE_DIR, "models", "twitter-roberta-base-sentiment-latest"
)
        print(f"Loading model from: {MODEL_DIR}")
if not os.path.exists(MODEL_DIR):
raise FileNotFoundError(f"Model folder not found: {MODEL_DIR}")
# Load tokenizer, config, model
self.tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
self.config = AutoConfig.from_pretrained(MODEL_DIR)
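        # For CardiffNLP's twitter-roberta-base-sentiment-latest (as the folder
        # name suggests), config.id2label maps class ids to the labels
        # "negative" / "neutral" / "positive"; predict() relies on this mapping.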
self.model = AutoModelForSequenceClassification.from_pretrained(MODEL_DIR).to(
self.device
)
self.model.eval() # set model to inference mode
print("Sentiment model loaded successfully.")

    def _preprocess_text(self, text: str) -> str:
"""
Replace @user mentions and http links with placeholders.
"""
if not isinstance(text, str):
return ""
new_text = []
for t in text.split(" "):
t = "@user" if t.startswith("@") and len(t) > 1 else t
t = "http" if t.startswith("http") else t
new_text.append(t)
return " ".join(new_text)
    def predict(self, texts: List[str]) -> List[Dict[str, Any]]:
        """
        Predict sentiment for a batch of texts, processing them in
        sub-batches to keep memory bounded. Returns one result per input
        text, in the original order; blank inputs get a neutral default.
        """
# Preprocess all texts
preprocessed_texts = [self._preprocess_text(text) for text in texts]
# Keep only non-empty texts and remember their original indices
non_empty_texts_with_indices = [
(i, text) for i, text in enumerate(preprocessed_texts) if text.strip()
]
        if not non_empty_texts_with_indices:
            # Every input was blank; return a neutral default for each.
            return [{"label": "neutral", "score": 1.0} for _ in texts]
indices, texts_to_predict = zip(*non_empty_texts_with_indices)
        # Process in fixed-size sub-batches to keep memory bounded.
        batch_size = settings.INFERENCE_BATCH_SIZE
        predictions: List[Dict[str, Any]] = []
for start in range(0, len(texts_to_predict), batch_size):
sub_texts = texts_to_predict[start : start + batch_size]
# Tokenize
encoded_inputs = self.tokenizer(
list(sub_texts),
return_tensors="pt",
padding=True,
truncation=True,
max_length=512,
).to(self.device)
# Inference
with torch.no_grad():
outputs = self.model(**encoded_inputs)
logits = outputs.logits.detach().cpu().numpy()
            # Free per-batch tensors before processing the next chunk.
            del encoded_inputs, outputs
            if self.device.type == "cuda":
                torch.cuda.empty_cache()
# Softmax + map to labels
probs = softmax(logits, axis=1)
for prob in probs:
max_idx = int(np.argmax(prob))
predictions.append(
{
"label": self.config.id2label[max_idx],
"score": float(prob[max_idx]),
}
)
# print(f" - Processed batch {start // batch_size + 1}...")
# Map predictions back to their original positions
        final_results: List[Optional[Dict[str, Any]]] = [None] * len(texts)
for original_index, prediction in zip(indices, predictions):
final_results[original_index] = prediction
# Replace None results with a default neutral prediction
default_prediction = {"label": "neutral", "score": 1.0}
return [res if res is not None else default_prediction for res in final_results]
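

if __name__ == "__main__":
    # Minimal smoke-test sketch (illustrative only): assumes the model folder
    # described in __init__ exists and settings.INFERENCE_BATCH_SIZE is set.
    # The service is typically built once at startup and reused, since model
    # loading is the expensive step.
    service = SentimentService()
    samples = ["I love this!", "@bob that was awful https://t.co/abc", ""]
    for text, result in zip(samples, service.predict(samples)):
        print(f"{text!r} -> {result['label']} ({result['score']:.3f})")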