finbert-sentiment-api / app /ml_model.py
mobadara's picture
Sync from GitHub via hub-sync
92b802f verified
import logging
import torch
import torch.nn.functional as F
from transformers import AutoModelForSequenceClassification, AutoTokenizer
# Configure root logging for the process: INFO level, timestamped records.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Hugging Face Hub repository that hosts the checkpoint to serve.
HF_MODEL_REPO = 'mobadara/finbert-finetuned'
logging.info(f'Initializing NLP pipeline from {HF_MODEL_REPO}...')
# Load the tokenizer and the sequence-classification model once, at import
# time; both are module-level objects shared by every inference call.
tokenizer = AutoTokenizer.from_pretrained(HF_MODEL_REPO)
model = AutoModelForSequenceClassification.from_pretrained(HF_MODEL_REPO)
# Put the model in evaluation mode for inference.
model.eval()
# Map model output indices to the sentiment labels returned to callers.
# NOTE(review): assumes the checkpoint's class order is
# Negative / Neutral / Positive -- confirm against the model config (id2label).
LABEL_MAPPING = {0: 'Negative', 1: 'Neutral', 2: "Positive"}
def predict(text: str) -> dict:
    """
    Classify the sentiment of a single piece of text.

    The text is tokenized (truncated to at most 512 tokens), run through the
    module-level model without gradient tracking, and the class with the
    highest softmax probability is reported.

    Args:
        text (str): The input text to analyze.

    Returns:
        dict: A dictionary with two keys: ``'sentiment'`` (the label looked
        up in ``LABEL_MAPPING`` for the winning class index) and
        ``'confidence'`` (the winning probability rounded to 4 decimals).

    Raises:
        ValueError: If the input text is None, empty, or whitespace only.
    """
    # Whitespace-only strings carry nothing to classify; without this guard
    # the model would score special tokens only and return a meaningless
    # label, so they are rejected exactly like empty input.
    if not text or (isinstance(text, str) and not text.strip()):
        raise ValueError("Input text cannot be empty or None.")

    # Tokenize the incoming text into PyTorch tensors.
    inputs = tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        padding=True,
        max_length=512,
    )

    # Perform inference without tracking gradients (saves memory/time).
    with torch.no_grad():
        logits = model(**inputs).logits

    # Convert raw logits to probabilities over the class dimension.
    probabilities = F.softmax(logits, dim=-1)

    # Extract the highest probability and the index of its class.
    confidence_score, predicted_class_idx = torch.max(probabilities, dim=-1)

    return {
        'sentiment': LABEL_MAPPING[predicted_class_idx.item()],
        'confidence': round(confidence_score.item(), 4),
    }