# Spaces: Running
# File size: 2,068 Bytes
# 92b802f
import logging
import torch
import torch.nn.functional as F
from transformers import AutoModelForSequenceClassification, AutoTokenizer
# Root logger: INFO and above, each record prefixed with timestamp and level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Hugging Face repository the tokenizer and model weights are pulled from.
HF_MODEL_REPO = 'mobadara/finbert-finetuned'
logging.info(f'Initializing NLP pipeline from {HF_MODEL_REPO}...')

# Both objects are created once at import time and reused by every call.
tokenizer = AutoTokenizer.from_pretrained(HF_MODEL_REPO)
# .eval() returns the module itself, so the model is bound already switched
# to evaluation mode for inference.
model = AutoModelForSequenceClassification.from_pretrained(HF_MODEL_REPO).eval()

# Output index -> sentiment label (0: Negative, 1: Neutral, 2: Positive).
LABEL_MAPPING = dict(enumerate(('Negative', 'Neutral', 'Positive')))
def predict(text: str) -> dict:
    """
    Classify the sentiment of a piece of text.

    The text is tokenized with the module-level ``tokenizer``, passed through
    the module-level ``model``, and the highest-probability class index is
    translated to a label through ``LABEL_MAPPING``.

    Args:
        text (str): The input text to analyze. The tokenizer is asked to
            truncate inputs to at most 512 tokens.

    Returns:
        dict: ``{'sentiment': <label>, 'confidence': <float>}`` where the
        label is a value of ``LABEL_MAPPING`` and the confidence is the
        softmax probability of the predicted class rounded to 4 decimals.

    Raises:
        ValueError: If the input text is empty, None, or whitespace-only.
    """
    # A whitespace-only string is truthy but carries no content; without this
    # check the model would still emit a label and confidence for it.
    if not text or (isinstance(text, str) and text.isspace()):
        raise ValueError("Input text cannot be empty or None.")

    # Tokenize the incoming text into PyTorch tensors.
    inputs = tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        padding=True,
        max_length=512,
    )

    # Gradients are not needed for inference; skipping them saves memory/time.
    with torch.no_grad():
        logits = model(**inputs).logits

    # Convert raw logits to probabilities over the class dimension, then take
    # the most likely class and its probability along that same dimension.
    probabilities = F.softmax(logits, dim=-1)
    confidence_score, predicted_class_idx = torch.max(probabilities, dim=-1)

    return {
        'sentiment': LABEL_MAPPING[predicted_class_idx.item()],
        'confidence': round(confidence_score.item(), 4),
    }