File size: 2,068 Bytes
92b802f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import logging
import torch
import torch.nn.functional as F
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Set up logging at import time so every record carries a timestamp and level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Hugging Face Hub repository that hosts the fine-tuned checkpoint.
HF_MODEL_REPO = 'mobadara/finbert-finetuned'

logging.info(f'Initializing NLP pipeline from {HF_MODEL_REPO}...')

# Fetch the tokenizer and the classifier weights from the same repository.
tokenizer = AutoTokenizer.from_pretrained(HF_MODEL_REPO)
# eval() puts the module in evaluation mode for inference and returns the
# module itself, so the call can be chained onto the load.
model = AutoModelForSequenceClassification.from_pretrained(HF_MODEL_REPO).eval()

# Human-readable names for the classifier's output indices.
# NOTE(review): assumes the checkpoint orders its classes as
# Negative/Neutral/Positive -- confirm against model.config.id2label.
LABEL_MAPPING = {
    0: 'Negative',
    1: 'Neutral',
    2: 'Positive',
}

def predict(text: str) -> dict:
    """
    Classify the sentiment of a piece of text with the loaded model.

    The text is tokenized (truncated to at most 512 tokens), run through
    the model without gradient tracking, and the class with the highest
    softmax probability is reported.

    Args:
        text (str): The input text to analyze.

    Returns:
        dict: A dictionary with two keys: 'sentiment', the label looked up
        in LABEL_MAPPING for the winning class index, and 'confidence',
        that class's probability rounded to four decimal places.

    Raises:
        ValueError: If the input text is empty, None, or whitespace-only.
    """
    # A whitespace-only string has no content to classify; reject it together
    # with empty/None input rather than returning a meaningless score.
    if not text or (isinstance(text, str) and not text.strip()):
        raise ValueError("Input text cannot be empty or None.")

    # Tokenize the incoming text into PyTorch tensors
    inputs = tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        padding=True,
        max_length=512,
    )

    # Perform inference without tracking gradients (saves memory/time)
    with torch.no_grad():
        logits = model(**inputs).logits

        # Convert raw logits to probabilities over the class dimension
        probabilities = F.softmax(logits, dim=-1)

        # Extract the highest probability and its class index
        confidence_score, predicted_class_idx = torch.max(probabilities, dim=-1)

    # .item() requires single-element tensors, i.e. exactly one input text
    sentiment_label = LABEL_MAPPING[predicted_class_idx.item()]
    confidence_float = round(confidence_score.item(), 4)

    return {
        'sentiment': sentiment_label,
        'confidence': confidence_float,
    }