# Hugging Face Space (status: Running)
import logging

import torch
import torch.nn.functional as F
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Configure root logging once at import time so the start-up message below is
# emitted with a timestamp and level.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Target your specific Hugging Face repository
HF_MODEL_REPO = 'mobadara/finbert-finetuned'

# Lazy %-style arguments: the message is only formatted if the record is emitted.
logger.info('Initializing NLP pipeline from %s...', HF_MODEL_REPO)

# Load tokenizer and model weights (performed once, at import time).
tokenizer = AutoTokenizer.from_pretrained(HF_MODEL_REPO)
model = AutoModelForSequenceClassification.from_pretrained(HF_MODEL_REPO)
model.eval()  # Lock the model in evaluation mode for inference

# Map model output indices to our target classes.
# NOTE(review): assumes this index order matches the fine-tuned model's own
# id2label configuration -- confirm against the repository's config.
LABEL_MAPPING = {0: 'Negative', 1: 'Neutral', 2: 'Positive'}
def predict(text: str) -> dict:
    """Classify the sentiment of *text* with the fine-tuned FinBERT model.

    The text is tokenized (truncated to at most 512 tokens), passed through
    the module-level ``model`` with gradient tracking disabled, and the
    resulting logits are converted to probabilities with a softmax.

    Args:
        text: The input text to analyze.

    Returns:
        A dictionary with two keys: ``'sentiment'`` (the ``LABEL_MAPPING``
        entry for the highest-probability class) and ``'confidence'`` (that
        probability, rounded to 4 decimal places).

    Raises:
        ValueError: If the input text is empty, whitespace-only, or None.
    """
    # Reject whitespace-only strings as well: they carry no content, and the
    # model would otherwise return a prediction for an effectively empty input.
    if not text or (isinstance(text, str) and not text.strip()):
        raise ValueError("Input text cannot be empty or None.")

    # Tokenize the incoming text
    inputs = tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        padding=True,
        max_length=512,
    )

    # Perform inference without tracking gradients (saves memory/time)
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits

    # Convert raw logits to probabilities over the class dimension
    probabilities = F.softmax(logits, dim=-1)

    # Extract the highest probability and its index
    confidence_score, predicted_class_idx = torch.max(probabilities, dim=-1)

    sentiment_label = LABEL_MAPPING[predicted_class_idx.item()]
    confidence_float = round(confidence_score.item(), 4)

    return {
        'sentiment': sentiment_label,
        'confidence': confidence_float,
    }