Spaces:

mobadara
/

finbert-sentiment-api

Running

App Files Files Community

finbert-sentiment-api / app /ml_model.py

mobadara

Sync from GitHub via hub-sync

92b802f verified 16 days ago

raw

history blame contribute delete

2.07 kB

	import logging
	import torch
	import torch.nn.functional as F
	from transformers import AutoModelForSequenceClassification, AutoTokenizer

	# Configure root logging at import time: "<timestamp> - <level> - <message>".
	logging.basicConfig(
	    level=logging.INFO,
	    format='%(asctime)s - %(levelname)s - %(message)s',
	)

	# Hugging Face repository that the tokenizer and model weights are loaded from.
	HF_MODEL_REPO = 'mobadara/finbert-finetuned'

	logging.info(f'Initializing NLP pipeline from {HF_MODEL_REPO}...')

	# Load the tokenizer and classifier once at import so predict() can reuse them.
	tokenizer = AutoTokenizer.from_pretrained(HF_MODEL_REPO)
	# nn.Module.eval() returns the module itself, so the model is bound already
	# switched to evaluation mode for inference.
	model = AutoModelForSequenceClassification.from_pretrained(HF_MODEL_REPO).eval()

	# Model output index -> sentiment class name.
	# NOTE(review): assumes the checkpoint's label ids follow this order —
	# confirm against the model's config.
	LABEL_MAPPING = dict(enumerate(('Negative', 'Neutral', 'Positive')))

	def predict(text: str) -> dict:
	    """Classify the sentiment of ``text`` with the module-level FinBERT model.

	    The text is tokenized (truncated to at most 512 tokens), run through
	    ``model`` without gradient tracking, and the class with the highest
	    softmax probability is reported.

	    Args:
	        text: The input text to analyze.

	    Returns:
	        A dictionary with two keys: ``'sentiment'``, the class name looked
	        up in ``LABEL_MAPPING``, and ``'confidence'``, the probability of
	        that class rounded to 4 decimal places.

	    Raises:
	        ValueError: If the input text is empty, None, or whitespace only.
	    """
	    # A whitespace-only string has no content to classify, so it is rejected
	    # the same way as an empty one instead of being scored by the model.
	    if not text or (isinstance(text, str) and not text.strip()):
	        raise ValueError("Input text cannot be empty or None.")

	    # Tokenize the incoming text into PyTorch tensors.
	    inputs = tokenizer(
	        text,
	        return_tensors='pt',
	        truncation=True,
	        padding=True,
	        max_length=512,
	    )

	    # Inference only: skip gradient tracking to save memory and time.
	    with torch.no_grad():
	        logits = model(**inputs).logits

	    # Convert raw logits to probabilities over the class axis.
	    probabilities = F.softmax(logits, dim=-1)

	    # Highest probability and its class index along the same (last) axis.
	    confidence_score, predicted_class_idx = torch.max(probabilities, dim=-1)

	    # .item() needs single-element tensors, i.e. one text scored per call.
	    return {
	        'sentiment': LABEL_MAPPING[predicted_class_idx.item()],
	        'confidence': round(confidence_score.item(), 4),
	    }