Spaces:

Rickyheyhey
/

finbert_market_evaluation

Build error

finbert_market_evaluation / src /sentiment_analyzer.py

RickyGuoTheCrazish

update docker file

47d83ad 10 months ago

5.1 kB

	# FinBERT sentiment analysis module for financial news
	"""
	This module handles loading the ProsusAI/finbert model and extracting
	sentiment predictions with confidence scores from financial news text.
	"""

	import torch
	from transformers import AutoTokenizer, AutoModelForSequenceClassification
	import streamlit as st
	from typing import Dict, Tuple
	import logging

	# Configure logging. basicConfig runs at import time and sets up the root
	# logger at INFO level; it does nothing if the root logger already has
	# handlers configured by the importing application.
	logging.basicConfig(level=logging.INFO)
	# Module-level logger, named after this module, used by FinBERTAnalyzer below.
	logger = logging.getLogger(__name__)

	class FinBERTAnalyzer:
	    """
	    A wrapper class for the ProsusAI/finbert model to analyze financial sentiment.

	    The tokenizer and model are loaded lazily (on the first call to
	    ``load_model`` or ``analyze_sentiment``) and are shared between analyzer
	    instances through Streamlit's resource cache.
	    """

	    def __init__(self, model_name: str = "ProsusAI/finbert"):
	        """
	        Initialize the FinBERT analyzer.

	        Nothing is loaded here; ``tokenizer`` and ``model`` stay ``None``
	        until ``load_model`` succeeds.

	        Args:
	            model_name: The Hugging Face model identifier
	        """
	        self.model_name = model_name
	        self.tokenizer = None
	        self.model = None
	        # Prefer the GPU when torch reports one, otherwise run on the CPU.
	        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

	    @staticmethod
	    @st.cache_resource
	    def _load_resources(model_name: str, device_name: str):
	        """
	        Load the tokenizer and model once per (model_name, device_name) pair.

	        Both arguments are plain strings so they can take part in the cache
	        key, and the loaded objects themselves are the cached return value.
	        Exceptions are not handled here; they propagate to the caller rather
	        than being converted into a cached failure flag.

	        Args:
	            model_name: The Hugging Face model identifier
	            device_name: Device string such as "cpu" or "cuda"

	        Returns:
	            Tuple of (tokenizer, model); the model is moved to the device and
	            switched to eval mode.
	        """
	        tokenizer = AutoTokenizer.from_pretrained(
	            model_name,
	            cache_dir=None,  # Use default cache
	            local_files_only=False,  # Allow downloading if needed
	        )
	        logger.info("Tokenizer loaded successfully")

	        model = AutoModelForSequenceClassification.from_pretrained(
	            model_name,
	            cache_dir=None,  # Use default cache
	            local_files_only=False,  # Allow downloading if needed
	        )
	        model.to(torch.device(device_name))
	        model.eval()
	        return tokenizer, model

	    def load_model(self):
	        """
	        Load the FinBERT model and tokenizer onto this instance.

	        The expensive load is delegated to the cached ``_load_resources``
	        helper. Caching this method directly would key the cache on nothing
	        (an underscore-prefixed receiver is excluded from hashing) and would
	        store only the boolean result, so a second instance would be told
	        "loaded" while its ``tokenizer`` and ``model`` were still ``None``.

	        Returns:
	            True if the tokenizer and model are set on this instance,
	            False if loading failed (the error is logged).
	        """
	        try:
	            logger.info(f"Loading FinBERT model: {self.model_name}")
	            self.tokenizer, self.model = self._load_resources(
	                self.model_name, str(self.device)
	            )
	            logger.info("FinBERT model loaded successfully")
	            return True

	        except Exception as e:
	            error_msg = f"Error loading FinBERT model: {str(e)}"
	            logger.error(error_msg)

	            # Provide helpful error messages; match case-insensitively so
	            # "Connection" and "connection" are treated alike.
	            details = str(e).lower()
	            if "connection" in details or "timeout" in details:
	                logger.error("Network connection issue. Check internet connectivity.")
	            elif "disk" in details or "space" in details:
	                logger.error("Insufficient disk space for model download.")
	            elif "permission" in details:
	                logger.error("Permission denied. Check file/directory permissions.")

	            return False

	    def analyze_sentiment(self, text: str) -> Dict[str, object]:
	        """
	        Analyze sentiment of financial news text.

	        Args:
	            text: The financial news text to analyze

	        Returns:
	            Dictionary with keys "sentiment" (predicted label, lower-case),
	            "confidence" (probability of that label) and "scores" (mapping of
	            every label to its probability).

	        Raises:
	            RuntimeError: If the model cannot be loaded or inference fails.
	        """
	        # Compare against None explicitly: tokenizers define __len__, so a
	        # truthiness test is not a reliable "is it loaded?" check.
	        if self.model is None or self.tokenizer is None:
	            if not self.load_model():
	                raise RuntimeError("Failed to load FinBERT model")

	        try:
	            # Tokenize input (inputs longer than 512 tokens are truncated)
	            inputs = self.tokenizer(
	                text,
	                return_tensors="pt",
	                truncation=True,
	                padding=True,
	                max_length=512,
	            )
	            inputs = {k: v.to(self.device) for k, v in inputs.items()}

	            # Get predictions
	            with torch.no_grad():
	                outputs = self.model(**inputs)
	                predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)

	            # Extract the probabilities for the single input text
	            scores = predictions.cpu().numpy()[0]

	            # The class order is model-specific (ProsusAI/finbert's published
	            # config lists positive, negative, neutral), so take the label
	            # names from the model config instead of assuming an order.
	            id2label = self.model.config.id2label
	            labels = [str(id2label[i]).lower() for i in range(len(scores))]

	            # Find the predicted sentiment and confidence
	            predicted_idx = int(scores.argmax())

	            return {
	                "sentiment": labels[predicted_idx],
	                "confidence": float(scores[predicted_idx]),
	                "scores": {
	                    label: float(score) for label, score in zip(labels, scores)
	                },
	            }

	        except Exception as e:
	            logger.error(f"Error analyzing sentiment: {str(e)}")
	            # Chain the original exception so the root cause stays visible.
	            raise RuntimeError(f"Sentiment analysis failed: {str(e)}") from e

	    def get_sentiment_direction(self, sentiment: str) -> int:
	        """
	        Convert sentiment label to numerical direction for evaluation.

	        Args:
	            sentiment: The sentiment label ("positive", "negative", "neutral");
	                case and surrounding whitespace are ignored

	        Returns:
	            1 for positive, -1 for negative, 0 for neutral, and 0 for any
	            unrecognized or non-string value
	        """
	        # Treat a missing or non-string label like an unknown label.
	        if not isinstance(sentiment, str):
	            return 0

	        sentiment_map = {
	            "positive": 1,
	            "negative": -1,
	            "neutral": 0,
	        }
	        return sentiment_map.get(sentiment.strip().lower(), 0)