Spaces:

Rickyheyhey
/

finbert_market_evaluation

Sleeping

finbert_market_evaluation / src /streamlit_app.py

RickyGuoTheCrazish

update finbert_market_evaluation

33c14bd 8 months ago

14.4 kB

	# FinBERT Market Evaluation - Main Streamlit Application
	"""
	A confidence-aware, volatility-adjusted post-market evaluator for FinBERT sentiment
	predictions against actual stock market movements.
	"""

	import streamlit as st
	import pandas as pd
	import numpy as np
	import plotly.graph_objects as go
	import plotly.express as px
	from datetime import datetime, timedelta, date
	import time
	import logging

	# Import our custom modules
	from sentiment_analyzer import FinBERTAnalyzer
	from market_data import MarketDataService
	from evaluation import EvaluationEngine

	# Logging: INFO-level basic configuration plus a logger named after this module.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# Browser-tab title/icon and overall page layout.
	st.set_page_config(
	    layout="wide",
	    initial_sidebar_state="expanded",
	    page_title="FinBERT Market Evaluation",
	    page_icon="🚀",
	)

	# Seed per-session values only when they are absent, so existing values
	# in st.session_state are never overwritten.
	if "last_request_time" not in st.session_state:
	    # Timestamp of the last evaluation request; 0 means "never".
	    st.session_state["last_request_time"] = 0

	if "evaluation_history" not in st.session_state:
	    # Successful evaluation results, oldest first.
	    st.session_state["evaluation_history"] = []

	# Initialize services
	@st.cache_resource
	def initialize_services():
	    """Construct the three service objects used by the app.

	    The function is wrapped in ``st.cache_resource``.

	    Returns:
	        A 3-tuple ``(analyzer, market_service, evaluation_engine)`` holding a
	        ``FinBERTAnalyzer``, a ``MarketDataService`` and an
	        ``EvaluationEngine``, constructed in that order.
	    """
	    return FinBERTAnalyzer(), MarketDataService(), EvaluationEngine()

	def check_rate_limit():
	    """Report whether a new request is allowed.

	    Returns:
	        True when at least 30 seconds separate the current time from
	        ``st.session_state.last_request_time``; False otherwise.
	    """
	    return (time.time() - st.session_state.last_request_time) >= 30

	def update_rate_limit():
	    """Record the current time as the moment of the most recent request."""
	    now = time.time()
	    st.session_state.last_request_time = now

	def create_das_chart(das_score: float, confidence: float, impact: float):
	    """Build a horizontal bar figure for DAS, confidence and scaled impact.

	    Args:
	        das_score: Plotted as given on the "DAS Score" bar.
	        confidence: Plotted as given on the "Confidence" bar.
	        impact: Divided by 5.0 and capped at 1.0 before plotting on the
	            "Impact (scaled)" bar.

	    Returns:
	        A Plotly ``go.Figure`` with one horizontal bar trace.
	    """
	    # One row per bar: (label, plotted value, bar colour).
	    bars = [
	        ('DAS Score', das_score, '#1f77b4'),
	        ('Confidence', confidence, '#ff7f0e'),
	        ('Impact (scaled)', min(impact / 5.0, 1.0), '#2ca02c'),
	    ]
	    labels = [label for label, _, _ in bars]
	    scores = [score for _, score, _ in bars]
	    palette = [colour for _, _, colour in bars]

	    fig = go.Figure()
	    fig.add_trace(
	        go.Bar(
	            orientation='h',
	            x=scores,
	            y=labels,
	            marker_color=palette,
	            # Print each value inside its bar with three decimals.
	            text=[f'{score:.3f}' for score in scores],
	            textposition='inside',
	        )
	    )
	    fig.update_layout(
	        title="Evaluation Metrics",
	        xaxis_title="Score",
	        height=200,
	        margin={"l": 100, "r": 50, "t": 50, "b": 50},
	    )
	    return fig

	def _volatility_label(volatility_14d: float) -> str:
	    """Map a 14-day volatility figure to its display tier."""
	    if volatility_14d > 100:
	        return "🔥 Extremely High"
	    if volatility_14d > 50:
	        return "📊 High"
	    return "📈 Normal"


	def _confidence_label(confidence: float) -> str:
	    """Map a confidence value to "High" (> 0.8), "Medium" (> 0.6) or "Low"."""
	    if confidence > 0.8:
	        return "High"
	    if confidence > 0.6:
	        return "Medium"
	    return "Low"


	def _format_signed_percent(value) -> str:
	    """Format numbers as a signed two-decimal percentage; anything else via ``str``."""
	    if isinstance(value, (int, float)):
	        return f"{value:+.2f}%"
	    return str(value)


	def display_evaluation_result(result: dict):
	    """Render one evaluation result on the page.

	    Args:
	        result: Evaluation dictionary. If it contains an ``"error"`` key only
	            that error is shown. Otherwise these keys are read directly:
	            ``evaluation_summary``, ``is_correct``, ``volatility_14d``,
	            ``threshold``, ``return_24h``, ``impact``, ``confidence``,
	            ``das_score``, ``sentiment``, ``ticker``, ``news_date`` and
	            ``wat_weight``. These keys are optional: ``return_next_24h``,
	            ``alpha_24h``, ``alpha_adjusted`` and ``market_return``.

	    Returns:
	        None. Output is written through Streamlit calls.

	    Raises:
	        KeyError: If a directly-read key is missing from ``result``.
	    """
	    if "error" in result:
	        st.error(f"Evaluation Error: {result['error']}")
	        return

	    # Prominent evaluation summary first
	    st.markdown(f"### {result['evaluation_summary']}")

	    # Bind the values that are used in several places below.
	    is_correct = result['is_correct']
	    confidence = result['confidence']
	    threshold = result['threshold']
	    volatility = result['volatility_14d']
	    same_day_return = result['return_24h']
	    impact = result['impact']
	    next_day_return = result.get('return_next_24h')
	    alpha_24h = result.get('alpha_24h')

	    # Derived labels for the highlighted summary box.
	    alignment_color = "green" if is_correct else "red"
	    volatility_note = _volatility_label(volatility)
	    movement_significant = impact > threshold
	    significance_text = "exceeded" if movement_significant else "was below"
	    significance_label = 'Significant' if movement_significant else 'Not significant'
	    direction_text = '✅ Correct direction' if is_correct else '❌ Wrong direction or insufficient magnitude'
	    confidence_label = _confidence_label(confidence)

	    st.markdown(f"""
	<div style="background-color: rgba(0,0,0,0.1); padding: 15px; border-radius: 10px; margin: 10px 0;">
	<h4>📊 Volatility-Aware Analysis:</h4>
	<ul>
	<li><strong>Stock's 14-day volatility:</strong> {volatility:.1f}% ({volatility_note.lower()})</li>
	<li><strong>Significance threshold:</strong> {threshold:.1f}% (= 1.0 × volatility)</li>
	<li><strong>Actual movement:</strong> {same_day_return:+.2f}% ({impact:.2f}% magnitude)</li>
	<li><strong>Movement significance:</strong> {significance_text} threshold → {significance_label}</li>
	<li><strong>Directional alignment:</strong> <span style="color: {alignment_color};">{direction_text}</span></li>
	<li><strong>Model confidence:</strong> {confidence_label} ({confidence:.1%})</li>
	</ul>
	</div>
	""", unsafe_allow_html=True)

	    # Main metrics in columns
	    col1, col2, col3, col4 = st.columns(4)

	    with col1:
	        st.metric("DAS Score", f"{result['das_score']:.3f}", help="Directional Alignment Score (0-1, higher is better)")

	    with col2:
	        sentiment_emoji = {"positive": "📈", "negative": "📉", "neutral": "➡️"}
	        st.metric("Sentiment", f"{sentiment_emoji.get(result['sentiment'], '❓')} {result['sentiment'].title()}")

	    with col3:
	        st.metric("Confidence", f"{confidence:.1%}")

	    with col4:
	        # The delta text is a threshold annotation, not a signed change, so
	        # delta colouring is switched off; by default any delta string that
	        # does not start with "-" is drawn as a green increase.
	        st.metric(
	            "Same-Day Return",
	            f"{same_day_return:+.2f}%",
	            delta=f"vs {threshold:.1f}% threshold",
	            delta_color="off",
	        )

	    # Additional metrics for 24h return if available
	    if next_day_return is not None:
	        col1, col2, col3, col4 = st.columns(4)
	        with col1:
	            st.metric("Next 24h Return", f"{next_day_return:+.2f}%", help="Return from close of news day to close of next trading day")
	        with col2:
	            if alpha_24h is not None:
	                st.metric("24h Alpha", f"{alpha_24h:+.2f}%", help="24h return vs market performance")
	        with col3:
	            # Show combined impact
	            combined_impact = abs(same_day_return) + abs(next_day_return)
	            st.metric("Combined Impact", f"{combined_impact:.2f}%", help="Total magnitude of price movement")
	        with col4:
	            # Show follow-through consistency: a positive product means both
	            # returns share a sign.
	            same_direction = (same_day_return * next_day_return) > 0
	            consistency = "✅ Consistent" if same_direction else "🔄 Reversal"
	            st.metric("Follow-through", consistency, help="Whether 24h movement continued same direction")

	    # Visualization
	    chart = create_das_chart(result['das_score'], confidence, impact)
	    # A per-session counter gives every rendered chart a distinct widget key,
	    # since this function is called more than once per page run.
	    if 'chart_counter' not in st.session_state:
	        st.session_state.chart_counter = 0
	    st.session_state.chart_counter += 1
	    chart_key = f"chart_{st.session_state.chart_counter}"
	    st.plotly_chart(chart, use_container_width=True, key=chart_key)

	    # Technical metrics (always visible)
	    st.subheader("📊 Technical Metrics")

	    col1, col2, col3 = st.columns(3)

	    with col1:
	        st.metric("Ticker", result['ticker'])
	        st.metric("News Date", result['news_date'])
	        st.metric("14-day Volatility", f"{volatility:.2f}%")
	        st.metric("Significance Threshold", f"{threshold:.2f}%")

	    with col2:
	        st.metric("Same-Day Impact", f"{impact:.2f}%")
	        if next_day_return is not None:
	            st.metric("24h Impact", f"{abs(next_day_return):.2f}%")
	        st.metric("WAT Weight", f"{result['wat_weight']:.3f}")
	        alignment_text = "✅ Yes" if is_correct else "❌ No"
	        st.metric("Alignment", alignment_text)

	    with col3:
	        # Missing or non-numeric values fall back to their string form ("N/A").
	        st.metric("Same-Day Alpha", _format_signed_percent(result.get('alpha_adjusted', 'N/A')))

	        if alpha_24h is not None:
	            st.metric("24h Alpha", f"{alpha_24h:+.2f}%")

	        # Market context
	        st.metric("Market Return", _format_signed_percent(result.get('market_return', 'N/A')))

	def _render_sidebar():
	    """Write the static description of the evaluation framework to the sidebar."""
	    st.sidebar.header("📊 Evaluation Framework")
	    st.sidebar.markdown("""
	Dual-Period Analysis:
	- Same-Day: Intraday return (Close - Open)
	- Next 24h: Close-to-close follow-through
	- Combined: Complete market reaction picture

	Volatility-Aware Evaluation:
	- Uses each stock's 14-day volatility
	- Threshold = 1.0 × volatility (k=1.0)
	- Adapts to stock movement patterns

	Directional Alignment Score:
	- Graded 0-1 score (not binary)
	- Based on same-day return vs threshold
	- Higher = better alignment

	Alpha Analysis:
	- Stock return vs market performance
	- Isolates stock-specific impact
	- Available for both time periods
	""")


	def _render_input_form():
	    """Draw the news/ticker/date form and return ``(submitted, news_text, ticker, news_date)``."""
	    with st.form("evaluation_form"):
	        # News text input
	        news_text = st.text_area(
	            "Financial News Text",
	            height=150,
	            placeholder="Enter financial news headline or summary here...",
	            help="Paste the financial news text you want to analyze"
	        )

	        col1, col2 = st.columns(2)

	        with col1:
	            # Strip surrounding whitespace so input such as " aapl " becomes
	            # "AAPL" and whitespace-only input counts as empty.
	            ticker = st.text_input(
	                "Stock Ticker",
	                placeholder="e.g., TSLA, AAPL, MSFT",
	                help="Enter the stock ticker symbol"
	            ).strip().upper()

	        with col2:
	            yesterday = date.today() - timedelta(days=1)
	            news_date = st.date_input(
	                "News Publication Date",
	                value=yesterday,
	                max_value=yesterday,
	                help="Date when the news was published (must be at least 1 day ago)"
	            )

	        submitted = st.form_submit_button("🔍 Evaluate Prediction")

	    return submitted, news_text, ticker, news_date


	def _handle_submission(news_text, ticker, news_date, analyzer, market_service, evaluation_engine):
	    """Validate the form input, apply the rate limit, then run and show one evaluation."""
	    if not news_text.strip():
	        st.error("Please enter some news text to analyze.")
	        return

	    if not ticker:
	        st.error("Please enter a stock ticker symbol.")
	        return

	    # Rate limiting check
	    if not check_rate_limit():
	        remaining_time = 30 - (time.time() - st.session_state.last_request_time)
	        st.warning(f"Rate limit: Please wait {remaining_time:.0f} more seconds before next request.")
	        return

	    # The request is stamped before the work starts, so a failed evaluation
	    # still counts against the rate limit.
	    update_rate_limit()

	    # Show progress
	    progress_bar = st.progress(0)
	    status_text = st.empty()

	    try:
	        # Step 1: Sentiment Analysis
	        status_text.text("🤖 Analyzing sentiment with FinBERT...")
	        progress_bar.progress(25)

	        sentiment_result = analyzer.analyze_sentiment(news_text)

	        # Step 2: Market Data
	        status_text.text("📊 Fetching market data...")
	        progress_bar.progress(50)

	        # The selected date is combined with 00:00 to form a datetime.
	        news_datetime = datetime.combine(news_date, datetime.min.time())
	        market_result = market_service.get_stock_evaluation_data(ticker, news_datetime)

	        # Step 3: Evaluation
	        status_text.text("⚖️ Evaluating prediction...")
	        progress_bar.progress(75)

	        evaluation_result = evaluation_engine.evaluate_prediction(
	            sentiment_result, market_result, news_datetime
	        )

	        # Step 4: Display Results
	        status_text.text("✅ Evaluation complete!")
	        progress_bar.progress(100)

	        # Leave the completed state visible briefly, then clear the indicators.
	        time.sleep(0.5)
	        progress_bar.empty()
	        status_text.empty()

	        # Display results
	        st.header("📊 Evaluation Results")
	        display_evaluation_result(evaluation_result)

	        # Only successful evaluations are kept in the history.
	        if "error" not in evaluation_result:
	            st.session_state.evaluation_history.append(evaluation_result)

	    except Exception as e:
	        # Page-level boundary: show the failure to the user and log it with
	        # its traceback instead of letting the script run abort.
	        progress_bar.empty()
	        status_text.empty()
	        st.error(f"Evaluation failed: {str(e)}")
	        logger.exception("Evaluation error: %s", e)


	def _render_history():
	    """Show stored evaluations newest-first in expanders, plus a clear-history button."""
	    history = st.session_state.evaluation_history
	    if not history:
	        return

	    st.header("📋 Previous Evaluations")

	    sentiment_icons = {"positive": "📈", "negative": "📉", "neutral": "➡️"}

	    # Show most recent evaluations first (reverse chronological)
	    for position, past in enumerate(reversed(history)):
	        # Create a concise title for each evaluation
	        alignment_icon = "✅" if past['is_correct'] else "❌"
	        sentiment_icon = sentiment_icons.get(past['sentiment'], "❓")

	        title = (
	            f"{alignment_icon} {past['ticker']} ({past['news_date']}) - "
	            f"{sentiment_icon} {past['sentiment'].title()} → {past['return_24h']:+.1f}% | "
	            f"DAS: {past['das_score']:.3f}"
	        )

	        with st.expander(title, expanded=(position == 0)):  # Expand the most recent one
	            display_evaluation_result(past)

	    st.markdown("---")

	    # Simple action buttons
	    col1, col2 = st.columns([1, 3])

	    with col1:
	        if st.button("🗑️ Clear All History"):
	            st.session_state.evaluation_history = []
	            st.rerun()

	    with col2:
	        st.caption(f"📊 {len(st.session_state.evaluation_history)} evaluation(s) completed")


	def main():
	    """Main application function.

	    Draws the header and sidebar, initialises the services, shows the input
	    form, processes a submission if there is one, then renders the evaluation
	    history and the footer. The history and footer are rendered even when a
	    submission is rejected by validation or the rate limit.
	    """
	    # Header
	    st.title("🚀 FinBERT Market Evaluation")
	    st.markdown("""
	A confidence-aware, volatility-adjusted post-market evaluator for FinBERT sentiment predictions.
	Evaluate how well FinBERT's financial news sentiment aligns with actual stock market movements.
	""")

	    # Sidebar info (no user configuration needed)
	    _render_sidebar()

	    # Fixed research parameters (not user-configurable)
	    volatility_multiplier = 1.0  # k = 1.0 as per your framework
	    confidence_threshold = 0.7  # Reasonable default

	    # Initialize services
	    try:
	        analyzer, market_service, evaluation_engine = initialize_services()
	        evaluation_engine.volatility_multiplier = volatility_multiplier
	        evaluation_engine.confidence_threshold = confidence_threshold
	    except Exception as e:
	        st.error(f"Failed to initialize services: {str(e)}")
	        st.stop()

	    # Main input form
	    st.header("📰 News Analysis")
	    submitted, news_text, ticker, news_date = _render_input_form()

	    # Process evaluation
	    if submitted:
	        _handle_submission(
	            news_text, ticker, news_date,
	            analyzer, market_service, evaluation_engine,
	        )

	    # Evaluation History Section
	    _render_history()

	    # Footer
	    st.markdown("---")
	    st.caption("🚀 FinBERT Market Evaluation | Rate limit: 30s | Model: ProsusAI/finbert | Data: Yahoo Finance")


	if __name__ == "__main__":
	    main()