| | |
| | """ |
| | A confidence-aware, volatility-adjusted post-market evaluator for FinBERT sentiment |
| | predictions against actual stock market movements. |
| | """ |
| |
|
| | import streamlit as st |
| | import pandas as pd |
| | import numpy as np |
| | import plotly.graph_objects as go |
| | import plotly.express as px |
| | from datetime import datetime, timedelta, date |
| | import time |
| | import logging |
| |
|
| | |
| | from sentiment_analyzer import FinBERTAnalyzer |
| | from market_data import MarketDataService |
| | from evaluation import EvaluationEngine |
| |
|
| | |
# Logging: INFO level via basicConfig, plus this module's own named logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Page-level Streamlit settings (title, icon, wide layout, sidebar open).
st.set_page_config(
    layout="wide",
    initial_sidebar_state="expanded",
    page_title="FinBERT Market Evaluation",
    page_icon="π",
)

# Seed the session-state entries the rest of the app reads:
#   last_request_time  - timestamp of the last accepted request (0 = never)
#   evaluation_history - list of completed evaluation result dicts
if "last_request_time" not in st.session_state:
    st.session_state["last_request_time"] = 0

if "evaluation_history" not in st.session_state:
    st.session_state["evaluation_history"] = []
| |
|
| | |
@st.cache_resource
def initialize_services():
    """Construct the three backing services and return them as a tuple.

    Returns:
        ``(analyzer, market_service, evaluation_engine)`` - a
        ``FinBERTAnalyzer``, a ``MarketDataService`` and an
        ``EvaluationEngine``, in that order. The function is wrapped in
        ``st.cache_resource``.
    """
    # Tuple elements are evaluated left to right, so construction order is
    # analyzer -> market service -> evaluation engine.
    return FinBERTAnalyzer(), MarketDataService(), EvaluationEngine()
| |
|
def check_rate_limit():
    """Tell whether a new request is allowed under the 30-second rate limit.

    Returns:
        True when at least 30 seconds have passed since
        ``st.session_state.last_request_time``, otherwise False.
    """
    now = time.time()
    elapsed = now - st.session_state.last_request_time
    if elapsed >= 30:
        return True
    return False
| |
|
def update_rate_limit():
    """Record the current time as the moment of the most recent request."""
    stamp = time.time()
    st.session_state["last_request_time"] = stamp
| |
|
def create_das_chart(das_score: float, confidence: float, impact: float):
    """Build a horizontal bar figure for DAS, confidence, and scaled impact.

    Args:
        das_score: Value plotted as the "DAS Score" bar.
        confidence: Value plotted as the "Confidence" bar.
        impact: Raw impact; it is divided by 5.0 and capped at 1.0 before
            being plotted as the "Impact (scaled)" bar.

    Returns:
        The plotly ``Figure`` containing a single horizontal bar trace.
    """
    # One (label, plotted value, bar colour) triple per bar, in display order.
    bars = (
        ('DAS Score', das_score, '#1f77b4'),
        ('Confidence', confidence, '#ff7f0e'),
        ('Impact (scaled)', min(impact / 5.0, 1.0), '#2ca02c'),
    )
    labels = [label for label, _, _ in bars]
    scores = [score for _, score, _ in bars]
    palette = [colour for _, _, colour in bars]

    figure = go.Figure()
    bar_trace = go.Bar(
        y=labels,
        x=scores,
        orientation='h',
        marker_color=palette,
        # Each bar is annotated with its value to three decimals.
        text=[f'{score:.3f}' for score in scores],
        textposition='inside',
    )
    figure.add_trace(bar_trace)

    figure.update_layout(
        title="Evaluation Metrics",
        xaxis_title="Score",
        height=200,
        margin=dict(l=100, r=50, t=50, b=50),
    )
    return figure
| |
|
def _format_signed_percent(value):
    """Format a number as a signed percentage; pass anything else through str()."""
    if isinstance(value, (int, float)):
        return f"{value:+.2f}%"
    return str(value)


def display_evaluation_result(result: dict):
    """Render one evaluation result in the Streamlit page.

    Args:
        result: Evaluation dict. If it contains an ``"error"`` key only that
            error is shown. Otherwise the keys read are
            ``evaluation_summary``, ``is_correct``, ``volatility_14d``,
            ``threshold``, ``impact``, ``return_24h``, ``confidence``,
            ``das_score``, ``sentiment``, ``ticker``, ``news_date`` and
            ``wat_weight`` (required), plus ``return_next_24h``,
            ``alpha_24h``, ``alpha_adjusted`` and ``market_return``
            (optional, read with ``.get``).

    Returns:
        None. Output is written through ``st`` calls; the function also
        increments ``st.session_state.chart_counter`` to give every chart a
        unique widget key.
    """
    # NOTE(review): glyphs such as "π", "β" and "Γ" in the UI strings below
    # look like emoji/symbols that were mis-decoded at some point. Only the
    # literals that had been split across lines (and were therefore invalid
    # Python) are repaired here, restoring the check mark. The rest are left
    # untouched because the intended characters cannot be recovered with
    # certainty - restore them from the original source.
    if "error" in result:
        st.error(f"Evaluation Error: {result['error']}")
        return

    st.markdown(f"### {result['evaluation_summary']}")

    is_correct = result['is_correct']
    volatility = result['volatility_14d']
    impact = result['impact']
    threshold = result['threshold']
    same_day_return = result['return_24h']
    confidence = result['confidence']

    alignment_color = "green" if is_correct else "red"
    if volatility > 100:
        volatility_note = "π₯ Extremely High"
    elif volatility > 50:
        volatility_note = "π High"
    else:
        volatility_note = "π Normal"

    # A move counts as significant only when its magnitude is strictly above
    # the threshold.
    movement_significant = impact > threshold
    significance_text = "exceeded" if movement_significant else "was below"
    significance_label = 'Significant' if movement_significant else 'Not significant'
    alignment_label = (
        '✅ Correct direction'
        if is_correct
        else 'β Wrong direction or insufficient magnitude'
    )
    if confidence > 0.8:
        confidence_label = 'High'
    elif confidence > 0.6:
        confidence_label = 'Medium'
    else:
        confidence_label = 'Low'

    summary_items = [
        f"<li><strong>Stock's 14-day volatility:</strong> {volatility:.1f}% ({volatility_note.lower()})</li>",
        f"<li><strong>Significance threshold:</strong> {threshold:.1f}% (= 1.0 Γ volatility)</li>",
        f"<li><strong>Actual movement:</strong> {same_day_return:+.2f}% ({impact:.2f}% magnitude)</li>",
        f"<li><strong>Movement significance:</strong> {significance_text} threshold β {significance_label}</li>",
        f"<li><strong>Directional alignment:</strong> <span style=\"color: {alignment_color};\">{alignment_label}</span></li>",
        f"<li><strong>Model confidence:</strong> {confidence_label} ({confidence:.1%})</li>",
    ]
    # The HTML is kept flush-left so Markdown treats it as an HTML block
    # rather than an indented code block.
    st.markdown(
        '\n<div style="background-color: rgba(0,0,0,0.1); padding: 15px; '
        'border-radius: 10px; margin: 10px 0;">\n'
        '<h4>π Volatility-Aware Analysis:</h4>\n'
        '<ul>\n'
        + "\n".join(summary_items)
        + '\n</ul>\n</div>\n',
        unsafe_allow_html=True,
    )

    # --- Headline metrics -------------------------------------------------
    col1, col2, col3, col4 = st.columns(4)

    with col1:
        st.metric(
            "DAS Score",
            f"{result['das_score']:.3f}",
            help="Directional Alignment Score (0-1, higher is better)",
        )

    with col2:
        sentiment_emoji = {"positive": "π", "negative": "π", "neutral": "β‘οΈ"}
        st.metric(
            "Sentiment",
            f"{sentiment_emoji.get(result['sentiment'], 'β')} {result['sentiment'].title()}",
        )

    with col3:
        st.metric("Confidence", f"{confidence:.1%}")

    with col4:
        # The original computed a `return_color` here but never passed it to
        # st.metric; the dead local has been removed.
        st.metric(
            "Same-Day Return",
            f"{same_day_return:+.2f}%",
            delta=f"vs {threshold:.1f}% threshold",
        )

    # --- Next-24h follow-through (only when that return is available) -----
    next_return = result.get('return_next_24h')
    alpha_24h = result.get('alpha_24h')

    if next_return is not None:
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            st.metric(
                "Next 24h Return",
                f"{next_return:+.2f}%",
                help="Return from close of news day to close of next trading day",
            )
        with col2:
            if alpha_24h is not None:
                st.metric(
                    "24h Alpha",
                    f"{alpha_24h:+.2f}%",
                    help="24h return vs market performance",
                )
        with col3:
            combined_impact = abs(same_day_return) + abs(next_return)
            st.metric(
                "Combined Impact",
                f"{combined_impact:.2f}%",
                help="Total magnitude of price movement",
            )
        with col4:
            # Same sign on both returns <=> strictly positive product; a zero
            # return on either day is reported as a reversal.
            same_direction = (same_day_return * next_return) > 0
            consistency = "✅ Consistent" if same_direction else "π Reversal"
            st.metric(
                "Follow-through",
                consistency,
                help="Whether 24h movement continued same direction",
            )

    # --- Chart ------------------------------------------------------------
    chart = create_das_chart(result['das_score'], confidence, impact)
    # Every rendered chart gets a fresh key from a session-wide counter so
    # the same result can be drawn more than once in a single run.
    if 'chart_counter' not in st.session_state:
        st.session_state.chart_counter = 0
    st.session_state.chart_counter += 1
    chart_key = f"chart_{st.session_state.chart_counter}"
    st.plotly_chart(chart, use_container_width=True, key=chart_key)

    # --- Technical metrics ------------------------------------------------
    st.subheader("π Technical Metrics")

    col1, col2, col3 = st.columns(3)

    with col1:
        st.metric("Ticker", result['ticker'])
        st.metric("News Date", result['news_date'])
        st.metric("14-day Volatility", f"{volatility:.2f}%")
        st.metric("Significance Threshold", f"{threshold:.2f}%")

    with col2:
        st.metric("Same-Day Impact", f"{impact:.2f}%")
        if next_return is not None:
            st.metric("24h Impact", f"{abs(next_return):.2f}%")
        st.metric("WAT Weight", f"{result['wat_weight']:.3f}")
        alignment_text = "✅ Yes" if is_correct else "β No"
        st.metric("Alignment", alignment_text)

    with col3:
        st.metric(
            "Same-Day Alpha",
            _format_signed_percent(result.get('alpha_adjusted', 'N/A')),
        )

        if alpha_24h is not None:
            st.metric("24h Alpha", f"{alpha_24h:+.2f}%")

        st.metric(
            "Market Return",
            _format_signed_percent(result.get('market_return', 'N/A')),
        )
| |
|
# Static page copy, kept flush-left so Markdown does not treat it as an
# indented code block.
_INTRO_MARKDOWN = """
A confidence-aware, volatility-adjusted post-market evaluator for FinBERT sentiment predictions.
Evaluate how well FinBERT's financial news sentiment aligns with actual stock market movements.
"""

_SIDEBAR_MARKDOWN = """
**Dual-Period Analysis:**
- **Same-Day**: Intraday return (Close - Open)
- **Next 24h**: Close-to-close follow-through
- **Combined**: Complete market reaction picture

**Volatility-Aware Evaluation:**
- Uses each stock's 14-day volatility
- Threshold = 1.0 Γ volatility (k=1.0)
- Adapts to stock movement patterns

**Directional Alignment Score:**
- Graded 0-1 score (not binary)
- Based on same-day return vs threshold
- Higher = better alignment

**Alpha Analysis:**
- Stock return vs market performance
- Isolates stock-specific impact
- Available for both time periods
"""


def main():
    """Main application function.

    Renders the page header and sidebar, initializes the services, shows the
    news/ticker/date form, and on submit runs sentiment analysis, market-data
    lookup and evaluation. Successful results are appended to
    ``st.session_state.evaluation_history``, which is listed (newest first)
    below the form together with a button that clears it.

    Validation failures and rate-limit rejections return early, so nothing
    below the form is rendered for that run.
    """
    # NOTE(review): glyphs such as "π", "β" and "Γ" in the UI strings look
    # like emoji/symbols that were mis-decoded at some point. Only the
    # literals that had been split across lines (invalid Python) are repaired
    # here, restoring the check mark; the rest are left untouched because the
    # intended characters cannot be recovered with certainty.
    st.title("π FinBERT Market Evaluation")
    st.markdown(_INTRO_MARKDOWN)

    st.sidebar.header("π Evaluation Framework")
    st.sidebar.markdown(_SIDEBAR_MARKDOWN)

    # Fixed evaluation parameters pushed onto the evaluation engine.
    volatility_multiplier = 1.0
    confidence_threshold = 0.7

    try:
        analyzer, market_service, evaluation_engine = initialize_services()
        evaluation_engine.volatility_multiplier = volatility_multiplier
        evaluation_engine.confidence_threshold = confidence_threshold
    except Exception as e:
        st.error(f"Failed to initialize services: {str(e)}")
        st.stop()

    st.header("π° News Analysis")

    with st.form("evaluation_form"):
        news_text = st.text_area(
            "Financial News Text",
            height=150,
            placeholder="Enter financial news headline or summary here...",
            help="Paste the financial news text you want to analyze",
        )

        col1, col2 = st.columns(2)

        with col1:
            # Strip before upper-casing so whitespace-only input is caught by
            # the empty-ticker check below and stray spaces never reach the
            # market-data lookup.
            ticker = st.text_input(
                "Stock Ticker",
                placeholder="e.g., TSLA, AAPL, MSFT",
                help="Enter the stock ticker symbol",
            ).strip().upper()

        with col2:
            yesterday = date.today() - timedelta(days=1)
            news_date = st.date_input(
                "News Publication Date",
                value=yesterday,
                max_value=yesterday,
                help="Date when the news was published (must be at least 1 day ago)",
            )

        submitted = st.form_submit_button("π Evaluate Prediction")

    if submitted:
        if not news_text.strip():
            st.error("Please enter some news text to analyze.")
            return

        if not ticker:
            st.error("Please enter a stock ticker symbol.")
            return

        if not check_rate_limit():
            remaining_time = 30 - (time.time() - st.session_state.last_request_time)
            st.warning(f"Rate limit: Please wait {remaining_time:.0f} more seconds before next request.")
            return

        # The request is accepted: start the rate-limit window now, before
        # any of the slow calls below.
        update_rate_limit()

        progress_bar = st.progress(0)
        status_text = st.empty()

        try:
            status_text.text("π€ Analyzing sentiment with FinBERT...")
            progress_bar.progress(25)

            sentiment_result = analyzer.analyze_sentiment(news_text)

            status_text.text("π Fetching market data...")
            progress_bar.progress(50)

            # Midnight of the chosen date is used as the news timestamp.
            news_datetime = datetime.combine(news_date, datetime.min.time())
            market_result = market_service.get_stock_evaluation_data(ticker, news_datetime)

            status_text.text("βοΈ Evaluating prediction...")
            progress_bar.progress(75)

            evaluation_result = evaluation_engine.evaluate_prediction(
                sentiment_result, market_result, news_datetime
            )

            status_text.text("✅ Evaluation complete!")
            progress_bar.progress(100)

            # Leave the "complete" state visible briefly before clearing it.
            time.sleep(0.5)
            progress_bar.empty()
            status_text.empty()

            st.header("π Evaluation Results")
            display_evaluation_result(evaluation_result)

            # Only successful evaluations are kept in the history.
            if "error" not in evaluation_result:
                st.session_state.evaluation_history.append(evaluation_result)

        except Exception as e:
            progress_bar.empty()
            status_text.empty()
            st.error(f"Evaluation failed: {str(e)}")
            # logger.exception records the traceback along with the message.
            logger.exception("Evaluation error: %s", e)

    if st.session_state.evaluation_history:
        st.header("π Previous Evaluations")

        # Newest first; only the most recent entry starts expanded.
        for i, result in enumerate(reversed(st.session_state.evaluation_history)):
            alignment_icon = "✅" if result['is_correct'] else "β"
            sentiment_icon = {"positive": "π", "negative": "π", "neutral": "β‘οΈ"}.get(result['sentiment'], "β")

            title = f"{alignment_icon} {result['ticker']} ({result['news_date']}) - {sentiment_icon} {result['sentiment'].title()} β {result['return_24h']:+.1f}% | DAS: {result['das_score']:.3f}"

            with st.expander(title, expanded=(i == 0)):
                display_evaluation_result(result)

        st.markdown("---")

        col1, col2 = st.columns([1, 3])

        with col1:
            if st.button("ποΈ Clear All History"):
                st.session_state.evaluation_history = []
                st.rerun()

        with col2:
            st.caption(f"π {len(st.session_state.evaluation_history)} evaluation(s) completed")

    st.markdown("---")
    st.caption("π **FinBERT Market Evaluation** | Rate limit: 30s | Model: ProsusAI/finbert | Data: Yahoo Finance")
| |
|
# Run the app when this file is executed as the main module.
if __name__ == "__main__":
    main()