import streamlit as st import torch import numpy as np import os import tempfile from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline from peft import PeftModel import plotly.express as px import plotly.graph_objects as go import pandas as pd import time from datetime import datetime import re # Fix cache permission issues for Hugging Face Spaces os.environ['HF_HOME'] = tempfile.mkdtemp() os.environ['TRANSFORMERS_CACHE'] = tempfile.mkdtemp() os.environ['HF_HUB_CACHE'] = tempfile.mkdtemp() os.environ['TORCH_HOME'] = tempfile.mkdtemp() # Disable Streamlit telemetry to avoid permission issues os.environ['STREAMLIT_TELEMETRY'] = 'false' # Page configuration st.set_page_config( page_title="LAPEFT Financial Sentiment Analyzer", page_icon="📈", layout="wide", initial_sidebar_state="expanded" ) # Custom CSS for better styling st.markdown(""" """, unsafe_allow_html=True) # Initialize session state if 'analysis_history' not in st.session_state: st.session_state.analysis_history = [] if 'model_loaded' not in st.session_state: st.session_state.model_loaded = False if 'model' not in st.session_state: st.session_state.model = None if 'tokenizer' not in st.session_state: st.session_state.tokenizer = None @st.cache_resource def load_model(): """Load the LAPEFT model with caching for better performance""" try: with st.spinner("🔄 Loading LAPEFT Financial Sentiment Model..."): # Set cache directory to a writable temporary directory cache_dir = tempfile.mkdtemp() # Load tokenizer with custom cache directory tokenizer = AutoTokenizer.from_pretrained( "Hananguyen12/LAPEFT-Financial-Sentiment-Analysis", cache_dir=cache_dir, force_download=False, resume_download=True ) # Load base model with custom cache directory base_model = AutoModelForSequenceClassification.from_pretrained( "bert-base-uncased", num_labels=3, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, cache_dir=cache_dir, force_download=False, resume_download=True ) # Load PEFT adapter with custom cache directory model = PeftModel.from_pretrained( base_model, "Hananguyen12/LAPEFT-Financial-Sentiment-Analysis", cache_dir=cache_dir, force_download=False, resume_download=True ) # Set to evaluation mode model.eval() return model, tokenizer except Exception as e: st.error(f"❌ Error loading LAPEFT model: {str(e)}") st.info("💡 Falling back to a simpler sentiment model...") try: # Try a lightweight fallback model cache_dir = tempfile.mkdtemp() pipe = pipeline( "sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest", cache_dir=cache_dir, device=-1 # Force CPU to avoid GPU issues ) return pipe, None except Exception as e2: st.error(f"❌ Fallback model also failed: {str(e2)}") st.info("💡 Using local mock model for demonstration...") # Create a mock model for demonstration return create_mock_model(), None def create_mock_model(): """Create a simple mock sentiment analyzer when models fail to load""" class MockModel: def __call__(self, text): # Simple keyword-based sentiment analysis positive_words = ['profit', 'growth', 'increase', 'up', 'high', 'good', 'strong', 'beat', 'exceed', 'positive', 'bullish', 'rally'] negative_words = ['loss', 'decline', 'decrease', 'down', 'low', 'bad', 'weak', 'miss', 'below', 'negative', 'bearish', 'crash'] text_lower = text.lower() pos_score = sum(1 for word in positive_words if word in text_lower) neg_score = sum(1 for word in negative_words if word in text_lower) if pos_score > neg_score: return [{'label': 'POSITIVE', 'score': 0.7 + min(0.25, pos_score * 0.1)}] elif neg_score > pos_score: return [{'label': 'NEGATIVE', 'score': 0.7 + min(0.25, neg_score * 0.1)}] else: return [{'label': 'NEUTRAL', 'score': 0.6}] return MockModel() def analyze_sentiment(text, model, tokenizer): """Analyze sentiment using the LAPEFT model""" try: # Handle different model types if tokenizer is None: # Fallback pipeline or mock model results = model(text) # Handle different result formats if isinstance(results, list) and len(results) > 0: result = results[0] # Map different label formats to our format label = result.get('label', 'NEUTRAL').upper() if label in ['POSITIVE', 'POS', 'LABEL_2']: sentiment = "positive" elif label in ['NEGATIVE', 'NEG', 'LABEL_0']: sentiment = "negative" else: sentiment = "neutral" confidence = result.get('score', 0.5) # Create probability distribution if sentiment == "positive": probabilities = {"positive": confidence, "neutral": (1-confidence)/2, "negative": (1-confidence)/2} elif sentiment == "negative": probabilities = {"negative": confidence, "neutral": (1-confidence)/2, "positive": (1-confidence)/2} else: probabilities = {"neutral": confidence, "positive": (1-confidence)/2, "negative": (1-confidence)/2} else: # Default fallback sentiment = "neutral" confidence = 0.5 probabilities = {"negative": 0.33, "neutral": 0.34, "positive": 0.33} else: # LAPEFT model # Tokenize input inputs = tokenizer( text, return_tensors="pt", truncation=True, padding=True, max_length=512 ) # Get model predictions with torch.no_grad(): outputs = model(**inputs) predictions = torch.nn.functional.softmax(outputs.logits, dim=-1) predicted_class = torch.argmax(predictions, dim=-1) # Map predictions to labels labels = ["negative", "neutral", "positive"] sentiment = labels[predicted_class.item()] confidence = predictions[0][predicted_class].item() # Get all probabilities probabilities = { "negative": predictions[0][0].item(), "neutral": predictions[0][1].item(), "positive": predictions[0][2].item() } return sentiment, confidence, probabilities except Exception as e: st.error(f"❌ Error during analysis: {str(e)}") # Return neutral sentiment as fallback return "neutral", 0.5, {"negative": 0.33, "neutral": 0.34, "positive": 0.33} def get_confidence_color(confidence): """Get color class based on confidence level""" if confidence >= 0.8: return "confidence-high" elif confidence >= 0.6: return "confidence-medium" else: return "confidence-low" def get_sentiment_emoji(sentiment): """Get emoji for sentiment""" emoji_map = { "positive": "📈", "negative": "📉", "neutral": "➖" } return emoji_map.get(sentiment, "❓") def create_probability_chart(probabilities): """Create a probability distribution chart""" df = pd.DataFrame(list(probabilities.items()), columns=['Sentiment', 'Probability']) colors = {'negative': '#dc3545', 'neutral': '#ffc107', 'positive': '#28a745'} df['Color'] = df['Sentiment'].map(colors) fig = px.bar( df, x='Sentiment', y='Probability', color='Sentiment', color_discrete_map=colors, title="Sentiment Probability Distribution" ) fig.update_layout( showlegend=False, height=400, yaxis_title="Probability", xaxis_title="Sentiment Class" ) return fig def create_history_chart(): """Create a chart showing sentiment analysis history""" if not st.session_state.analysis_history: return None df = pd.DataFrame(st.session_state.analysis_history) df['timestamp'] = pd.to_datetime(df['timestamp']) # Count sentiments over time sentiment_counts = df.groupby(['timestamp', 'sentiment']).size().unstack(fill_value=0) fig = go.Figure() colors = {'negative': '#dc3545', 'neutral': '#ffc107', 'positive': '#28a745'} for sentiment in ['negative', 'neutral', 'positive']: if sentiment in sentiment_counts.columns: fig.add_trace(go.Scatter( x=sentiment_counts.index, y=sentiment_counts[sentiment], mode='lines+markers', name=sentiment.capitalize(), line=dict(color=colors[sentiment]) )) fig.update_layout( title="Sentiment Analysis History", xaxis_title="Time", yaxis_title="Count", height=400 ) return fig # Main app def main(): # Header st.markdown('

📈 LAPEFT Financial Sentiment Analyzer

', unsafe_allow_html=True) st.markdown("""

""", unsafe_allow_html=True) # Sidebar with st.sidebar: st.markdown("## 🎛️ Controls") # Model loading if st.button("🔄 Load/Reload Model", type="primary"): st.session_state.model_loaded = False st.cache_resource.clear() st.session_state.model, st.session_state.tokenizer = load_model() st.session_state.model_loaded = True st.success("✅ Model loaded successfully!") st.markdown("---") # Settings st.markdown("## ⚙️ Settings") show_probabilities = st.checkbox("Show probability scores", value=True) show_history = st.checkbox("Show analysis history", value=True) auto_examples = st.checkbox("Show example texts", value=True) st.markdown("---") # Model info st.markdown("## 📊 Model Information") # Check model status if st.session_state.get('model_loaded', False): if st.session_state.tokenizer is not None: model_status = "✅ LAPEFT Model Loaded" model_type = "Full LAPEFT with LoRA + Gated Fusion" else: model_status = "⚠️ Fallback Model Active" model_type = "Simplified Sentiment Model" else: model_status = "❌ Model Not Loaded" model_type = "No Model" st.info(f""" **Status:** {model_status} **Type:** {model_type} **LAPEFT Model Features:** - 🧠 BERT-base-uncased backbone - 🔗 LoRA parameter-efficient fine-tuning - 🎯 Gated fusion mechanism - 📚 Financial lexicon augmentation - 💾 Memory-optimized training **Sentiment Classes:** - 📉 Negative (Bearish) - ➖ Neutral (Factual) - 📈 Positive (Bullish) """) if st.button("🗑️ Clear History"): st.session_state.analysis_history = [] st.success("History cleared!") # Debug info with st.expander("🔧 Debug Info"): st.write("**Environment Variables:**") st.write(f"- HF_HOME: {os.environ.get('HF_HOME', 'Not set')}") st.write(f"- TRANSFORMERS_CACHE: {os.environ.get('TRANSFORMERS_CACHE', 'Not set')}") st.write(f"- PyTorch version: {torch.__version__}") st.write(f"- CUDA available: {torch.cuda.is_available()}") if hasattr(st.session_state, 'model') and st.session_state.model: st.write(f"- Model type: {type(st.session_state.model)}") st.write(f"- Tokenizer available: {st.session_state.tokenizer is not None}") # Load model if not already loaded if not st.session_state.model_loaded: st.session_state.model, st.session_state.tokenizer = load_model() st.session_state.model_loaded = True # Main content area col1, col2 = st.columns([2, 1]) with col1: st.markdown("## 💬 Financial Text Analysis") # Example texts if auto_examples: st.markdown("### 📝 Try These Financial Examples:") # Real examples from financial datasets with labels examples = [ { "text": "Apple's quarterly earnings exceeded analyst expectations, driving stock price up 15%", "category": "📈 Earnings Beat", "expected": "Positive" }, { "text": "The Federal Reserve announced an unexpected interest rate hike, causing market volatility", "category": "🏛️ Fed Policy", "expected": "Negative" }, { "text": "Tesla reported strong delivery numbers for Q3, beating consensus estimates", "category": "📊 Delivery Report", "expected": "Positive" }, { "text": "According to Gran, the company has no plans to move all production to Russia, although that is where the company is growing.", "category": "🏭 Production News", "expected": "Neutral" }, { "text": "Technopolis plans to develop in stages an area of no less than 100,000 square meters in order to host companies working in computer technologies and telecommunications.", "category": "🏢 Development Plan", "expected": "Neutral" }, { "text": "The international electronic industry company Elcoteq has laid off tens of employees from its Tallinn facility", "category": "👥 Layoffs", "expected": "Negative" }, { "text": "With the new production plant the company would increase its capacity to meet the expected increase in demand and would improve the use of raw materials and therefore increase the production profitability.", "category": "🏭 Expansion", "expected": "Positive" }, { "text": "According to the company's updated strategy for the years 2009-2012, Basware targets a long-term net sales growth in the range of 20%-40% with an operating profit margin of 10%-20% of net sales.", "category": "📈 Growth Strategy", "expected": "Positive" }, { "text": "Banking sector faces regulatory headwinds amid rising compliance costs", "category": "🏦 Banking News", "expected": "Negative" }, { "text": "Tech stocks rallied after positive GDP growth data released this morning", "category": "💹 Market Rally", "expected": "Positive" }, { "text": "Oil prices declined sharply due to oversupply concerns in global markets", "category": "🛢️ Commodity News", "expected": "Negative" }, { "text": "The company's guidance for next quarter remains unchanged from previous estimates", "category": "📋 Guidance Update", "expected": "Neutral" } ] # Create expandable sections for different categories col1, col2, col3 = st.columns(3) with col1: st.markdown("**📈 Positive Examples**") positive_examples = [ex for ex in examples if ex["expected"] == "Positive"] for i, example in enumerate(positive_examples): if st.button( f"📈 {example['category']}", key=f"pos_example_{i}", help=f"Expected: {example['expected']} | Click to analyze: {example['text'][:80]}..." ): st.session_state.input_text = example['text'] st.rerun() with col2: st.markdown("**➖ Neutral Examples**") neutral_examples = [ex for ex in examples if ex["expected"] == "Neutral"] for i, example in enumerate(neutral_examples): if st.button( f"➖ {example['category']}", key=f"neu_example_{i}", help=f"Expected: {example['expected']} | Click to analyze: {example['text'][:80]}..." ): st.session_state.input_text = example['text'] st.rerun() with col3: st.markdown("**📉 Negative Examples**") negative_examples = [ex for ex in examples if ex["expected"] == "Negative"] for i, example in enumerate(negative_examples): if st.button( f"📉 {example['category']}", key=f"neg_example_{i}", help=f"Expected: {example['expected']} | Click to analyze: {example['text'][:80]}..." ): st.session_state.input_text = example['text'] st.rerun() # Quick test section st.markdown("#### 🚀 Quick Test Categories") quick_test_cols = st.columns(4) with quick_test_cols[0]: if st.button("🎯 Earnings Season", help="Load earnings-related examples"): earnings_texts = [ex['text'] for ex in examples if 'earnings' in ex['text'].lower() or 'delivery' in ex['text'].lower()] if earnings_texts: st.session_state.input_text = earnings_texts[0] st.rerun() with quick_test_cols[1]: if st.button("🏛️ Fed Policy", help="Load Federal Reserve related examples"): fed_texts = [ex['text'] for ex in examples if 'federal' in ex['text'].lower() or 'rate' in ex['text'].lower()] if fed_texts: st.session_state.input_text = fed_texts[0] st.rerun() with quick_test_cols[2]: if st.button("🏭 Corporate News", help="Load corporate announcement examples"): corp_texts = [ex['text'] for ex in examples if 'company' in ex['text'].lower() and len(ex['text']) > 100] if corp_texts: st.session_state.input_text = corp_texts[0] st.rerun() with quick_test_cols[3]: if st.button("📊 Market Data", help="Load market movement examples"): market_texts = [ex['text'] for ex in examples if any(word in ex['text'].lower() for word in ['stock', 'market', 'rally', 'decline'])] if market_texts: st.session_state.input_text = market_texts[0] st.rerun() st.markdown("---") # Text input input_text = st.text_area( "Enter financial text to analyze:", value=st.session_state.get('input_text', ''), height=150, placeholder="Enter any financial news, earnings report, market analysis, or financial statement here..." ) # Analysis buttons col_btn1, col_btn2, col_btn3 = st.columns(3) with col_btn1: analyze_button = st.button("🔍 Analyze Sentiment", type="primary", disabled=not input_text.strip()) with col_btn2: if st.button("📊 Batch Analysis"): st.session_state.show_batch = True with col_btn3: if st.button("📈 Market Pulse"): st.session_state.show_market_pulse = True # Perform analysis if analyze_button and input_text.strip(): with st.spinner("🔄 Analyzing sentiment..."): sentiment, confidence, probabilities = analyze_sentiment( input_text, st.session_state.model, st.session_state.tokenizer ) # Add to history st.session_state.analysis_history.append({ 'timestamp': datetime.now(), 'text': input_text[:100] + "..." if len(input_text) > 100 else input_text, 'sentiment': sentiment, 'confidence': confidence }) # Display results st.markdown("### 🎯 Analysis Results") # Main sentiment result emoji = get_sentiment_emoji(sentiment) confidence_class = get_confidence_color(confidence) sentiment_html = f"""

{emoji} Sentiment: {sentiment.capitalize()}

Confidence: {confidence:.1%}

""" st.markdown(sentiment_html, unsafe_allow_html=True) # Interpretation with expected vs actual comparison if sentiment == "positive": interpretation = "📈 **Bullish Signal**: This text indicates positive market sentiment, potential upward movement, or favorable financial outlook." elif sentiment == "negative": interpretation = "📉 **Bearish Signal**: This text suggests negative market sentiment, potential downward pressure, or unfavorable conditions." else: interpretation = "➖ **Neutral Signal**: This text appears factual or balanced, without clear directional bias." st.markdown(interpretation) # Show expected vs actual if it's from examples example_expectations = { "Apple's quarterly earnings exceeded analyst expectations": "Positive", "The Federal Reserve announced an unexpected interest rate hike": "Negative", "Tesla reported strong delivery numbers for Q3": "Positive", "According to Gran, the company has no plans to move all production to Russia": "Neutral", "Technopolis plans to develop in stages an area of no less than 100,000 square meters": "Neutral", "The international electronic industry company Elcoteq has laid off tens of employees": "Negative", "With the new production plant the company would increase its capacity": "Positive", "According to the company's updated strategy for the years 2009-2012": "Positive" } # Check if this text matches any of our examples matched_expectation = None for key, expected in example_expectations.items(): if key.lower() in input_text.lower()[:100]: # Check first 100 chars matched_expectation = expected break if matched_expectation: if matched_expectation.lower() == sentiment: st.success(f"✅ **Prediction Matches Expected**: Expected {matched_expectation}, Got {sentiment.capitalize()}") else: st.warning(f"⚠️ **Different from Expected**: Expected {matched_expectation}, Got {sentiment.capitalize()}") st.info("💡 This could indicate model nuance or the complexity of financial sentiment analysis!") # Financial Context Analysis financial_keywords = { 'earnings': '💰 Earnings-related content', 'fed': '🏛️ Federal Reserve policy', 'growth': '📈 Growth-focused narrative', 'layoff': '👥 Employment impact', 'production': '🏭 Production/manufacturing news', 'profit': '💵 Profitability discussion', 'loss': '📉 Loss or decline mentioned', 'increase': '⬆️ Upward trend indicated', 'decrease': '⬇️ Downward trend indicated', 'strategy': '🎯 Strategic planning content' } detected_themes = [] for keyword, description in financial_keywords.items(): if keyword in input_text.lower(): detected_themes.append(description) if detected_themes: st.markdown("#### 🔍 **Detected Financial Themes:**") for theme in detected_themes[:3]: # Show max 3 themes st.markdown(f"- {theme}") # Market Impact Assessment if confidence >= 0.8: impact_level = "🔥 **HIGH IMPACT** - Strong directional signal" elif confidence >= 0.6: impact_level = "⚡ **MEDIUM IMPACT** - Moderate directional signal" else: impact_level = "💭 **LOW IMPACT** - Weak or mixed signal" st.markdown(f"#### 📊 **Market Impact Assessment:**") st.markdown(impact_level) # Show probabilities if enabled if show_probabilities: col_prob1, col_prob2 = st.columns(2) with col_prob1: st.markdown("#### 📊 Probability Breakdown") for sent, prob in probabilities.items(): emoji = get_sentiment_emoji(sent) st.metric(f"{emoji} {sent.capitalize()}", f"{prob:.1%}") with col_prob2: # Probability chart fig = create_probability_chart(probabilities) st.plotly_chart(fig, use_container_width=True) with col2: st.markdown("## 📈 Dashboard") # Key metrics if st.session_state.analysis_history: total_analyses = len(st.session_state.analysis_history) recent_sentiment = st.session_state.analysis_history[-1]['sentiment'] avg_confidence = np.mean([item['confidence'] for item in st.session_state.analysis_history]) st.metric("Total Analyses", total_analyses) st.metric("Last Sentiment", recent_sentiment.capitalize(), delta=None) st.metric("Avg Confidence", f"{avg_confidence:.1%}") # Sentiment distribution sentiment_counts = pd.Series([item['sentiment'] for item in st.session_state.analysis_history]).value_counts() fig_pie = px.pie( values=sentiment_counts.values, names=sentiment_counts.index, title="Sentiment Distribution", color_discrete_map={'negative': '#dc3545', 'neutral': '#ffc107', 'positive': '#28a745'} ) fig_pie.update_layout(height=300) st.plotly_chart(fig_pie, use_container_width=True) # History if show_history and st.session_state.analysis_history: st.markdown("### 📋 Recent Analyses") for i, item in enumerate(reversed(st.session_state.analysis_history[-5:])): emoji = get_sentiment_emoji(item['sentiment']) with st.expander(f"{emoji} {item['sentiment'].capitalize()} - {item['timestamp'].strftime('%H:%M:%S')}"): st.write(f"**Text:** {item['text']}") st.write(f"**Confidence:** {item['confidence']:.1%}") # Batch Analysis Section if st.session_state.get('show_batch', False): st.markdown("---") st.markdown("## 📊 Batch Analysis") col_batch1, col_batch2 = st.columns(2) with col_batch1: batch_text = st.text_area( "Enter multiple texts (one per line):", height=200, placeholder="Line 1: First financial text\nLine 2: Second financial text\n..." ) with col_batch2: if st.button("🚀 Analyze Batch") and batch_text.strip(): texts = [line.strip() for line in batch_text.split('\n') if line.strip()] batch_results = [] progress_bar = st.progress(0) for i, text in enumerate(texts): sentiment, confidence, probabilities = analyze_sentiment( text, st.session_state.model, st.session_state.tokenizer ) batch_results.append({ 'Text': text[:50] + "..." if len(text) > 50 else text, 'Sentiment': sentiment, 'Confidence': f"{confidence:.1%}", 'Positive': f"{probabilities['positive']:.1%}", 'Neutral': f"{probabilities['neutral']:.1%}", 'Negative': f"{probabilities['negative']:.1%}" }) progress_bar.progress((i + 1) / len(texts)) # Display batch results df_results = pd.DataFrame(batch_results) st.dataframe(df_results, use_container_width=True) # Batch summary sentiment_summary = pd.Series([r['Sentiment'] for r in batch_results]).value_counts() fig_batch = px.bar( x=sentiment_summary.index, y=sentiment_summary.values, title="Batch Analysis Summary", color=sentiment_summary.index, color_discrete_map={'negative': '#dc3545', 'neutral': '#ffc107', 'positive': '#28a745'} ) st.plotly_chart(fig_batch, use_container_width=True) # Market Pulse Section if st.session_state.get('show_market_pulse', False): st.markdown("---") st.markdown("## 📈 Market Pulse Generator") market_scenarios = { "Earnings Season": [ "Company reported record quarterly earnings beating all analyst estimates", "Disappointing earnings results led to after-hours trading decline", "Management guidance exceeded expectations for upcoming quarter" ], "Economic Indicators": [ "Inflation data came in higher than expected affecting market sentiment", "GDP growth numbers showed robust economic expansion this quarter", "Employment report indicates strengthening labor market conditions" ], "Market Events": [ "Federal Reserve maintains current interest rate policy stance", "Cryptocurrency markets show increased institutional adoption trends", "Global supply chain disruptions impact manufacturing sector outlook" ] } col_scenario1, col_scenario2 = st.columns(2) with col_scenario1: scenario_type = st.selectbox("Choose Market Scenario:", list(market_scenarios.keys())) with col_scenario2: if st.button("🎲 Generate Market Pulse"): selected_texts = market_scenarios[scenario_type] st.markdown(f"### 📊 {scenario_type} Analysis") results = [] for text in selected_texts: sentiment, confidence, probabilities = analyze_sentiment( text, st.session_state.model, st.session_state.tokenizer ) results.append((text, sentiment, confidence)) # Display results for text, sentiment, confidence in results: emoji = get_sentiment_emoji(sentiment) st.markdown(f""" **{emoji} {text}** *Sentiment: {sentiment.capitalize()} ({confidence:.1%} confidence)* """) st.markdown("---") # Footer st.markdown("---") st.markdown("""

🤖 Powered by LAPEFT | 🔬 Advanced Financial Sentiment Analysis | Model on Hugging Face

""", unsafe_allow_html=True) if __name__ == "__main__": main()