Dmitry Beresnev committed on
Commit
e189a31
·
1 Parent(s): 24bc329

init project

Browse files
.env.example ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Financial Platform Environment Variables
2
+
3
+ # DeepSeek API Key (for AI-powered insights)
4
+ DEEPSEEK_API_KEY=your-deepseek-api-key-here
5
+
6
+ # News Service URL (for news aggregation with sentiment analysis)
7
+ NEWS_SERVICE_URL=http://localhost:5000
8
+
9
+ # Alpha Vantage API Key (optional, for forex data)
10
+ ALPHA_VANTAGE_KEY=your-alpha-vantage-key-here
11
+
12
+ # Twitter/X Credentials (for real-time news monitoring via Twikit)
13
+ # Create a Twitter account or use existing credentials
14
+ TWITTER_USERNAME=your-twitter-username
15
+ TWITTER_EMAIL=your-twitter-email@example.com
16
+ TWITTER_PASSWORD=your-twitter-password
.gitignore ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ignore virtual environment directories
2
+ .venv/
3
+ # Ignore environment variable files
4
+ .env
5
+ # Ignore lock files
6
+ uv.lock
7
+ # Ignore Python bytecode files
8
+ *.pyc
9
+ *.pyo
10
+ __pycache__/
11
+ */__pycache__/
12
+ **/__pycache__/
13
+ # Ignore Jupyter Notebook checkpoints
14
+ .ipynb_checkpoints/
15
+ # Ignore IDE specific files
16
+ .idea/
17
+ # Ignore logs
18
+ logs/
19
+ # ML model files
20
+ ml_models/
21
+ # Ignore experimental result files
22
+ exp_results/
23
+ # Ignore png and jpg files
24
+ *.png
25
+ *.jpg
26
+ # Ignore .ruff
27
+ .ruff_cache
28
+ # Test files
29
+ test_*
30
+ test_*.py
31
+ *_test.py
32
+ tests/__pycache__/
33
+ # Ignore md files
34
+ *.md
35
+ #
36
+ docs/
37
+ #
38
+ *_example.py
39
+ #
40
+ tests/
41
+ #
42
+ README_old.md
Dockerfile ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.13-slim
2
+
3
+ WORKDIR /app
4
+
5
+ # Install system dependencies for Playwright and Chromium
6
+ RUN apt-get update && apt-get install -y \
7
+ # Build tools
8
+ build-essential \
9
+ git \
10
+ # Chromium browser and driver
11
+ chromium \
12
+ chromium-driver \
13
+ # Playwright dependencies
14
+ libnss3 \
15
+ libnspr4 \
16
+ libatk1.0-0 \
17
+ libatk-bridge2.0-0 \
18
+ libcups2 \
19
+ libdrm2 \
20
+ libdbus-1-3 \
21
+ libxkbcommon0 \
22
+ libxcomposite1 \
23
+ libxdamage1 \
24
+ libxfixes3 \
25
+ libxrandr2 \
26
+ libgbm1 \
27
+ libasound2 \
28
+ libatspi2.0-0 \
29
+ libxshmfence1 \
30
+ # Utilities
31
+ curl \
32
+ wget \
33
+ ca-certificates \
34
+ fonts-liberation \
35
+ && rm -rf /var/lib/apt/lists/*
36
+
37
+ # Copy and install Python dependencies
38
+ COPY requirements.txt .
39
+ RUN pip install --no-cache-dir -r requirements.txt
40
+
41
+ # Find Chromium installation and create symlink if needed
42
+ RUN if [ -f /usr/bin/chromium-browser ]; then \
43
+ ln -sf /usr/bin/chromium-browser /usr/bin/chromium; \
44
+ elif [ -f /usr/lib/chromium/chromium ]; then \
45
+ ln -sf /usr/lib/chromium/chromium /usr/bin/chromium; \
46
+ fi
47
+
48
+ # Verify Chromium is accessible
49
+ RUN which chromium || (echo "ERROR: Chromium not found!" && exit 1)
50
+
51
+ # Set Playwright to use system Chromium
52
+ ENV PLAYWRIGHT_BROWSERS_PATH=0
53
+ ENV PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1
54
+ ENV PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/usr/bin/chromium
55
+
56
+ # Copy application code
57
+ COPY . .
58
+
59
+ # Set Streamlit configuration for HuggingFace Spaces
60
+ ENV STREAMLIT_SERVER_PORT=7860
61
+ ENV STREAMLIT_SERVER_ADDRESS=0.0.0.0
62
+ ENV STREAMLIT_SERVER_HEADLESS=true
63
+ ENV STREAMLIT_BROWSER_GATHER_USAGE_STATS=false
64
+
65
+ # Expose Streamlit port
66
+ EXPOSE 7860
67
+
68
+ # Health check
69
+ HEALTHCHECK CMD curl --fail http://localhost:7860/_stcore/health || exit 1
70
+
71
+ # Run Streamlit (corrected app file path from main.py to app.py)
72
+ CMD ["streamlit", "run", "app/app.py", "--server.port=7860", "--server.address=0.0.0.0"]
README.md CHANGED
@@ -1,12 +1,193 @@
1
  ---
2
  title: UnifiedFinancialPlatform
3
- emoji: 🏒
4
- colorFrom: yellow
5
- colorTo: pink
6
  sdk: docker
 
7
  pinned: false
8
  license: apache-2.0
9
- short_description: Unified Financial Platform
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  title: UnifiedFinancialPlatform
3
+ emoji: πŸ“ˆ
4
+ colorFrom: blue
5
+ colorTo: green
6
  sdk: docker
7
+ app_port: 7860
8
  pinned: false
9
  license: apache-2.0
10
+ short_description: Unified Financial Platform. Multi-asset analysis with OpenBB and AI insights
11
  ---
12
 
13
+ # Financial Analysis Platform
14
+
15
+ A comprehensive multi-asset financial analysis platform built with Streamlit, providing real-time data, technical indicators, and AI-powered insights.
16
+
17
+ ## Features
18
+
19
+ ### πŸ“ˆ Stock Analysis
20
+ - Real-time stock price data from OpenBB
21
+ - Technical indicators (SMA, EMA, RSI)
22
+ - Company profile and financial statements
23
+ - Revenue and net income trends
24
+ - TradingView chart integration
25
+ - Profitability metrics analysis
26
+
27
+ ### β‚Ώ Cryptocurrency (Coming Soon)
28
+ - Real-time cryptocurrency prices
29
+ - Market cap and 24h volume
30
+ - Technical indicators for crypto assets
31
+ - TradingView crypto charts
32
+
33
+ ### πŸ’± Forex Trading (Coming Soon)
34
+ - Foreign exchange rate analysis
35
+ - Major, minor, and exotic pairs
36
+ - Pip calculator
37
+ - Economic calendar integration
38
+
39
+ ### πŸ” Market Screener (Coming Soon)
40
+ - Multi-criteria filtering
41
+ - Technical pattern recognition
42
+ - Sort by volume, price change, RSI
43
+ - Export results to CSV
44
+
45
+ ### πŸ“° News & AI Dashboard βœ… LIVE
46
+ - **23 Premium Sources** across 4 tiers for comprehensive coverage
47
+ - **Tier 1**: Bloomberg (×2), Reuters, FT, WSJ, The Economist, CNBC, MarketWatch (8 sources)
48
+ - **Tier 2**: BBC World, AFP, Al Jazeera, Politico, DW News (5 sources)
49
+ - **Tier 3**: Federal Reserve (2.0x), ECB (2.0x), Lagarde, BoE, IMF, World Bank, US Treasury (7 sources)
50
+ - **Tier 4**: Zero Hedge, First Squawk, Live Squawk (3 sources)
51
+ - **Low-latency monitoring** with 3-minute cache for trading decisions
52
+ - **Intelligent categorization**: Macro, Markets, Geopolitical
53
+ - **Professional sentiment analysis** (Positive/Negative/Neutral)
54
+ - **Weighted impact scoring**: Source credibility Γ— engagement Γ— recency
55
+ - **Breaking news detection** with instant alerts and priority display
56
+ - **Smart filtering** by category, sentiment, and impact level
57
+ - **Auto-refresh mode** for continuous monitoring during trading hours
58
+ - Powered by **Twikit** for real-time Twitter/X intelligence (free, no API costs)
59
+
60
+ ## Installation
61
+
62
+ 1. Clone the repository:
63
+ ```bash
64
+ git clone <repository-url>
65
+ cd FinancialPlatform
66
+ ```
67
+
68
+ 2. Install dependencies:
69
+ ```bash
70
+ pip install -r requirements.txt
71
+ ```
72
+
73
+ 3. Create a `.env` file based on `.env.example`:
74
+ ```bash
75
+ cp .env.example .env
76
+ ```
77
+
78
+ 4. Configure your API keys and Twitter credentials in `.env`:
79
+ ```
80
+ DEEPSEEK_API_KEY=your-key-here
81
+ NEWS_SERVICE_URL=http://localhost:5000
82
+ ALPHA_VANTAGE_KEY=your-key-here
83
+
84
+ # Twitter/X Credentials (required for real-time news monitoring)
85
+ TWITTER_USERNAME=your-twitter-username
86
+ TWITTER_EMAIL=your-email@example.com
87
+ TWITTER_PASSWORD=your-password
88
+ ```
89
+
90
+ **Note**: Twitter credentials are required for real-time news monitoring. Without credentials, the system will use demo/mock data.
91
+
92
+ ## Usage
93
+
94
+ Run the application:
95
+ ```bash
96
+ streamlit run app/app.py
97
+ ```
98
+
99
+ The application will open in your default web browser at `http://localhost:8501`.
100
+
101
+ ## Project Structure
102
+
103
+ ```
104
+ FinancialPlatform/
105
+ β”œβ”€β”€ app/
106
+ β”‚ β”œβ”€β”€ app.py # Main landing page
107
+ β”‚ β”œβ”€β”€ pages/
108
+ β”‚ β”‚ β”œβ”€β”€ 01_Stocks.py # Stock analysis page
109
+ β”‚ β”‚ β”œβ”€β”€ 02_Crypto.py # Cryptocurrency analysis
110
+ β”‚ β”‚ β”œβ”€β”€ 03_Forex.py # Forex analysis
111
+ β”‚ β”‚ β”œβ”€β”€ 04_Screener.py # Market screener
112
+ β”‚ β”‚ └── 05_Dashboard.py # News & AI dashboard
113
+ β”‚ β”œβ”€β”€ components/
114
+ β”‚ β”‚ β”œβ”€β”€ chart.py # Chart creation utilities
115
+ β”‚ β”‚ β”œβ”€β”€ data_sources.py # Data fetching functions
116
+ β”‚ β”‚ β”œβ”€β”€ ui.py # UI component functions
117
+ β”‚ β”‚ └── styles.py # Dark theme CSS
118
+ β”‚ └── utils/
119
+ β”‚ β”œβ”€β”€ config.py # Configuration management
120
+ β”‚ └── formatters.py # Data formatting utilities
121
+ β”œβ”€β”€ requirements.txt
122
+ β”œβ”€β”€ .env.example
123
+ └── README.md
124
+ ```
125
+
126
+ ## Technology Stack
127
+
128
+ - **Frontend**: Streamlit
129
+ - **Data Sources**: OpenBB SDK, yfinance
130
+ - **Charting**: Plotly, TradingView widgets
131
+ - **AI**: DeepSeek API (planned)
132
+ - **Styling**: Custom CSS with dark theme
133
+
134
+ ## Features in Development
135
+
136
+ - [ ] Cryptocurrency data integration (Binance API)
137
+ - [ ] Forex data integration (Alpha Vantage)
138
+ - [ ] Market screener with advanced filters
139
+ - [ ] News aggregation service
140
+ - [ ] AI-powered trading insights
141
+ - [ ] Sentiment analysis
142
+ - [ ] Additional technical indicators (MACD, Bollinger Bands, ATR)
143
+
144
+ ## Configuration
145
+
146
+ ### Environment Variables
147
+
148
+ - `DEEPSEEK_API_KEY`: API key for AI-powered insights
149
+ - `NEWS_SERVICE_URL`: URL for news aggregation service
150
+ - `ALPHA_VANTAGE_KEY`: API key for forex data (optional)
151
+
152
+ ### Cache Settings
153
+
154
+ Data caching is configured in `utils/config.py`:
155
+ - Price data: 1 hour TTL
156
+ - Fundamental data: 24 hours TTL
157
+ - News data: 15 minutes TTL
158
+
159
+ ## Deployment
160
+
161
+ ### HuggingFace Spaces
162
+
163
+ This application is optimized for deployment on HuggingFace Spaces:
164
+
165
+ 1. Create a new Space on HuggingFace
166
+ 2. Set the Space type to "Streamlit"
167
+ 3. Add your environment variables in the Space settings:
168
+ - `DEEPSEEK_API_KEY`
169
+ - `NEWS_SERVICE_URL`
170
+ - `ALPHA_VANTAGE_KEY`
171
+ 4. Push your code to the Space repository
172
+
173
+ ### Local Development
174
+
175
+ For local development with hot-reload:
176
+ ```bash
177
+ streamlit run app/app.py --server.runOnSave=true
178
+ ```
179
+
180
+ ## Contributing
181
+
182
+ Contributions are welcome! Please feel free to submit a Pull Request.
183
+
184
+ ## License
185
+
186
+ Apache 2.0 License
187
+
188
+ ## Acknowledgments
189
+
190
+ - OpenBB for financial data API
191
+ - TradingView for chart widgets
192
+ - Streamlit for the amazing web framework
193
+
app/app.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Financial Analysis Dashboard - Main Application Landing Page."""
2
+
3
+ import streamlit as st
4
+ from components.styles import DARK_THEME_CSS
5
+
6
+ # ---- Configuration ----
7
+ st.set_page_config(
8
+ page_title="Financial Dashboard",
9
+ page_icon="πŸ“ˆ",
10
+ layout="wide",
11
+ initial_sidebar_state="expanded",
12
+ menu_items={
13
+ "About": "A professional financial analysis platform with multi-asset support"
14
+ }
15
+ )
16
+
17
+ # ---- Apply Dark Theme ----
18
+ st.markdown(DARK_THEME_CSS, unsafe_allow_html=True)
19
+
20
+ # ---- Header ----
21
+ st.markdown("# πŸ“ˆ Financial Analysis Platform")
22
+ st.markdown("### Professional multi-asset analysis with technical indicators, AI insights, and real-time data")
23
+
24
+ st.markdown("---")
25
+
26
+ # ---- Feature Overview ----
27
+ col1, col2, col3 = st.columns(3)
28
+
29
+ with col1:
30
+ st.markdown("""
31
+ <div style="padding: 1.5rem; background: linear-gradient(135deg, #1f2937 0%, #111827 100%); border-radius: 10px; border: 1px solid #30363d;">
32
+ <h3>πŸ“ˆ Stock Analysis</h3>
33
+ <p>Comprehensive stock analysis with technical indicators, financial metrics, and TradingView charts.</p>
34
+ <ul>
35
+ <li>Real-time price data</li>
36
+ <li>Technical indicators (SMA, EMA, RSI)</li>
37
+ <li>Financial statements</li>
38
+ <li>Company profiles</li>
39
+ </ul>
40
+ </div>
41
+ """, unsafe_allow_html=True)
42
+
43
+ with col2:
44
+ st.markdown("""
45
+ <div style="padding: 1.5rem; background: linear-gradient(135deg, #1f2937 0%, #111827 100%); border-radius: 10px; border: 1px solid #30363d;">
46
+ <h3>β‚Ώ Cryptocurrency</h3>
47
+ <p>Track and analyze major cryptocurrencies with real-time market data.</p>
48
+ <ul>
49
+ <li>BTC, ETH, and major altcoins</li>
50
+ <li>24h volume & market cap</li>
51
+ <li>Price charts & indicators</li>
52
+ <li>Market sentiment</li>
53
+ </ul>
54
+ </div>
55
+ """, unsafe_allow_html=True)
56
+
57
+ with col3:
58
+ st.markdown("""
59
+ <div style="padding: 1.5rem; background: linear-gradient(135deg, #1f2937 0%, #111827 100%); border-radius: 10px; border: 1px solid #30363d;">
60
+ <h3>πŸ’± Forex Trading</h3>
61
+ <p>Foreign exchange analysis for major, minor, and exotic currency pairs.</p>
62
+ <ul>
63
+ <li>Major pairs (EUR/USD, GBP/USD)</li>
64
+ <li>Real-time exchange rates</li>
65
+ <li>Technical analysis</li>
66
+ <li>Pip calculator</li>
67
+ </ul>
68
+ </div>
69
+ """, unsafe_allow_html=True)
70
+
71
+ st.markdown("<br>", unsafe_allow_html=True)
72
+
73
+ col4, col5 = st.columns(2)
74
+
75
+ with col4:
76
+ st.markdown("""
77
+ <div style="padding: 1.5rem; background: linear-gradient(135deg, #1f2937 0%, #111827 100%); border-radius: 10px; border: 1px solid #30363d;">
78
+ <h3>πŸ” Market Screener</h3>
79
+ <p>Advanced screening tools to find investment opportunities across markets.</p>
80
+ <ul>
81
+ <li>Multi-criteria filtering</li>
82
+ <li>Technical pattern recognition</li>
83
+ <li>Sort by volume, price change, RSI</li>
84
+ <li>Export results to CSV</li>
85
+ </ul>
86
+ </div>
87
+ """, unsafe_allow_html=True)
88
+
89
+ with col5:
90
+ st.markdown("""
91
+ <div style="padding: 1.5rem; background: linear-gradient(135deg, #1f2937 0%, #111827 100%); border-radius: 10px; border: 1px solid #30363d;">
92
+ <h3>πŸ€– News & AI Dashboard</h3>
93
+ <p>AI-powered market insights with sentiment analysis and trading recommendations.</p>
94
+ <ul>
95
+ <li>Real-time news aggregation</li>
96
+ <li>Sentiment analysis</li>
97
+ <li>AI trading insights</li>
98
+ <li>Market trend detection</li>
99
+ </ul>
100
+ </div>
101
+ """, unsafe_allow_html=True)
102
+
103
+ st.markdown("---")
104
+
105
+ # ---- Quick Start ----
106
+ st.markdown("## πŸš€ Quick Start")
107
+ st.markdown("Use the sidebar to navigate to different sections:")
108
+
109
+ quick_col1, quick_col2, quick_col3 = st.columns(3)
110
+
111
+ with quick_col1:
112
+ if st.button("πŸ“ˆ Stock Analysis", use_container_width=True):
113
+ st.switch_page("pages/01_Stocks.py")
114
+
115
+ with quick_col2:
116
+ if st.button("β‚Ώ Cryptocurrency", use_container_width=True):
117
+ st.info("Coming soon!")
118
+
119
+ with quick_col3:
120
+ if st.button("πŸ’± Forex Trading", use_container_width=True):
121
+ st.info("Coming soon!")
122
+
123
+ st.markdown("<br>", unsafe_allow_html=True)
124
+
125
+ quick_col4, quick_col5 = st.columns(2)
126
+
127
+ with quick_col4:
128
+ if st.button("πŸ” Market Screener", use_container_width=True):
129
+ st.info("Coming soon!")
130
+
131
+ with quick_col5:
132
+ if st.button("πŸ€– News & AI Dashboard", use_container_width=True):
133
+ st.info("Coming soon!")
134
+
135
+ st.markdown("---")
136
+
137
+ # ---- Sidebar ----
138
+ with st.sidebar:
139
+ st.markdown("## πŸ“‹ Navigation")
140
+ st.info("Select a page from the sidebar to get started.")
141
+
142
+ st.markdown("---")
143
+ st.markdown("## ℹ️ About")
144
+ st.markdown("""
145
+ This platform provides comprehensive financial analysis across multiple asset classes:
146
+
147
+ - **Stocks**: Technical & fundamental analysis
148
+ - **Crypto**: Real-time cryptocurrency tracking
149
+ - **Forex**: Currency pair analysis
150
+ - **Screener**: Find investment opportunities
151
+ - **Dashboard**: AI-powered insights
152
+ """)
153
+
154
+ st.markdown("---")
155
+ st.markdown("### πŸ”§ Features")
156
+ st.markdown("""
157
+ - βœ… Real-time data
158
+ - βœ… Technical indicators
159
+ - βœ… TradingView integration
160
+ - βœ… Dark theme UI
161
+ - βœ… AI-powered insights
162
+ - βœ… News sentiment analysis
163
+ """)
app/charts.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Chart creation utilities for the financial dashboard."""
2
+
3
+ import plotly.graph_objects as go
4
+ import pandas as pd
5
+
6
+
7
def get_dark_theme_layout():
    """Return the shared Plotly layout options for the dashboard's dark theme.

    The xaxis/yaxis entries are independent dict copies so a caller may
    mutate one (e.g. set a y-range) without affecting the other.
    """
    grid_color = "#30363d"
    text_color = "#e6edf3"
    muted_color = "#8b949e"
    axis_style = dict(gridcolor=grid_color, showgrid=True, zeroline=False, color=muted_color)

    return dict(
        plot_bgcolor="#0d1117",
        paper_bgcolor="#0e1117",
        font=dict(color=text_color, size=12, family="Arial, sans-serif"),
        xaxis=dict(axis_style),
        yaxis=dict(axis_style),
        legend=dict(
            bgcolor="rgba(13, 17, 23, 0.8)",
            bordercolor=grid_color,
            borderwidth=1,
            font=dict(color=text_color),
        ),
        hoverlabel=dict(
            bgcolor="#0d1117",
            bordercolor=grid_color,
            font=dict(color=text_color),
        ),
    )
37
+
38
+
39
def create_price_chart(df: pd.DataFrame, symbol: str, period: int) -> go.Figure:
    """Plot the close price together with SMA/EMA overlays for ``symbol``.

    Expects ``df`` to be date-indexed with ``close``, ``SMA`` and ``EMA``
    columns; ``period`` only labels the moving-average traces.
    """
    trace_specs = [
        ("close", "Close Price", dict(color="#0066ff", width=2.5)),
        ("SMA", f"SMA {period}", dict(color="#00d084", width=2, dash="dash")),
        ("EMA", f"EMA {period}", dict(color="#ffa500", width=2, dash="dot")),
    ]

    fig = go.Figure()
    for column, label, line_style in trace_specs:
        fig.add_trace(go.Scatter(x=df.index, y=df[column], name=label, line=line_style))

    fig.update_layout(
        title=f"{symbol} - Price with Moving Averages",
        xaxis_title="Date",
        yaxis_title="Price ($)",
        hovermode="x unified",
        template="plotly_dark",
        height=500,
        margin=dict(l=0, r=0, t=40, b=0),
        **get_dark_theme_layout(),
    )

    return fig
72
+
73
+
74
def create_rsi_chart(df: pd.DataFrame, symbol: str) -> go.Figure:
    """Plot the RSI series for ``symbol`` with overbought/oversold guides."""
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=df.index,
        y=df["RSI"],
        name="RSI",
        line=dict(color="#ff3838", width=2.5),
        fill="tozeroy",
        fillcolor="rgba(255, 56, 56, 0.15)",
    ))

    # Standard reference levels: 70 = overbought, 30 = oversold, 50 = midline.
    fig.add_hline(y=70, line_dash="dash", line_color="rgba(255, 165, 0, 0.6)",
                  annotation_text="Overbought (70)")
    fig.add_hline(y=30, line_dash="dash", line_color="rgba(0, 208, 132, 0.6)",
                  annotation_text="Oversold (30)")
    fig.add_hline(y=50, line_dash="dot", line_color="rgba(139, 148, 158, 0.3)")

    theme = get_dark_theme_layout()
    theme["yaxis"]["range"] = [0, 100]  # RSI is bounded to 0-100 by construction

    fig.update_layout(
        title=f"{symbol} - Relative Strength Index (RSI)",
        xaxis_title="Date",
        yaxis_title="RSI",
        hovermode="x unified",
        template="plotly_dark",
        height=500,
        margin=dict(l=0, r=0, t=40, b=0),
        **theme,
    )

    return fig
107
+
108
+
109
def create_financial_chart(income_data: pd.DataFrame) -> go.Figure:
    """Grouped bar chart comparing annual total revenue against net income.

    ``income_data`` must provide ``period_ending``, ``total_revenue`` and
    ``net_income`` columns.
    """
    bar_specs = (
        ("total_revenue", "Total Revenue", "#0066ff"),
        ("net_income", "Net Income", "#00d084"),
    )

    fig = go.Figure()
    for column, label, color in bar_specs:
        fig.add_trace(go.Bar(
            x=income_data["period_ending"],
            y=income_data[column],
            name=label,
            marker=dict(color=color, opacity=0.9),
            yaxis="y1",
        ))

    fig.update_layout(
        title="Revenue & Net Income (Annual)",
        xaxis_title="Period",
        yaxis_title="Amount ($)",
        hovermode="x unified",
        template="plotly_dark",
        height=400,
        barmode="group",
        margin=dict(l=0, r=0, t=40, b=0),
        **get_dark_theme_layout(),
    )

    return fig
app/components/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Components package for financial platform UI."""
app/components/chart.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Chart creation utilities for the financial dashboard."""
2
+
3
+ import plotly.graph_objects as go
4
+ import pandas as pd
5
+
6
+
7
def get_dark_theme_layout():
    """Base Plotly layout dict implementing the GitHub-dark color scheme."""
    def _axis():
        # Fresh dict per call so each axis can be tweaked independently later.
        return dict(gridcolor="#30363d", showgrid=True, zeroline=False, color="#8b949e")

    return dict(
        plot_bgcolor="#0d1117",
        paper_bgcolor="#0e1117",
        font=dict(color="#e6edf3", size=12, family="Arial, sans-serif"),
        xaxis=_axis(),
        yaxis=_axis(),
        legend=dict(
            bgcolor="rgba(13, 17, 23, 0.8)",
            bordercolor="#30363d",
            borderwidth=1,
            font=dict(color="#e6edf3"),
        ),
        hoverlabel=dict(
            bgcolor="#0d1117",
            bordercolor="#30363d",
            font=dict(color="#e6edf3"),
        ),
    )
37
+
38
+
39
def create_price_chart(df: pd.DataFrame, symbol: str, period: int) -> go.Figure:
    """Build the price chart: close price plus SMA and EMA overlay lines.

    ``df`` is expected to be date-indexed and carry ``close``, ``SMA`` and
    ``EMA`` columns (see ``calculate_technical_indicators``).
    """
    fig = go.Figure()

    fig.add_trace(go.Scatter(x=df.index, y=df["close"], name="Close Price",
                             line=dict(color="#0066ff", width=2.5)))
    fig.add_trace(go.Scatter(x=df.index, y=df["SMA"], name=f"SMA {period}",
                             line=dict(color="#00d084", width=2, dash="dash")))
    fig.add_trace(go.Scatter(x=df.index, y=df["EMA"], name=f"EMA {period}",
                             line=dict(color="#ffa500", width=2, dash="dot")))

    theme = get_dark_theme_layout()
    fig.update_layout(
        title=f"{symbol} - Price with Moving Averages",
        xaxis_title="Date",
        yaxis_title="Price ($)",
        hovermode="x unified",
        template="plotly_dark",
        height=500,
        margin=dict(l=0, r=0, t=40, b=0),
        **theme,
    )

    return fig
72
+
73
+
74
def create_rsi_chart(df: pd.DataFrame, symbol: str) -> go.Figure:
    """Render the Relative Strength Index with standard threshold guides."""
    fig = go.Figure()

    rsi_trace = go.Scatter(
        x=df.index,
        y=df["RSI"],
        name="RSI",
        line=dict(color="#ff3838", width=2.5),
        fill="tozeroy",
        fillcolor="rgba(255, 56, 56, 0.15)",
    )
    fig.add_trace(rsi_trace)

    # Horizontal guides at the conventional RSI thresholds.
    guides = [
        (70, "dash", "rgba(255, 165, 0, 0.6)", "Overbought (70)"),
        (30, "dash", "rgba(0, 208, 132, 0.6)", "Oversold (30)"),
        (50, "dot", "rgba(139, 148, 158, 0.3)", None),
    ]
    for level, dash, color, label in guides:
        if label is None:
            fig.add_hline(y=level, line_dash=dash, line_color=color)
        else:
            fig.add_hline(y=level, line_dash=dash, line_color=color,
                          annotation_text=label)

    base_layout = get_dark_theme_layout()
    base_layout["yaxis"]["range"] = [0, 100]  # clamp axis to the RSI domain

    fig.update_layout(
        title=f"{symbol} - Relative Strength Index (RSI)",
        xaxis_title="Date",
        yaxis_title="RSI",
        hovermode="x unified",
        template="plotly_dark",
        height=500,
        margin=dict(l=0, r=0, t=40, b=0),
        **base_layout,
    )

    return fig
107
+
108
+
109
def create_financial_chart(income_data: pd.DataFrame) -> go.Figure:
    """Grouped annual bars of total revenue vs. net income.

    ``income_data`` must contain ``period_ending``, ``total_revenue`` and
    ``net_income`` columns.
    """
    periods = income_data["period_ending"]

    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=periods,
        y=income_data["total_revenue"],
        name="Total Revenue",
        marker=dict(color="#0066ff", opacity=0.9),
        yaxis="y1",
    ))
    fig.add_trace(go.Bar(
        x=periods,
        y=income_data["net_income"],
        name="Net Income",
        marker=dict(color="#00d084", opacity=0.9),
        yaxis="y1",
    ))

    theme = get_dark_theme_layout()
    fig.update_layout(
        title="Revenue & Net Income (Annual)",
        xaxis_title="Period",
        yaxis_title="Amount ($)",
        hovermode="x unified",
        template="plotly_dark",
        height=400,
        barmode="group",
        margin=dict(l=0, r=0, t=40, b=0),
        **theme,
    )

    return fig
app/components/data_sources.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Data fetching and processing utilities for the financial dashboard."""
2
+
3
+ import pandas as pd
4
+ from openbb import sdk
5
+ import streamlit as st
6
+
7
+
8
@st.cache_data(ttl=3600)
def load_stock_data(symbol: str) -> pd.DataFrame:
    """Fetch historical price data for ``symbol`` via OpenBB (cached 1 hour)."""
    return sdk.equity.price.historical(symbol=symbol).to_dataframe()
13
+
14
+
15
@st.cache_data(ttl=86400)
def load_company_profile(symbol: str):
    """Fetch the company profile for ``symbol`` via OpenBB (cached 24 hours).

    Returns the first record of the response's ``results``, or ``None`` when
    the response has no (or an empty) ``results`` attribute.
    """
    response = sdk.equity.profile(symbol=symbol)
    results = getattr(response, 'results', None)
    return results[0] if results else None
21
+
22
+
23
@st.cache_data(ttl=86400)
def load_income_statement(symbol: str) -> pd.DataFrame:
    """Fetch the income statement for ``symbol`` via OpenBB (cached 24 hours)."""
    return sdk.equity.fundamental.income(symbol=symbol).to_dataframe()
28
+
29
+
30
def calculate_technical_indicators(df: pd.DataFrame, period: int) -> pd.DataFrame:
    """Return a copy of ``df`` with ``SMA``, ``EMA`` and ``RSI`` columns added.

    ``df`` must contain a ``close`` column; ``period`` is the lookback window
    shared by all three indicators. RSI here uses simple rolling-mean
    smoothing (not Wilder smoothing).
    """
    out = df.copy()
    close = out["close"]

    out["SMA"] = close.rolling(period).mean()
    out["EMA"] = close.ewm(span=period, adjust=False).mean()

    # RSI = 100 - 100 / (1 + avg gain / avg loss) over the rolling window.
    change = close.diff()
    avg_gain = change.clip(lower=0).rolling(period).mean()
    avg_loss = (-change.clip(upper=0)).rolling(period).mean()
    relative_strength = avg_gain / avg_loss
    out["RSI"] = 100 - (100 / (1 + relative_strength))

    return out
46
+
47
+
48
def get_price_metrics(df: pd.DataFrame) -> dict:
    """Summarize latest close, change vs. prior close, and range extremes.

    NOTE(review): the ``high_52w``/``low_52w`` keys are simply the max/min of
    whatever range ``df`` covers — only truly 52-week figures when a year of
    history is passed in; confirm against callers.
    """
    closes = df["close"]
    latest = closes.iloc[-1]
    previous = closes.iloc[-2] if len(df) > 1 else closes.iloc[0]
    delta = latest - previous
    delta_pct = (delta / previous) * 100 if previous != 0 else 0

    return {
        "current_price": latest,
        "price_change": delta,
        "price_change_pct": delta_pct,
        "high_52w": df['high'].max(),
        "low_52w": df['low'].min(),
    }
62
+
63
+
64
def get_profitability_metrics(income_data: pd.Series) -> dict:
    """Compute margin percentages from one income-statement period.

    Parameters
    ----------
    income_data : pd.Series
        A single income-statement row; keys read: ``total_revenue``,
        ``gross_profit``, ``net_income``, ``operating_income``.

    Returns
    -------
    dict
        ``gross_margin`` and ``net_margin`` are always present (0 when the
        component is NaN, or when revenue is missing/non-positive);
        ``operating_margin`` is included only when operating income is
        available and not NaN.
    """
    total_rev = income_data.get('total_revenue', 0)
    gross_prof = income_data.get('gross_profit', 0)
    net_inc = income_data.get('net_income', 0)
    operating_inc = income_data.get('operating_income', 0)

    metrics = {}

    if total_rev and pd.notna(total_rev) and total_rev > 0:
        metrics["gross_margin"] = (gross_prof / total_rev) * 100 if pd.notna(gross_prof) else 0
        metrics["net_margin"] = (net_inc / total_rev) * 100 if pd.notna(net_inc) else 0
        # Fix: guard against NaN as well as 0/missing, consistent with the
        # pd.notna guards above — previously a NaN operating income produced
        # a NaN operating_margin instead of being skipped.
        if operating_inc and pd.notna(operating_inc):
            metrics["operating_margin"] = (operating_inc / total_rev) * 100
    else:
        metrics = {"gross_margin": 0, "net_margin": 0}

    return metrics
app/components/news.py ADDED
@@ -0,0 +1,723 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """News display components for the financial dashboard."""
2
+
3
+ import streamlit as st
4
+ import pandas as pd
5
+ from datetime import datetime
6
+ import html as html_module
7
+
8
+
9
def display_tradingview_news_card(news_item: dict):
    """Display a single news card with TradingView-inspired styling.

    Args:
        news_item: Mapping with 'timestamp' (datetime), 'impact', 'sentiment',
            'source', 'category', 'url' and optionally 'summary'.

    Renders one styled HTML card via ``st.markdown(..., unsafe_allow_html=True)``.
    """

    # Calculate time ago.
    # Fix: use total_seconds() instead of timedelta.seconds β€” .seconds wraps
    # at 24h, so a 2-day-old story could previously render as "30s ago".
    elapsed = int((datetime.now() - news_item['timestamp']).total_seconds())
    if elapsed < 60:
        time_ago = f"{elapsed}s ago"
    elif elapsed < 3600:
        time_ago = f"{elapsed // 60}m ago"
    elif elapsed < 86400:
        time_ago = f"{elapsed // 3600}h ago"
    else:
        time_ago = f"{elapsed // 86400}d ago"

    # Impact badge colors (TradingView style)
    impact_colors = {
        'high': '#F23645',    # Red
        'medium': '#FF9800',  # Orange
        'low': '#089981'      # Green
    }

    # Sentiment colors
    sentiment_colors = {
        'positive': '#089981',  # Green
        'negative': '#F23645',  # Red
        'neutral': '#787B86'    # Gray
    }

    impact_color = impact_colors.get(news_item['impact'], '#787B86')
    sentiment_color = sentiment_colors.get(news_item['sentiment'], '#787B86')

    # Escape HTML in text so news content cannot inject markup
    summary = html_module.escape(news_item.get('summary', '').strip())
    source = html_module.escape(news_item['source'])
    category = html_module.escape(news_item['category'])
    url = html_module.escape(news_item['url'])

    # TradingView-style card HTML
    card_html = f"""
    <div style="
        background: linear-gradient(135deg, #1E222D 0%, #131722 100%);
        border: 1px solid #2A2E39;
        border-radius: 8px;
        padding: 16px;
        margin-bottom: 12px;
        transition: all 0.2s ease;
        cursor: pointer;
        position: relative;
        overflow: hidden;
    " onmouseover="this.style.borderColor='#3861FB'; this.style.transform='translateY(-2px)'; this.style.boxShadow='0 4px 12px rgba(56, 97, 251, 0.15)';"
    onmouseout="this.style.borderColor='#2A2E39'; this.style.transform='translateY(0)'; this.style.boxShadow='none';">

        <!-- Left colored indicator bar -->
        <div style="
            position: absolute;
            left: 0;
            top: 0;
            bottom: 0;
            width: 3px;
            background: {impact_color};
        "></div>

        <!-- Header row -->
        <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 10px; margin-left: 8px;">
            <div style="display: flex; align-items: center; gap: 8px; flex-wrap: wrap;">
                <span style="
                    color: #3861FB;
                    font-weight: 600;
                    font-size: 13px;
                    font-family: -apple-system, BlinkMacSystemFont, 'Trebuchet MS', Roboto, Ubuntu, sans-serif;
                ">{source}</span>

                <span style="
                    background: {impact_color};
                    color: white;
                    padding: 2px 8px;
                    border-radius: 4px;
                    font-size: 10px;
                    font-weight: 700;
                    letter-spacing: 0.5px;
                ">{news_item['impact'].upper()}</span>

                <span style="
                    color: {sentiment_color};
                    font-size: 11px;
                    font-weight: 600;
                    padding: 2px 6px;
                    border: 1px solid {sentiment_color};
                    border-radius: 4px;
                ">{'β–²' if news_item['sentiment'] == 'positive' else 'β–Ό' if news_item['sentiment'] == 'negative' else '●'} {news_item['sentiment'].upper()}</span>

                <span style="
                    color: #787B86;
                    font-size: 11px;
                    background: rgba(120, 123, 134, 0.1);
                    padding: 2px 6px;
                    border-radius: 4px;
                ">#{category}</span>
            </div>

            <span style="color: #787B86; font-size: 11px; white-space: nowrap;">{time_ago}</span>
        </div>

        <!-- News summary -->
        <div style="
            color: #D1D4DC;
            font-size: 14px;
            line-height: 1.5;
            margin-bottom: 8px;
            margin-left: 8px;
            font-family: -apple-system, BlinkMacSystemFont, 'Trebuchet MS', Roboto, Ubuntu, sans-serif;
        ">{summary}</div>

        <!-- Read more link -->
        <a href="{url}" target="_blank" style="
            color: #3861FB;
            font-size: 12px;
            text-decoration: none;
            margin-left: 8px;
            display: inline-flex;
            align-items: center;
            gap: 4px;
            font-weight: 500;
        " onmouseover="this.style.color='#5880FF';" onmouseout="this.style.color='#3861FB';">
            Read Full Story β†’
        </a>
    </div>
    """

    st.markdown(card_html, unsafe_allow_html=True)
138
+
139
+
140
def display_news_card(news_item: dict):
    """Wrapper to maintain compatibility - calls TradingView-style card.

    Kept so older call sites that used the original card name keep working;
    delegates straight to display_tradingview_news_card.
    """
    display_tradingview_news_card(news_item)
143
+
144
+
145
def display_scrollable_news_section(df: pd.DataFrame, section_title: str, section_icon: str,
                                    section_subtitle: str, max_items: int = 20, height: str = "600px"):
    """Display a scrollable news section with TradingView styling.

    Args:
        df: News DataFrame; rows need 'timestamp', 'impact', 'sentiment',
            'source', 'category', 'url' and optionally 'title'/'summary'.
        section_title: Heading text for the section header.
        section_icon: Emoji/icon prepended to the heading.
        section_subtitle: Small caption under the heading.
        max_items: Maximum number of rows rendered.
        height: CSS height of the scrollable container.
    """

    if df.empty:
        st.markdown("""
        <div style="
            background: linear-gradient(135deg, #1E222D 0%, #131722 100%);
            border: 1px solid #2A2E39;
            border-radius: 8px;
            padding: 20px;
            text-align: center;
            color: #787B86;
        ">
            <p style="font-size: 16px; margin: 0;">πŸ“­ No news available for this section</p>
        </div>
        """, unsafe_allow_html=True)
        return

    # Build header HTML (no leading whitespace so Streamlit renders it as HTML)
    header_html = f"""<div style="background: linear-gradient(135deg, #2A2E39 0%, #1E222D 100%); border: 1px solid #363A45; border-radius: 8px 8px 0 0; padding: 16px 20px; margin-bottom: 0;">
<div style="display: flex; justify-content: space-between; align-items: center;">
<div>
<h3 style="color: #D1D4DC; margin: 0; font-size: 18px; font-weight: 600;">{section_icon} {section_title}</h3>
<p style="color: #787B86; margin: 4px 0 0 0; font-size: 12px;">{section_subtitle}</p>
</div>
<div style="background: rgba(56, 97, 251, 0.15); color: #3861FB; padding: 6px 12px; border-radius: 6px; font-size: 13px; font-weight: 600;">{len(df.head(max_items))} stories</div>
</div>
</div>"""

    # Render header
    st.markdown(header_html, unsafe_allow_html=True)

    # Loop-invariant color maps hoisted out of the loop
    impact_colors = {'high': '#F23645', 'medium': '#FF9800', 'low': '#089981'}
    sentiment_colors = {'positive': '#089981', 'negative': '#F23645', 'neutral': '#787B86'}

    # Build all news cards HTML
    news_cards_html = ""
    for idx, row in df.head(max_items).iterrows():
        news_item = row.to_dict()

        # Calculate time ago.
        # Fix: use total_seconds() instead of timedelta.seconds β€” .seconds
        # wraps at 24h, so multi-day-old items could show as seconds/minutes.
        elapsed = int((datetime.now() - news_item['timestamp']).total_seconds())
        if elapsed < 60:
            time_ago = f"{elapsed}s ago"
        elif elapsed < 3600:
            time_ago = f"{elapsed // 60}m ago"
        elif elapsed < 86400:
            time_ago = f"{elapsed // 3600}h ago"
        else:
            time_ago = f"{elapsed // 86400}d ago"

        impact_color = impact_colors.get(news_item['impact'], '#787B86')
        sentiment_color = sentiment_colors.get(news_item['sentiment'], '#787B86')

        # Escape HTML so news text cannot inject markup
        title = html_module.escape(str(news_item.get('title', '')).strip())
        summary = html_module.escape(str(news_item.get('summary', '')).strip())
        source = html_module.escape(news_item['source'])
        category = html_module.escape(news_item['category'])
        url = html_module.escape(news_item['url'])

        sentiment_symbol = 'β–²' if news_item['sentiment'] == 'positive' else 'β–Ό' if news_item['sentiment'] == 'negative' else '●'

        # Build card HTML (no leading whitespace)
        news_cards_html += f"""<div style="background: linear-gradient(135deg, #1E222D 0%, #131722 100%); border: 1px solid #2A2E39; border-radius: 8px; padding: 16px; margin-bottom: 12px; transition: all 0.2s ease; cursor: pointer; position: relative; overflow: hidden;" onmouseover="this.style.borderColor='#3861FB'; this.style.transform='translateY(-2px)'; this.style.boxShadow='0 4px 12px rgba(56, 97, 251, 0.15)';" onmouseout="this.style.borderColor='#2A2E39'; this.style.transform='translateY(0)'; this.style.boxShadow='none';">
<div style="position: absolute; left: 0; top: 0; bottom: 0; width: 3px; background: {impact_color};"></div>
<div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 10px; margin-left: 8px;">
<div style="display: flex; align-items: center; gap: 8px; flex-wrap: wrap;">
<span style="color: #3861FB; font-weight: 600; font-size: 13px; font-family: -apple-system, BlinkMacSystemFont, 'Trebuchet MS', Roboto, Ubuntu, sans-serif;">{source}</span>
<span style="background: {impact_color}; color: white; padding: 2px 8px; border-radius: 4px; font-size: 10px; font-weight: 700; letter-spacing: 0.5px;">{news_item['impact'].upper()}</span>
<span style="color: {sentiment_color}; font-size: 11px; font-weight: 600; padding: 2px 6px; border: 1px solid {sentiment_color}; border-radius: 4px;">{sentiment_symbol} {news_item['sentiment'].upper()}</span>
<span style="color: #787B86; font-size: 11px; background: rgba(120, 123, 134, 0.1); padding: 2px 6px; border-radius: 4px;">#{category}</span>
</div>
<span style="color: #787B86; font-size: 11px; white-space: nowrap;">{time_ago}</span>
</div>
<div style="color: #E0E3EB; font-size: 14px; font-weight: 600; margin-bottom: 6px; margin-left: 8px; font-family: -apple-system, BlinkMacSystemFont, 'Trebuchet MS', Roboto, Ubuntu, sans-serif;">{title if title else summary}</div>
<div style="color: #D1D4DC; font-size: 13px; line-height: 1.5; margin-bottom: 8px; margin-left: 8px; font-family: -apple-system, BlinkMacSystemFont, 'Trebuchet MS', Roboto, Ubuntu, sans-serif;">{summary if title else ''}</div>
<a href="{url}" target="_blank" style="color: #3861FB; font-size: 12px; text-decoration: none; margin-left: 8px; display: inline-flex; align-items: center; gap: 4px; font-weight: 500;" onmouseover="this.style.color='#5880FF';" onmouseout="this.style.color='#3861FB';">Read Full Story β†’</a>
</div>
"""

    # Generate unique class name to avoid CSS collisions when the widget
    # appears more than once on a page
    import random
    unique_id = f"news-scroll-{random.randint(10000, 99999)}"

    # Render scrollable container with all news cards (no leading whitespace)
    scrollable_html = f"""<style>
.{unique_id} {{
height: {height};
overflow-y: auto;
background: #0D0E13;
border: 1px solid #2A2E39;
border-top: none;
border-radius: 0 0 8px 8px;
padding: 16px;
}}
.{unique_id}::-webkit-scrollbar {{
width: 8px;
}}
.{unique_id}::-webkit-scrollbar-track {{
background: #1E222D;
border-radius: 4px;
}}
.{unique_id}::-webkit-scrollbar-thumb {{
background: #363A45;
border-radius: 4px;
}}
.{unique_id}::-webkit-scrollbar-thumb:hover {{
background: #434651;
}}
</style>
<div class="{unique_id}">
{news_cards_html}
</div>
"""

    st.markdown(scrollable_html, unsafe_allow_html=True)
263
+
264
+
265
def display_news_feed(df: pd.DataFrame, max_items: int = 20):
    """Display a feed of news items (legacy compatibility).

    Shows up to ``max_items`` rows of ``df`` as TradingView-style cards, or an
    info message when the frame is empty.
    """
    if df.empty:
        st.info("πŸ“­ No news available. Adjust your filters or refresh the feed.")
        return

    # Render each of the first max_items rows as a card
    for _, feed_row in df.head(max_items).iterrows():
        display_tradingview_news_card(feed_row.to_dict())
275
+
276
+
277
def display_news_statistics(stats: dict):
    """Display news feed statistics in metric cards.

    Expects ``stats`` with keys 'total', 'high_impact', 'breaking' and
    'last_update'; renders them as a four-column metric row.
    """
    total_col, impact_col, breaking_col, update_col = st.columns(4)

    total = stats['total']
    high_impact = stats['high_impact']
    breaking = stats['breaking']

    with total_col:
        st.metric(
            "Total Stories",
            f"{total}",
            help="Total news items in feed"
        )

    with impact_col:
        # Share of high-impact stories; max() guards against division by zero
        impact_share = high_impact / max(total, 1) * 100
        st.metric(
            "High Impact",
            f"{high_impact}",
            delta=f"{impact_share:.0f}%",
            help="High-impact market-moving news"
        )

    with breaking_col:
        st.metric(
            "Breaking News",
            f"{breaking}",
            delta="LIVE" if breaking > 0 else None,
            help="Breaking news alerts"
        )

    with update_col:
        st.metric(
            "Last Update",
            stats['last_update'],
            help="Time of last news fetch"
        )
311
+
312
+
313
def display_category_breakdown(stats: dict):
    """Display news breakdown by category using Streamlit components.

    Reads ``stats['by_category']`` (counts per category) and renders one
    column per known category with its count and share of the total.
    """
    if 'by_category' not in stats:
        return

    st.markdown("### πŸ“Š News by Category")

    categories = stats['by_category']
    total = sum(categories.values())

    if total == 0:
        st.info("No categorized news available")
        return

    # (category key, colored heading) pairs, rendered left to right
    column_specs = [
        ('macro', "**:blue[πŸ“ˆ MACRO]**"),
        ('geopolitical', "**:orange[🌍 GEOPOLITICAL]**"),
        ('markets', "**:green[πŸ’Ή MARKETS]**"),
    ]

    for column, (key, heading) in zip(st.columns(3), column_specs):
        count = categories.get(key, 0)
        share = (count / total) * 100
        with column:
            with st.container():
                st.markdown(heading)
                st.markdown(f"# {count}")
                st.caption(f"{share:.1f}% of total")
353
+
354
+
355
def display_breaking_news_banner(df: pd.DataFrame):
    """Display breaking news banner at the top with TradingView styling and ML-based impact score.

    Args:
        df: DataFrame already sorted so the first row is the highest-impact
            item; needs 'timestamp', 'source', 'url' and optionally
            'summary' and 'breaking_score'.
    """

    # With ML-based scoring, we trust that the passed DataFrame already contains
    # the highest-impact news, so no need to filter by is_breaking
    # (The scorer already selected the most impactful news)
    if not df.empty:
        latest = df.iloc[0]

        # Escape HTML so news text cannot inject markup
        summary = html_module.escape(latest.get('summary', '').strip())
        source = html_module.escape(latest['source'])
        url = html_module.escape(latest['url'])

        # Get impact score if available
        impact_score = latest.get('breaking_score', 0)
        score_display = f"{impact_score:.1f}" if impact_score > 0 else "N/A"

        # Determine score color and label
        if impact_score >= 80:
            score_color = "#FF3B30"  # Critical red
            score_label = "CRITICAL"
        elif impact_score >= 60:
            score_color = "#FF9500"  # High orange
            score_label = "HIGH"
        elif impact_score >= 40:
            score_color = "#FFCC00"  # Medium yellow
            score_label = "MEDIUM"
        else:
            score_color = "#34C759"  # Low green
            score_label = "LOW"

        # Calculate time ago.
        # Fix: use total_seconds() instead of timedelta.seconds β€” .seconds
        # wraps at 24h, so a multi-day-old story could show as "Ns ago".
        elapsed = int((datetime.now() - latest['timestamp']).total_seconds())
        if elapsed < 60:
            time_ago = f"{elapsed}s ago"
        elif elapsed < 3600:
            time_ago = f"{elapsed // 60}m ago"
        elif elapsed < 86400:
            time_ago = f"{elapsed // 3600}h ago"
        else:
            time_ago = f"{elapsed // 86400}d ago"

        # TradingView-style breaking news banner with impact score (no leading whitespace)
        banner_html = f"""<style>
@keyframes pulse-glow {{
0%, 100% {{ box-shadow: 0 0 20px rgba(242, 54, 69, 0.6); }}
50% {{ box-shadow: 0 0 30px rgba(242, 54, 69, 0.9); }}
}}
@keyframes slide-in {{
from {{ transform: translateX(-10px); opacity: 0; }}
to {{ transform: translateX(0); opacity: 1; }}
}}
</style>
<div style="background: linear-gradient(135deg, #F23645 0%, #C91B28 100%); border: 2px solid #FF6B78; border-radius: 12px; padding: 20px 24px; margin-bottom: 24px; animation: pulse-glow 2s ease-in-out infinite; position: relative; overflow: hidden;">
<div style="position: absolute; top: 0; left: 0; right: 0; bottom: 0; background: repeating-linear-gradient(45deg, transparent, transparent 10px, rgba(255, 255, 255, 0.03) 10px, rgba(255, 255, 255, 0.03) 20px); pointer-events: none;"></div>
<div style="position: relative; z-index: 1;">
<div style="display: flex; align-items: center; gap: 16px; margin-bottom: 12px;">
<div style="font-size: 32px; animation: pulse-glow 1s ease-in-out infinite; filter: drop-shadow(0 2px 8px rgba(0, 0, 0, 0.3));">🚨</div>
<div style="flex: 1;">
<div style="color: white; font-size: 14px; font-weight: 700; letter-spacing: 1.5px; text-transform: uppercase; margin-bottom: 4px; font-family: -apple-system, BlinkMacSystemFont, 'Trebuchet MS', Roboto, Ubuntu, sans-serif; text-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);">⚑ Breaking News</div>
<div style="color: rgba(255, 255, 255, 0.9); font-size: 11px; display: flex; align-items: center; gap: 8px; flex-wrap: wrap;">
<span style="background: rgba(255, 255, 255, 0.2); padding: 2px 8px; border-radius: 4px; font-weight: 600;">{source}</span>
<span style="opacity: 0.8;">β€’</span>
<span style="opacity: 0.8;">{time_ago}</span>
<span style="opacity: 0.8;">β€’</span>
<span style="background: {score_color}; color: white; padding: 2px 8px; border-radius: 4px; font-weight: 700; font-size: 10px; letter-spacing: 0.5px;">πŸ“Š IMPACT: {score_display}/100 ({score_label})</span>
</div>
</div>
<a href="{url}" target="_blank" style="background: white; color: #F23645; padding: 10px 20px; border-radius: 6px; font-size: 13px; font-weight: 700; text-decoration: none; display: inline-flex; align-items: center; gap: 6px; transition: all 0.2s ease; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.2);" onmouseover="this.style.transform='translateY(-2px)'; this.style.boxShadow='0 4px 12px rgba(0, 0, 0, 0.3)';" onmouseout="this.style.transform='translateY(0)'; this.style.boxShadow='0 2px 8px rgba(0, 0, 0, 0.2)';">READ NOW β†’</a>
</div>
<div style="color: white; font-size: 16px; font-weight: 500; line-height: 1.5; margin-left: 48px; font-family: -apple-system, BlinkMacSystemFont, 'Trebuchet MS', Roboto, Ubuntu, sans-serif; text-shadow: 0 1px 2px rgba(0, 0, 0, 0.2); animation: slide-in 0.5s ease-out;">{summary}</div>
</div>
</div>"""

        st.markdown(banner_html, unsafe_allow_html=True)
430
+
431
+
432
def display_prediction_card(prediction_item: dict):
    """Display a single prediction market card with probability visualization.

    Args:
        prediction_item: Mapping with 'source' and 'url' (required) and
            optionally 'title', 'yes_probability'/'no_probability' (percent,
            default 50.0 each), 'end_date' (str or datetime) and 'volume'
            (USD amount).
    """

    # Escape HTML in text so market titles cannot inject markup
    title = html_module.escape(prediction_item.get('title', '').strip())
    source = html_module.escape(prediction_item['source'])
    url = html_module.escape(prediction_item['url'])

    # Get probabilities (percentages; default to an even 50/50 split)
    yes_prob = prediction_item.get('yes_probability', 50.0)
    no_prob = prediction_item.get('no_probability', 50.0)

    # Determine bar color based on probabilities (>60% on either side counts
    # as a clear lean; otherwise the market is shown as balanced)
    if yes_prob > 60:
        bar_color = '#089981'  # Green - likely YES
        sentiment_text = 'YES LIKELY'
    elif no_prob > 60:
        bar_color = '#F23645'  # Red - likely NO
        sentiment_text = 'NO LIKELY'
    else:
        bar_color = '#FF9800'  # Orange - balanced
        sentiment_text = 'BALANCED'

    # Format end date if available; strings are shown as-is, datetimes are
    # converted to a "Closes in Nd" countdown
    end_date = prediction_item.get('end_date')
    if end_date:
        if isinstance(end_date, str):
            end_date_display = end_date
        else:
            days_until = (end_date - datetime.now()).days
            end_date_display = f"Closes in {days_until}d" if days_until > 0 else "Closed"
    else:
        end_date_display = ""

    # Volume display with M/K abbreviation; hidden entirely when volume <= 0
    volume = prediction_item.get('volume', 0)
    if volume > 1000000:
        volume_display = f"${volume/1000000:.1f}M volume"
    elif volume > 1000:
        volume_display = f"${volume/1000:.1f}K volume"
    elif volume > 0:
        volume_display = f"${volume:.0f} volume"
    else:
        volume_display = ""

    # Prediction card HTML; the two inner divs below form a stacked YES/NO
    # probability bar whose widths are the raw percentages (assumes
    # yes_prob + no_prob β‰ˆ 100 β€” TODO confirm upstream normalization)
    card_html = f"""
    <div style="
        background: linear-gradient(135deg, #1E222D 0%, #131722 100%);
        border: 1px solid #2A2E39;
        border-radius: 8px;
        padding: 16px;
        margin-bottom: 12px;
        transition: all 0.2s ease;
        cursor: pointer;
    " onmouseover="this.style.borderColor='#3861FB'; this.style.transform='translateY(-2px)';"
    onmouseout="this.style.borderColor='#2A2E39'; this.style.transform='translateY(0)';">

        <!-- Header -->
        <div style="margin-bottom: 12px;">
            <div style="display: flex; justify-content: space-between; align-items: flex-start; margin-bottom: 8px;">
                <span style="color: #3861FB; font-weight: 600; font-size: 13px;">{source}</span>
                <span style="
                    background: {bar_color};
                    color: white;
                    padding: 2px 8px;
                    border-radius: 4px;
                    font-size: 10px;
                    font-weight: 700;
                ">{sentiment_text}</span>
            </div>
            <div style="color: #D1D4DC; font-size: 14px; font-weight: 500; line-height: 1.4; margin-bottom: 8px;">
                {title}
            </div>
        </div>

        <!-- Probability Visualization -->
        <div style="margin-bottom: 10px;">
            <div style="display: flex; justify-content: space-between; margin-bottom: 4px;">
                <span style="color: #089981; font-size: 12px; font-weight: 600;">YES {yes_prob:.1f}%</span>
                <span style="color: #F23645; font-size: 12px; font-weight: 600;">NO {no_prob:.1f}%</span>
            </div>
            <!-- Horizontal probability bar -->
            <div style="
                display: flex;
                height: 8px;
                border-radius: 4px;
                overflow: hidden;
                background: #2A2E39;
            ">
                <div style="
                    width: {yes_prob}%;
                    background: #089981;
                    transition: width 0.3s ease;
                "></div>
                <div style="
                    width: {no_prob}%;
                    background: #F23645;
                    transition: width 0.3s ease;
                "></div>
            </div>
        </div>

        <!-- Footer info -->
        <div style="display: flex; justify-content: space-between; align-items: center;">
            <div style="color: #787B86; font-size: 11px;">
                {end_date_display}{" β€’ " + volume_display if volume_display and end_date_display else volume_display}
            </div>
            <a href="{url}" target="_blank" style="
                color: #3861FB;
                font-size: 11px;
                font-weight: 600;
                text-decoration: none;
            ">View Market β†’</a>
        </div>
    </div>
    """

    st.markdown(card_html, unsafe_allow_html=True)
551
+
552
+
553
def display_economic_event_card(event_item: dict):
    """Display a single economic event card with forecast/actual comparison.

    Args:
        event_item: Mapping with 'event_name' (or legacy 'title'), and
            optionally 'country', 'forecast', 'previous', 'actual',
            'importance' ('high'/'medium'/'low'), 'time_to_event' and 'url'.
    """

    # Escape HTML so event text cannot inject markup; 'event_name' is
    # preferred, falling back to the legacy 'title' key
    title = html_module.escape(event_item.get('event_name', event_item.get('title', '')).strip())
    country = html_module.escape(event_item.get('country', 'US'))
    # NOTE(review): url is escaped but never used in the card HTML below
    url = html_module.escape(event_item.get('url', ''))

    # Get values (may be None when the source did not publish them)
    forecast = event_item.get('forecast')
    previous = event_item.get('previous')
    actual = event_item.get('actual')
    importance = event_item.get('importance', 'medium')

    # Importance badge color
    importance_colors = {
        'high': '#F23645',
        'medium': '#FF9800',
        'low': '#787B86'
    }
    importance_color = importance_colors.get(importance, '#787B86')

    # Time to event (preformatted countdown string, e.g. "in 2h")
    time_to_event = event_item.get('time_to_event', '')

    # Format values with unit detection
    def format_value(val):
        """Render a value: '-' for None, numeric with one decimal, else str."""
        if val is None:
            return '-'
        if isinstance(val, (int, float)):
            # Check if it looks like a percentage
            # NOTE(review): heuristic β€” any |value| < 100 gets a '%' suffix,
            # which mislabels small absolute readings; verify against sources
            if abs(val) < 100:
                return f"{val:.1f}%"
            else:
                return f"{val:.1f}"
        return str(val)

    forecast_display = format_value(forecast)
    previous_display = format_value(previous)
    actual_display = format_value(actual)

    # Determine if beat/miss (only when both actual and forecast are known;
    # actual == forecast renders no badge)
    beat_miss_html = ""
    if actual is not None and forecast is not None:
        if actual > forecast:
            beat_miss_html = '<span style="color: #089981; font-weight: 700;">[BEAT]</span>'
        elif actual < forecast:
            beat_miss_html = '<span style="color: #F23645; font-weight: 700;">[MISS]</span>'

    # Country flag emojis (globe emoji for any unmapped country code)
    country_flags = {
        'US': 'πŸ‡ΊπŸ‡Έ',
        'EU': 'πŸ‡ͺπŸ‡Ί',
        'UK': 'πŸ‡¬πŸ‡§',
        'JP': 'πŸ‡―πŸ‡΅',
        'CN': 'πŸ‡¨πŸ‡³',
        'CA': 'πŸ‡¨πŸ‡¦',
        'AU': 'πŸ‡¦πŸ‡Ί'
    }
    flag = country_flags.get(country, '🌍')

    # Event card HTML; the Actual row is only rendered once a value exists
    card_html = f"""
    <div style="
        background: linear-gradient(135deg, #1E222D 0%, #131722 100%);
        border: 1px solid #2A2E39;
        border-radius: 8px;
        padding: 16px;
        margin-bottom: 12px;
        transition: all 0.2s ease;
    " onmouseover="this.style.borderColor='#3861FB'; this.style.transform='translateY(-2px)';"
    onmouseout="this.style.borderColor='#2A2E39'; this.style.transform='translateY(0)';">

        <!-- Header -->
        <div style="display: flex; justify-content: space-between; align-items: flex-start; margin-bottom: 12px;">
            <div style="flex: 1;">
                <div style="display: flex; align-items: center; gap: 8px; margin-bottom: 6px;">
                    <span style="font-size: 20px;">{flag}</span>
                    <span style="
                        background: {importance_color};
                        color: white;
                        padding: 2px 8px;
                        border-radius: 4px;
                        font-size: 10px;
                        font-weight: 700;
                    ">{importance.upper()}</span>
                </div>
                <div style="color: #D1D4DC; font-size: 14px; font-weight: 500; line-height: 1.4;">
                    {title}
                </div>
            </div>
            {f'<div style="color: #3861FB; font-size: 12px; font-weight: 600; white-space: nowrap; margin-left: 12px;">{time_to_event}</div>' if time_to_event else ''}
        </div>

        <!-- Values comparison -->
        <div style="background: #0D0E13; border-radius: 6px; padding: 10px; margin-bottom: 8px;">
            <div style="display: flex; justify-content: space-between; margin-bottom: 6px;">
                <span style="color: #787B86; font-size: 11px;">Forecast:</span>
                <span style="color: #D1D4DC; font-size: 12px; font-weight: 600;">{forecast_display}</span>
            </div>
            <div style="display: flex; justify-content: space-between; margin-bottom: 6px;">
                <span style="color: #787B86; font-size: 11px;">Previous:</span>
                <span style="color: #D1D4DC; font-size: 12px; font-weight: 600;">{previous_display}</span>
            </div>
            {f'<div style="display: flex; justify-content: space-between;"><span style="color: #787B86; font-size: 11px;">Actual:</span><span style="color: #D1D4DC; font-size: 12px; font-weight: 600;">{actual_display} {beat_miss_html}</span></div>' if actual is not None else ''}
        </div>
    </div>
    """

    st.markdown(card_html, unsafe_allow_html=True)
663
+
664
+
665
def display_economic_calendar_widget(events_df: pd.DataFrame):
    """Display economic calendar widget showing upcoming events.

    Args:
        events_df: DataFrame of upcoming events; rows may carry 'event_name'
            (or legacy 'title'), 'country', 'importance', 'time_to_event'
            and 'forecast'. Only the first 10 rows are rendered.
    """

    if events_df.empty:
        st.info("πŸ“… No upcoming economic events in the next 7 days")
        return

    # Build widget HTML with single-line styles (no leading whitespace)
    widget_html = """<div style="background: linear-gradient(135deg, #1E222D 0%, #131722 100%); border: 1px solid #2A2E39; border-radius: 12px; padding: 20px; margin-bottom: 20px;">
<div style="margin-bottom: 16px;">
<h3 style="color: #D1D4DC; font-size: 18px; font-weight: 600; margin: 0;">πŸ“… Economic Calendar</h3>
<p style="color: #787B86; font-size: 13px; margin: 4px 0 0 0;">Upcoming high-impact events</p>
</div>"""

    # Show top 10 events
    for idx, event in events_df.head(10).iterrows():
        # Get event details
        event_name = html_module.escape(event.get('event_name', event.get('title', '')))
        country = html_module.escape(event.get('country', 'US'))
        importance = event.get('importance', 'medium')
        time_to_event = event.get('time_to_event', '')
        forecast = event.get('forecast')

        # Country flags (globe for unmapped codes)
        country_flags = {
            'US': 'πŸ‡ΊπŸ‡Έ',
            'EU': 'πŸ‡ͺπŸ‡Ί',
            'UK': 'πŸ‡¬πŸ‡§',
            'JP': 'πŸ‡―πŸ‡΅',
            'CN': 'πŸ‡¨πŸ‡³'
        }
        flag = country_flags.get(country, '🌍')

        # Importance stars
        stars = '⭐' * ({'high': 3, 'medium': 2, 'low': 1}.get(importance, 1))

        # Format forecast.
        # Fix: the previous `forecast is not None` check raised on string
        # forecasts (e.g. "3.5%") and rendered pandas NaN as "nan".
        if isinstance(forecast, (int, float)):
            # NaN compares unequal to itself, so this catches missing values
            forecast_display = "N/A" if forecast != forecast else f"{forecast:.1f}"
        elif forecast:
            forecast_display = html_module.escape(str(forecast))
        else:
            forecast_display = "N/A"

        # Importance color
        importance_color = '#F23645' if importance == 'high' else '#FF9800' if importance == 'medium' else '#787B86'

        # Build event HTML (no leading whitespace, single-line styles)
        event_html = f"""<div style="background: #0D0E13; border-left: 3px solid {importance_color}; border-radius: 6px; padding: 12px; margin-bottom: 10px;">
<div style="display: flex; justify-content: space-between; align-items: center;">
<div style="flex: 1;">
<div style="color: #D1D4DC; font-size: 13px; font-weight: 500; margin-bottom: 4px;">{flag} {event_name}</div>
<div style="color: #787B86; font-size: 11px;">{stars} Forecast: {forecast_display}</div>
</div>
<div style="color: #3861FB; font-size: 12px; font-weight: 600; white-space: nowrap; margin-left: 12px;">{time_to_event}</div>
</div>
</div>
"""

        widget_html += event_html

    widget_html += "</div>"

    st.markdown(widget_html, unsafe_allow_html=True)
app/components/styles.py ADDED
@@ -0,0 +1,331 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Dark theme CSS styles for the financial dashboard."""
2
+
3
+ DARK_THEME_CSS = """
4
+ <style>
5
+ :root {
6
+ --primary-color: #0066ff;
7
+ --secondary-color: #1f77e2;
8
+ --success-color: #00d084;
9
+ --danger-color: #ff3838;
10
+ --warning-color: #ffa500;
11
+ --bg-dark: #0e1117;
12
+ --bg-darker: #010409;
13
+ --text-primary: #e6edf3;
14
+ --text-secondary: #8b949e;
15
+ --border-color: #30363d;
16
+ }
17
+
18
+ /* Main background */
19
+ html, body {
20
+ background-color: var(--bg-darker) !important;
21
+ color: var(--text-primary) !important;
22
+ margin: 0 !important;
23
+ padding: 0 !important;
24
+ }
25
+
26
+ /* Streamlit containers */
27
+ .main, [data-testid="stAppViewContainer"] {
28
+ background-color: var(--bg-dark) !important;
29
+ }
30
+
31
+ /* Hide header and footer */
32
+ [data-testid="stHeader"] {
33
+ background-color: var(--bg-dark) !important;
34
+ }
35
+
36
+ [data-testid="stToolbar"] {
37
+ background-color: var(--bg-dark) !important;
38
+ }
39
+
40
+ .stApp {
41
+ background-color: var(--bg-dark) !important;
42
+ }
43
+
44
+ [data-testid="stDecoration"] {
45
+ background-color: var(--bg-dark) !important;
46
+ }
47
+
48
+ [data-testid="stSidebar"] {
49
+ background-color: #0d1117 !important;
50
+ border-right: 1px solid var(--border-color);
51
+ }
52
+
53
+ /* Text colors */
54
+ p, span, div, h1, h2, h3, h4, h5, h6, label, li, a {
55
+ color: var(--text-primary) !important;
56
+ }
57
+
58
+ /* Headings */
59
+ h1, h2, h3 {
60
+ color: var(--text-primary) !important;
61
+ font-weight: 700 !important;
62
+ }
63
+
64
+ /* Links */
65
+ a {
66
+ color: var(--primary-color) !important;
67
+ text-decoration: none !important;
68
+ }
69
+
70
+ a:hover {
71
+ color: var(--secondary-color) !important;
72
+ text-decoration: underline !important;
73
+ }
74
+
75
+ /* Labels and text inputs */
76
+ label {
77
+ color: var(--text-primary) !important;
78
+ font-weight: 500 !important;
79
+ }
80
+
81
+ /* Paragraph text */
82
+ p {
83
+ color: var(--text-primary) !important;
84
+ line-height: 1.6 !important;
85
+ }
86
+
87
+ /* Metric card styling */
88
+ [data-testid="metric-container"] {
89
+ background: linear-gradient(135deg, #1f2937 0%, #111827 100%) !important;
90
+ border: 1px solid var(--border-color) !important;
91
+ border-radius: 10px !important;
92
+ padding: 1.5rem !important;
93
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.3) !important;
94
+ }
95
+
96
+ .metric-card {
97
+ background: linear-gradient(135deg, #1f2937 0%, #111827 100%);
98
+ padding: 1.5rem;
99
+ border-radius: 10px;
100
+ border: 1px solid var(--border-color);
101
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.3);
102
+ }
103
+
104
+ .metric-value {
105
+ font-size: 2.5rem;
106
+ font-weight: 700;
107
+ color: var(--primary-color);
108
+ margin: 0.5rem 0;
109
+ }
110
+
111
+ .metric-label {
112
+ font-size: 0.875rem;
113
+ color: var(--text-secondary);
114
+ text-transform: uppercase;
115
+ letter-spacing: 0.05em;
116
+ }
117
+
118
+ .section-title {
119
+ color: var(--text-primary);
120
+ border-bottom: 2px solid var(--primary-color);
121
+ padding-bottom: 1rem;
122
+ margin-top: 2rem;
123
+ margin-bottom: 1.5rem;
124
+ }
125
+
126
+ /* Button styling */
127
+ .stButton > button {
128
+ background: linear-gradient(135deg, var(--primary-color) 0%, var(--secondary-color) 100%) !important;
129
+ color: #ffffff !important;
130
+ border: none !important;
131
+ border-radius: 8px !important;
132
+ padding: 0.75rem 2rem !important;
133
+ font-weight: 700 !important;
134
+ transition: all 0.3s ease !important;
135
+ box-shadow: 0 4px 6px rgba(0, 102, 255, 0.2) !important;
136
+ }
137
+
138
+ .stButton > button:hover {
139
+ box-shadow: 0 8px 16px rgba(0, 102, 255, 0.4) !important;
140
+ transform: translateY(-2px) !important;
141
+ }
142
+
143
+ .stButton > button:active {
144
+ transform: translateY(0) !important;
145
+ }
146
+
147
+ /* Input fields */
148
+ [data-testid="stTextInput"] input,
149
+ [data-testid="stSlider"] input {
150
+ background-color: #161b22 !important;
151
+ border: 1px solid var(--border-color) !important;
152
+ color: var(--text-primary) !important;
153
+ border-radius: 6px !important;
154
+ }
155
+
156
+ [data-testid="stTextInput"] input::placeholder {
157
+ color: var(--text-secondary) !important;
158
+ }
159
+
160
+ /* Slider */
161
+ [data-testid="stSlider"] {
162
+ color: var(--primary-color) !important;
163
+ }
164
+
165
+ /* Tabs */
166
+ [data-testid="stTabs"] [role="tablist"] {
167
+ background-color: transparent !important;
168
+ border-bottom: 2px solid var(--border-color) !important;
169
+ }
170
+
171
+ [data-testid="stTabs"] [role="tab"] {
172
+ color: var(--text-secondary) !important;
173
+ background-color: transparent !important;
174
+ border: none !important;
175
+ padding: 1rem 1.5rem !important;
176
+ }
177
+
178
+ [data-testid="stTabs"] [role="tab"][aria-selected="true"] {
179
+ color: var(--primary-color) !important;
180
+ border-bottom: 3px solid var(--primary-color) !important;
181
+ }
182
+
183
+ /* Dataframe */
184
+ [data-testid="dataframe"] {
185
+ background-color: #0d1117 !important;
186
+ }
187
+
188
+ .dataframe {
189
+ background-color: #0d1117 !important;
190
+ color: var(--text-primary) !important;
191
+ }
192
+
193
+ /* Info/Error boxes */
194
+ [data-testid="stInfo"],
195
+ [data-testid="stSuccess"],
196
+ [data-testid="stWarning"],
197
+ [data-testid="stError"] {
198
+ background-color: rgba(0, 102, 255, 0.1) !important;
199
+ border-left: 4px solid var(--primary-color) !important;
200
+ border-radius: 6px !important;
201
+ }
202
+
203
+ [data-testid="stError"] {
204
+ background-color: rgba(255, 56, 56, 0.1) !important;
205
+ border-left-color: var(--danger-color) !important;
206
+ }
207
+
208
+ /* Markdown */
209
+ [data-testid="stMarkdown"] {
210
+ color: var(--text-primary) !important;
211
+ }
212
+
213
+ /* Expander */
214
+ [data-testid="stExpander"] {
215
+ background-color: #161b22 !important;
216
+ border: 1px solid var(--border-color) !important;
217
+ border-radius: 6px !important;
218
+ }
219
+
220
+ /* Metric text styling */
221
+ [data-testid="metric-container"] p {
222
+ color: var(--text-primary) !important;
223
+ }
224
+
225
+ [data-testid="metric-container"] [data-testid="stMetricValue"] {
226
+ color: var(--primary-color) !important;
227
+ font-weight: 700 !important;
228
+ }
229
+
230
+ /* Slider label color */
231
+ [data-testid="stSlider"] label {
232
+ color: var(--text-primary) !important;
233
+ }
234
+
235
+ /* Text input label */
236
+ [data-testid="stTextInput"] label {
237
+ color: var(--text-primary) !important;
238
+ }
239
+
240
+ /* Write and markdown text */
241
+ [data-testid="stMarkdownContainer"] p {
242
+ color: var(--text-primary) !important;
243
+ }
244
+
245
+ [data-testid="stMarkdownContainer"] strong {
246
+ color: var(--primary-color) !important;
247
+ font-weight: 600 !important;
248
+ }
249
+
250
+ /* Spinner text */
251
+ [data-testid="stSpinner"] {
252
+ color: var(--primary-color) !important;
253
+ }
254
+
255
+ /* Column separators */
256
+ hr {
257
+ border-color: var(--border-color) !important;
258
+ }
259
+
260
+ /* Scrollbar */
261
+ ::-webkit-scrollbar {
262
+ width: 8px;
263
+ height: 8px;
264
+ }
265
+
266
+ ::-webkit-scrollbar-track {
267
+ background: #0d1117;
268
+ }
269
+
270
+ ::-webkit-scrollbar-thumb {
271
+ background: var(--border-color);
272
+ border-radius: 4px;
273
+ }
274
+
275
+ ::-webkit-scrollbar-thumb:hover {
276
+ background: var(--primary-color);
277
+ }
278
+
279
+ /* Selection highlighting */
280
+ ::selection {
281
+ background-color: var(--primary-color);
282
+ color: #fff;
283
+ }
284
+
285
+ /* Fix all white backgrounds */
286
+ .stApp > header {
287
+ background-color: var(--bg-dark) !important;
288
+ }
289
+
290
+ .stApp > header::before {
291
+ background: none !important;
292
+ }
293
+
294
+ .stApp > header::after {
295
+ background: none !important;
296
+ }
297
+
298
+ /* Streamlit elements background */
299
+ [data-testid="stVerticalBlock"] {
300
+ background-color: transparent !important;
301
+ }
302
+
303
+ [data-testid="stVerticalBlockBorderWrapper"] {
304
+ background-color: transparent !important;
305
+ }
306
+
307
+ /* Remove white decorative elements */
308
+ .st-emotion-cache-1gvbgyg {
309
+ background-color: var(--bg-dark) !important;
310
+ }
311
+
312
+ .st-emotion-cache-1jicfl2 {
313
+ background-color: var(--bg-dark) !important;
314
+ }
315
+
316
+ /* Ensure all root divs are dark */
317
+ div[class*="st-"] {
318
+ background-color: transparent !important;
319
+ }
320
+
321
+ /* Modal and overlay backgrounds */
322
+ .stModal {
323
+ background-color: var(--bg-dark) !important;
324
+ }
325
+
326
+ /* Alert boxes background */
327
+ .stAlert {
328
+ background-color: rgba(0, 102, 255, 0.1) !important;
329
+ }
330
+ </style>
331
+ """
app/components/ui.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """UI component functions for the financial dashboard."""
2
+
3
+ import streamlit as st
4
+ import pandas as pd
5
+ import sys
6
+ import os
7
+
8
+ # Add parent directory to path for imports
9
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
10
+
11
+ from utils.formatters import format_financial_value
12
+ from components.data_sources import get_profitability_metrics
13
+
14
+
15
+ def display_price_metrics(metrics: dict):
16
+ """Display key price metrics in columns."""
17
+ st.markdown('<div class="section-title">πŸ“Š Price Metrics</div>', unsafe_allow_html=True)
18
+
19
+ col1, col2, col3, col4 = st.columns(4)
20
+
21
+ with col1:
22
+ st.metric("Current Price", f"${metrics['current_price']:.2f}",
23
+ f"{metrics['price_change']:+.2f}", delta_color="normal")
24
+
25
+ with col2:
26
+ st.metric("Day Change %", f"{metrics['price_change_pct']:+.2f}%",
27
+ None, delta_color="normal")
28
+
29
+ with col3:
30
+ st.metric("52W High", f"${metrics['high_52w']:.2f}")
31
+
32
+ with col4:
33
+ st.metric("52W Low", f"${metrics['low_52w']:.2f}")
34
+
35
+
36
def display_company_info(profile_info):
    """Render basic company information in a two-column layout.

    `profile_info` may be any object exposing name/sector/industry/
    country/exchange/website attributes; missing ones render as N/A.
    A falsy profile renders only the section title.
    """
    st.markdown('<div class="section-title">πŸ“‹ Company Information</div>', unsafe_allow_html=True)

    if not profile_info:
        return

    left_col, right_col = st.columns(2)

    with left_col:
        st.write(f"**Company Name:** {getattr(profile_info, 'name', 'N/A')}")
        st.write(f"**Sector:** {getattr(profile_info, 'sector', 'N/A')}")
        st.write(f"**Industry:** {getattr(profile_info, 'industry', 'N/A')}")

    with right_col:
        st.write(f"**Country:** {getattr(profile_info, 'country', 'N/A')}")
        st.write(f"**Exchange:** {getattr(profile_info, 'exchange', 'N/A')}")
        st.write(f"**Website:** {getattr(profile_info, 'website', 'N/A')}")
51
+
52
+
53
def _show_financial_metric(label: str, value) -> None:
    """Render one financial metric, falling back to "N/A" when missing/zero.

    Unlike the previous `value > 0` checks, negative values ARE displayed,
    so losses (e.g. negative net income) are shown instead of being hidden.
    """
    if pd.notna(value) and value != 0:
        st.metric(label, format_financial_value(value))
    else:
        st.metric(label, "N/A")


def display_financial_metrics(income_stmt: pd.DataFrame):
    """Display key income-statement metrics for the most recent period.

    Args:
        income_stmt: income-statement DataFrame, newest period first
            (row 0 is used). An empty frame renders only the title.
    """
    st.markdown('<div class="section-title">πŸ’° Financial Metrics</div>', unsafe_allow_html=True)

    latest_income = income_stmt.iloc[0] if len(income_stmt) > 0 else None
    if latest_income is None:
        return

    # First row of metrics.
    fin_col1, fin_col2, fin_col3, fin_col4 = st.columns(4)

    with fin_col1:
        _show_financial_metric("Total Revenue", latest_income.get('total_revenue', 0))

    with fin_col2:
        # Bug fix: previously required net_income > 0, so losses showed "N/A".
        _show_financial_metric("Net Income", latest_income.get('net_income', 0))

    with fin_col3:
        _show_financial_metric("Gross Profit", latest_income.get('gross_profit', 0))

    with fin_col4:
        _show_financial_metric("Operating Income", latest_income.get('operating_income', 0))

    # Second row of metrics.
    fin_col5, fin_col6, fin_col7, fin_col8 = st.columns(4)

    with fin_col5:
        # EPS keeps plain dollars-and-cents formatting; zero/negative EPS is valid.
        eps = latest_income.get('diluted_earnings_per_share', 0)
        st.metric("EPS (Diluted)", f"${eps:.2f}" if pd.notna(eps) else "N/A")

    with fin_col6:
        _show_financial_metric("EBITDA", latest_income.get('ebitda', 0))

    with fin_col7:
        _show_financial_metric("Cost of Revenue", latest_income.get('cost_of_revenue', 0))

    with fin_col8:
        _show_financial_metric("R&D Expense", latest_income.get('research_and_development_expense', 0))
121
+
122
+
123
def display_income_statement(income_stmt: pd.DataFrame):
    """Render the income statement as a formatted table (one row per period).

    Only the well-known columns are shown, in a fixed order; any that are
    absent from the frame are silently skipped. Monetary columns are
    pretty-printed via format_financial_value; period_ending is untouched.
    """
    st.markdown("### Income Statement")

    if income_stmt.empty:
        return

    preferred_order = [
        'period_ending',
        'total_revenue',
        'cost_of_revenue',
        'gross_profit',
        'operating_income',
        'net_income',
        'diluted_earnings_per_share',
        'ebitda'
    ]

    present = [name for name in preferred_order if name in income_stmt.columns]
    table = income_stmt[present].copy()

    for name in table.columns:
        if name != 'period_ending':
            table[name] = table[name].apply(format_financial_value)

    st.dataframe(table, use_container_width=True, hide_index=True)
149
+
150
+
151
def display_profitability_metrics(income_stmt: pd.DataFrame):
    """Render margin metrics plus, when possible, YoY revenue growth.

    Assumes income_stmt has at least one row (callers guard on .empty);
    row 0 is the latest period, row 1 the prior one.
    """
    st.markdown("### Profitability Metrics")

    left_col, right_col = st.columns(2)
    latest = income_stmt.iloc[0]
    margins = get_profitability_metrics(latest)

    with left_col:
        if "gross_margin" in margins:
            st.metric("Gross Margin", f"{margins['gross_margin']:.2f}%")
        if "net_margin" in margins:
            st.metric("Net Profit Margin", f"{margins['net_margin']:.2f}%")

    with right_col:
        if "operating_margin" in margins:
            st.metric("Operating Margin", f"{margins['operating_margin']:.2f}%")

        # YoY growth needs a prior period with strictly positive revenue.
        if len(income_stmt) > 1:
            prior_revenue = income_stmt.iloc[1].get('total_revenue', 0)
            latest_revenue = latest.get('total_revenue', 0)
            if prior_revenue and prior_revenue > 0:
                growth = ((latest_revenue - prior_revenue) / prior_revenue) * 100
                st.metric("Revenue Growth (YoY)", f"{growth:+.2f}%")
app/data.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Data fetching and processing utilities for the financial dashboard."""
2
+
3
+ import pandas as pd
4
+ from openbb import sdk
5
+
6
+
7
def load_stock_data(symbol: str) -> pd.DataFrame:
    """Fetch historical price data for `symbol` via the OpenBB SDK."""
    return sdk.equity.price.historical(symbol=symbol).to_dataframe()
11
+
12
+
13
def load_company_profile(symbol: str):
    """Fetch the company profile for `symbol`.

    Returns the first entry of the response's `results`, or None when the
    response has no (or an empty) `results` attribute.
    """
    response = sdk.equity.profile(symbol=symbol)
    results = getattr(response, 'results', None)
    return results[0] if results else None
18
+
19
+
20
def load_income_statement(symbol: str) -> pd.DataFrame:
    """Fetch income-statement fundamentals for `symbol` as a DataFrame."""
    return sdk.equity.fundamental.income(symbol=symbol).to_dataframe()
24
+
25
+
26
def calculate_technical_indicators(df: pd.DataFrame, period: int) -> pd.DataFrame:
    """Add SMA, EMA and RSI columns (window = `period`) to `df` and return it.

    The frame is modified in place and also returned. RSI uses simple
    rolling averages of gains/losses (Cutler's variant), not Wilder's
    exponential smoothing.
    """
    close = df["close"]
    df["SMA"] = close.rolling(period).mean()
    df["EMA"] = close.ewm(span=period, adjust=False).mean()

    # RSI: split the day-over-day moves into gains and losses, average each
    # over the window, then map the gain/loss ratio onto a 0-100 scale.
    move = close.diff()
    mean_gain = move.clip(lower=0).rolling(period).mean()
    mean_loss = (-move.clip(upper=0)).rolling(period).mean()
    strength = mean_gain / mean_loss
    df["RSI"] = 100 - (100 / (1 + strength))

    return df
41
+
42
+
43
def format_financial_value(value) -> str:
    """Render a number as a dollar string scaled to billions or millions.

    NaN renders as "N/A"; the sign is preserved (e.g. -2e9 -> "$-2.00B").
    """
    if pd.isna(value):
        return "N/A"

    magnitude = abs(value)
    if magnitude >= 1e9:
        return f"${value/1e9:.2f}B"
    if magnitude >= 1e6:
        return f"${value/1e6:.2f}M"
    return f"${value:.2f}"
53
+
54
+
55
def get_price_metrics(df: pd.DataFrame) -> dict:
    """Summarise the latest close, day-over-day change, and 52-week range.

    Assumes df has 'close', 'high' and 'low' columns with at least one row;
    with a single row the change is computed against itself (i.e. zero).
    """
    closes = df["close"]
    latest = closes.iloc[-1]
    previous = closes.iloc[-2] if len(df) > 1 else closes.iloc[0]

    change = latest - previous
    change_pct = (change / previous) * 100 if previous != 0 else 0

    return {
        "current_price": latest,
        "price_change": change,
        "price_change_pct": change_pct,
        "high_52w": df['high'].max(),
        "low_52w": df['low'].min(),
    }
+
70
+
71
def get_profitability_metrics(income_data: pd.Series) -> dict:
    """Compute margin percentages from a single income-statement row.

    Returns gross_margin and net_margin always (0 when revenue is not
    strictly positive, or the component is NaN) and operating_margin only
    when operating income is truthy.
    """
    revenue = income_data.get('total_revenue', 0)
    gross = income_data.get('gross_profit', 0)
    net = income_data.get('net_income', 0)
    operating = income_data.get('operating_income', 0)

    # No usable revenue -> degenerate zero margins, no operating margin.
    if not (revenue and revenue > 0):
        return {"gross_margin": 0, "net_margin": 0}

    result = {
        "gross_margin": (gross / revenue) * 100 if pd.notna(gross) else 0,
        "net_margin": (net / revenue) * 100 if pd.notna(net) else 0,
    }
    if operating:
        result["operating_margin"] = (operating / revenue) * 100
    return result
app/main.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Financial Analysis Dashboard - Main Application."""
2
+
3
+ import streamlit as st
4
+ from dotenv import load_dotenv
5
+ import os
6
+
7
+ from styles import DARK_THEME_CSS
8
+ from data import (
9
+ load_stock_data,
10
+ load_company_profile,
11
+ load_income_statement,
12
+ calculate_technical_indicators,
13
+ get_price_metrics,
14
+ )
15
+ from charts import (
16
+ create_price_chart,
17
+ create_rsi_chart,
18
+ create_financial_chart,
19
+ )
20
+ from ui import (
21
+ display_price_metrics,
22
+ display_company_info,
23
+ display_financial_metrics,
24
+ display_income_statement,
25
+ display_profitability_metrics,
26
+ )
27
+
28
+
29
# ---- Configuration ----
# Load environment variables from a .env file (API keys etc.).
load_dotenv()
# NOTE(review): `token` is read but never used anywhere visible in this
# module — confirm it is needed before removing.
token = os.getenv("TOKEN")

st.set_page_config(
    page_title="Financial Dashboard",
    page_icon="πŸ“ˆ",
    layout="wide",
    initial_sidebar_state="expanded",
    menu_items={
        "About": "A professional financial analysis dashboard with technical indicators"
    }
)

# ---- Apply Dark Theme ----
st.markdown(DARK_THEME_CSS, unsafe_allow_html=True)

# ---- Header ----
st.markdown("# πŸ“ˆ Financial Analysis Dashboard")
st.markdown("Real-time technical analysis with multiple indicators")

# ---- Sidebar Configuration ----
# `symbol` and `period` are module-level and consumed by main() below.
with st.sidebar:
    st.markdown("## βš™οΈ Settings")
    symbol = st.text_input("Stock Ticker", "AAPL", help="Enter a valid stock ticker symbol").upper()
    period = st.slider("Indicator Period", 5, 50, 20, help="Period for SMA, EMA, and RSI calculations")

    st.markdown("---")
    st.markdown("### About")
    st.info("This dashboard provides real-time technical analysis with comprehensive financial metrics.")
59
+
60
+
61
def main():
    """Load data for the sidebar-selected symbol and render the dashboard.

    Reads module-level `symbol` and `period` from the sidebar widgets.
    Any data-loading or rendering failure is reported via st.error/st.info
    instead of being raised.
    """
    # Bug fix: the button label previously contained mojibake ("οΏ½οΏ½");
    # restored to the intended chart emoji.
    if st.button("πŸ“Š Load Dashboard", key="load_btn", use_container_width=True):
        try:
            # Load data
            with st.spinner("Loading data..."):
                df = load_stock_data(symbol)
                profile_info = load_company_profile(symbol)
                income_stmt = load_income_statement(symbol)

            # Enrich price history with SMA/EMA/RSI columns.
            df = calculate_technical_indicators(df, period)

            # Headline price metrics.
            metrics = get_price_metrics(df)
            display_price_metrics(metrics)

            # Company profile section.
            display_company_info(profile_info)

            # Financial metrics + revenue trend — guarded so an empty
            # statement frame cannot raise on the column selection below.
            if not income_stmt.empty:
                display_financial_metrics(income_stmt)

                st.markdown('<div class="section-title">πŸ“Š Revenue & Net Income Trend</div>', unsafe_allow_html=True)
                income_chart_data = income_stmt[['period_ending', 'total_revenue', 'net_income']].dropna()

                if len(income_chart_data) > 0:
                    fig_financial = create_financial_chart(income_chart_data)
                    st.plotly_chart(fig_financial, use_container_width=True)

            # ---- Tabs ----
            tab1, tab2, tab3, tab4 = st.tabs([
                "πŸ“ˆ Price & Moving Averages",
                "πŸ“Š RSI Indicator",
                "πŸ“‰ TradingView",
                "πŸ“‹ Financials"
            ])

            # Tab 1: Price & Moving Averages
            with tab1:
                fig_price = create_price_chart(df, symbol, period)
                st.plotly_chart(fig_price, use_container_width=True)

            # Tab 2: RSI Indicator
            with tab2:
                fig_rsi = create_rsi_chart(df, symbol)
                st.plotly_chart(fig_rsi, use_container_width=True)

            # Tab 3: embedded TradingView widget for the selected symbol.
            with tab3:
                tradingview_html = f"""
                <div class="tradingview-widget-container">
                    <div id="tradingview_{symbol}"></div>
                    <script type="text/javascript" src="https://s3.tradingview.com/tv.js"></script>
                    <script type="text/javascript">
                    new TradingView.widget({{
                        "width": "100%",
                        "height": 600,
                        "symbol": "{symbol}",
                        "interval": "D",
                        "timezone": "Etc/UTC",
                        "theme": "dark",
                        "style": "1",
                        "locale": "en",
                        "enable_publishing": false,
                        "allow_symbol_change": true,
                        "container_id": "tradingview_{symbol}"
                    }});
                    </script>
                </div>
                """
                st.components.v1.html(tradingview_html, height=650)

            # Tab 4: Detailed Financials
            with tab4:
                if not income_stmt.empty:
                    display_income_statement(income_stmt)
                    display_profitability_metrics(income_stmt)

        except Exception as e:
            st.error(f"Error loading data for {symbol}: {str(e)}")
            st.info("Please check the ticker symbol and try again.")


if __name__ == "__main__":
    main()
app/pages/01_Stocks.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Stock Analysis Page - Comprehensive stock analysis with technical indicators."""
2
+
3
+ import streamlit as st
4
+ import sys
5
+ import os
6
+
7
+ # Add parent directory to path for imports
8
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
9
+
10
+ from components.styles import DARK_THEME_CSS
11
+ from components.data_sources import (
12
+ load_stock_data,
13
+ load_company_profile,
14
+ load_income_statement,
15
+ calculate_technical_indicators,
16
+ get_price_metrics,
17
+ )
18
+ from components.chart import (
19
+ create_price_chart,
20
+ create_rsi_chart,
21
+ create_financial_chart,
22
+ )
23
+ from components.ui import (
24
+ display_price_metrics,
25
+ display_company_info,
26
+ display_financial_metrics,
27
+ display_income_statement,
28
+ display_profitability_metrics,
29
+ )
30
+
31
+
32
# ---- Page Configuration ----
st.set_page_config(
    page_title="Stocks - Financial Dashboard",
    page_icon="πŸ“ˆ",
    layout="wide",
    initial_sidebar_state="expanded",
)

# ---- Apply Dark Theme ----
st.markdown(DARK_THEME_CSS, unsafe_allow_html=True)

# ---- Header ----
st.markdown("# πŸ“ˆ Stock Analysis")
st.markdown("Real-time technical analysis with comprehensive financial metrics")

# ---- Sidebar Configuration ----
# `symbol` and `period` are module-level and consumed by main() below.
with st.sidebar:
    st.markdown("## βš™οΈ Settings")
    symbol = st.text_input("Stock Ticker", "AAPL", help="Enter a valid stock ticker symbol").upper()
    period = st.slider("Indicator Period", 5, 50, 20, help="Period for SMA, EMA, and RSI calculations")

    st.markdown("---")
    st.markdown("### About")
    st.info("Analyze stocks with technical indicators, financials, and TradingView charts.")
56
+
57
+
58
def main():
    """Load data for the sidebar-selected ticker and render the stock page.

    Reads module-level `symbol` and `period` from the sidebar widgets.
    Any data-loading or rendering failure is reported via st.error/st.info
    instead of being raised.
    """
    if st.button("πŸ“Š Load Stock Data", key="load_btn", use_container_width=True):
        try:
            # Load data
            with st.spinner("Loading data..."):
                df = load_stock_data(symbol)
                profile_info = load_company_profile(symbol)
                income_stmt = load_income_statement(symbol)

            # Enrich price history with SMA/EMA/RSI columns.
            df = calculate_technical_indicators(df, period)

            # Headline price metrics.
            metrics = get_price_metrics(df)
            display_price_metrics(metrics)

            # Company profile section.
            display_company_info(profile_info)

            # Financial metrics + revenue trend — guarded so an empty
            # statement frame cannot raise on the column selection below.
            if not income_stmt.empty:
                display_financial_metrics(income_stmt)

                st.markdown('<div class="section-title">πŸ“Š Revenue & Net Income Trend</div>', unsafe_allow_html=True)
                income_chart_data = income_stmt[['period_ending', 'total_revenue', 'net_income']].dropna()

                if len(income_chart_data) > 0:
                    fig_financial = create_financial_chart(income_chart_data)
                    st.plotly_chart(fig_financial, use_container_width=True)

            # ---- Tabs ----
            tab1, tab2, tab3, tab4 = st.tabs([
                "πŸ“ˆ Price & Moving Averages",
                "πŸ“Š RSI Indicator",
                "πŸ“‰ TradingView",
                "πŸ“‹ Financials"
            ])

            # Tab 1: Price & Moving Averages
            with tab1:
                fig_price = create_price_chart(df, symbol, period)
                st.plotly_chart(fig_price, use_container_width=True)

            # Tab 2: RSI Indicator
            with tab2:
                fig_rsi = create_rsi_chart(df, symbol)
                st.plotly_chart(fig_rsi, use_container_width=True)

            # Tab 3: embedded TradingView widget for the selected symbol.
            with tab3:
                tradingview_html = f"""
                <div class="tradingview-widget-container">
                    <div id="tradingview_{symbol}"></div>
                    <script type="text/javascript" src="https://s3.tradingview.com/tv.js"></script>
                    <script type="text/javascript">
                    new TradingView.widget({{
                        "width": "100%",
                        "height": 600,
                        "symbol": "{symbol}",
                        "interval": "D",
                        "timezone": "Etc/UTC",
                        "theme": "dark",
                        "style": "1",
                        "locale": "en",
                        "enable_publishing": false,
                        "allow_symbol_change": true,
                        "container_id": "tradingview_{symbol}"
                    }});
                    </script>
                </div>
                """
                st.components.v1.html(tradingview_html, height=650)

            # Tab 4: Detailed Financials
            with tab4:
                if not income_stmt.empty:
                    display_income_statement(income_stmt)
                    display_profitability_metrics(income_stmt)

        except Exception as e:
            st.error(f"Error loading data for {symbol}: {str(e)}")
            st.info("Please check the ticker symbol and try again.")


if __name__ == "__main__":
    main()
app/pages/02_Crypto.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Cryptocurrency Analysis Page - Track and analyze cryptocurrencies."""
2
+
3
+ import streamlit as st
4
+ import sys
5
+ import os
6
+
7
+ # Add parent directory to path for imports
8
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
9
+
10
+ from components.styles import DARK_THEME_CSS
11
+
12
+
13
# ---- Page Configuration ----
st.set_page_config(
    page_title="Crypto - Financial Dashboard",
    page_icon="β‚Ώ",
    layout="wide",
    initial_sidebar_state="expanded",
)

# ---- Apply Dark Theme ----
st.markdown(DARK_THEME_CSS, unsafe_allow_html=True)

# ---- Header ----
st.markdown("# β‚Ώ Cryptocurrency Analysis")
st.markdown("Track and analyze major cryptocurrencies with real-time market data")

st.markdown("---")

# ---- Sidebar Configuration ----
# NOTE(review): `crypto_symbol` and `period` are collected but not used
# anywhere below yet — the page body is a placeholder.
with st.sidebar:
    st.markdown("## βš™οΈ Settings")
    crypto_symbol = st.selectbox(
        "Cryptocurrency",
        ["BTC/USD", "ETH/USD", "BNB/USD", "ADA/USD", "SOL/USD"],
        help="Select a cryptocurrency pair"
    )
    period = st.slider("Indicator Period", 5, 50, 20, help="Period for technical indicators")

    st.markdown("---")
    st.markdown("### About")
    st.info("Analyze cryptocurrencies with technical indicators and real-time market data.")


# ---- Main Content ----
st.info("🚧 This page is under development. Cryptocurrency analysis features coming soon!")

st.markdown("""
### Planned Features:

- **Real-time Price Data**: Live cryptocurrency prices from Binance
- **Market Metrics**: 24h volume, market cap, price changes
- **Technical Indicators**: SMA, EMA, RSI, MACD for crypto assets
- **TradingView Charts**: Interactive crypto charts
- **Market Sentiment**: Community sentiment analysis
- **Top Movers**: Biggest gainers and losers in 24h

Stay tuned for updates!
""")

# Placeholder metrics shown until live data wiring lands.
col1, col2, col3, col4 = st.columns(4)

with col1:
    st.metric("Current Price", "N/A", "N/A")

with col2:
    st.metric("24h Change", "N/A", "N/A")

with col3:
    st.metric("24h Volume", "N/A")

with col4:
    st.metric("Market Cap", "N/A")
app/pages/03_Forex.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Forex Trading Analysis Page - Analyze foreign exchange pairs."""
2
+
3
+ import streamlit as st
4
+ import sys
5
+ import os
6
+
7
+ # Add parent directory to path for imports
8
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
9
+
10
+ from components.styles import DARK_THEME_CSS
11
+
12
+
13
# ---- Page Configuration ----
st.set_page_config(
    page_title="Forex - Financial Dashboard",
    page_icon="πŸ’±",
    layout="wide",
    initial_sidebar_state="expanded",
)

# ---- Apply Dark Theme ----
st.markdown(DARK_THEME_CSS, unsafe_allow_html=True)

# ---- Header ----
st.markdown("# πŸ’± Forex Trading Analysis")
st.markdown("Foreign exchange analysis for major, minor, and exotic currency pairs")

st.markdown("---")

# ---- Sidebar Configuration ----
# NOTE(review): `forex_pair` and `period` are collected but not used
# anywhere below yet — the page body is a placeholder.
with st.sidebar:
    st.markdown("## βš™οΈ Settings")
    forex_pair = st.selectbox(
        "Currency Pair",
        ["EUR/USD", "GBP/USD", "USD/JPY", "USD/CHF", "AUD/USD", "USD/CAD"],
        help="Select a forex pair"
    )
    period = st.slider("Indicator Period", 5, 50, 20, help="Period for technical indicators")

    st.markdown("---")
    st.markdown("### About")
    st.info("Analyze forex pairs with technical indicators and real-time exchange rates.")


# ---- Main Content ----
st.info("🚧 This page is under development. Forex analysis features coming soon!")

st.markdown("""
### Planned Features:

- **Real-time Exchange Rates**: Live forex rates from multiple sources
- **Major, Minor & Exotic Pairs**: Comprehensive coverage
- **Technical Analysis**: Full suite of technical indicators
- **Pip Calculator**: Calculate pip values for position sizing
- **Economic Calendar**: Important economic events
- **TradingView Charts**: Interactive forex charts

Stay tuned for updates!
""")

# Placeholder metrics shown until live data wiring lands.
col1, col2, col3, col4 = st.columns(4)

with col1:
    st.metric("Current Rate", "N/A", "N/A")

with col2:
    st.metric("24h Change", "N/A", "N/A")

with col3:
    st.metric("Bid Price", "N/A")

with col4:
    st.metric("Ask Price", "N/A")
app/pages/04_Screener.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Market Screener Page - Find investment opportunities across markets."""
2
+
3
+ import streamlit as st
4
+ import sys
5
+ import os
6
+
7
+ # Add parent directory to path for imports
8
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
9
+
10
+ from components.styles import DARK_THEME_CSS
11
+
12
+
13
# ---- Page Configuration ----
st.set_page_config(
    page_title="Screener - Financial Dashboard",
    page_icon="πŸ”",
    layout="wide",
    initial_sidebar_state="expanded",
)

# ---- Apply Dark Theme ----
st.markdown(DARK_THEME_CSS, unsafe_allow_html=True)

# ---- Header ----
st.markdown("# πŸ” Market Screener")
st.markdown("Advanced screening tools to find investment opportunities across markets")

st.markdown("---")

# ---- Sidebar Configuration ----
# NOTE(review): all filter values below are collected but not consumed yet —
# the "Run Screener" button only shows a progress message.
with st.sidebar:
    st.markdown("## βš™οΈ Screening Filters")

    asset_type = st.selectbox(
        "Asset Type",
        ["Stocks", "Crypto", "Forex"],
        help="Select asset type to screen"
    )

    st.markdown("### Price Filters")
    min_price = st.number_input("Min Price ($)", value=0.0, step=1.0)
    max_price = st.number_input("Max Price ($)", value=1000.0, step=10.0)

    st.markdown("### Technical Filters")
    rsi_min = st.slider("Min RSI", 0, 100, 30)
    rsi_max = st.slider("Max RSI", 0, 100, 70)

    volume_min = st.number_input("Min Volume", value=1000000, step=100000)

    st.markdown("---")
    if st.button("πŸ” Run Screener", use_container_width=True):
        st.info("Screening in progress...")


# ---- Main Content ----
st.info("🚧 This page is under development. Market screener features coming soon!")

st.markdown("""
### Planned Features:

- **Multi-Asset Screening**: Stocks, crypto, and forex
- **Technical Filters**: RSI, MACD, moving averages, volume
- **Fundamental Filters**: P/E ratio, market cap, revenue growth
- **Pattern Recognition**: Chart patterns and technical setups
- **Custom Criteria**: Build your own screening rules
- **Export Results**: Download screening results as CSV
- **Saved Screens**: Save your favorite screening criteria

Stay tuned for updates!
""")

# Placeholder results section until the screener backend exists.
st.markdown("### Screening Results")
st.info("No screening results yet. Configure filters and run the screener.")
app/pages/05_Dashboard.py ADDED
@@ -0,0 +1,951 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ News & AI Dashboard Page - Real-time Financial Intelligence
3
+ Powered by professional-grade news monitoring with low-latency delivery
4
+ """
5
+
6
+ import streamlit as st
7
+ import sys
8
+ import os
9
+ import logging
10
+
11
+ # Suppress noisy Playwright asyncio errors
12
+ logging.getLogger('asyncio').setLevel(logging.CRITICAL)
13
+ logging.getLogger('playwright').setLevel(logging.WARNING)
14
+
15
+ # Add parent directory to path for imports
16
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
17
+
18
+ from components.styles import DARK_THEME_CSS
19
+ from components.news import (
20
+ display_news_statistics,
21
+ display_category_breakdown,
22
+ display_breaking_news_banner,
23
+ display_scrollable_news_section,
24
+ display_prediction_card,
25
+ display_economic_event_card,
26
+ display_economic_calendar_widget
27
+ )
28
+ from utils.breaking_news_scorer import get_breaking_news_scorer
29
+ from utils.ai_summary_store import init_storage, enqueue_items, fetch_summaries, get_status
30
+ from utils.ai_summary_worker import start_worker_if_needed
31
+
32
+ # Import news scrapers
33
+ try:
34
+ from services.news_scraper import FinanceNewsScraper
35
+ RSS_AVAILABLE = True
36
+ except ImportError:
37
+ RSS_AVAILABLE = False
38
+
39
+ try:
40
+ from services.twitter_news_playwright import TwitterFinanceMonitor
41
+ TWITTER_AVAILABLE = True
42
+ except ImportError:
43
+ TWITTER_AVAILABLE = False
44
+
45
+ try:
46
+ from services.reddit_news import RedditFinanceMonitor
47
+ REDDIT_AVAILABLE = True
48
+ except ImportError:
49
+ REDDIT_AVAILABLE = False
50
+
51
+ try:
52
+ from services.ai_tech_news import AITechNewsScraper
53
+ AI_TECH_AVAILABLE = True
54
+ except ImportError:
55
+ AI_TECH_AVAILABLE = False
56
+
57
+ try:
58
+ from services.prediction_markets import PredictionMarketsScraper
59
+ PREDICTIONS_AVAILABLE = True
60
+ except ImportError:
61
+ PREDICTIONS_AVAILABLE = False
62
+
63
+ try:
64
+ from services.sectoral_news import SectoralNewsScraper
65
+ SECTORAL_AVAILABLE = True
66
+ except ImportError:
67
+ SECTORAL_AVAILABLE = False
68
+
69
+ try:
70
+ from services.market_events import MarketEventsScraper
71
+ EVENTS_AVAILABLE = True
72
+ except ImportError:
73
+ EVENTS_AVAILABLE = False
74
+
75
+ try:
76
+ from services.economic_calendar import EconomicCalendarService
77
+ CALENDAR_AVAILABLE = True
78
+ except ImportError:
79
+ CALENDAR_AVAILABLE = False
80
+
81
+
82
+ # ---- Page Configuration ----
83
+ st.set_page_config(
84
+ page_title="News Dashboard - Financial Platform",
85
+ page_icon="πŸ“°",
86
+ layout="wide",
87
+ initial_sidebar_state="expanded",
88
+ )
89
+
90
+ # ---- Apply Dark Theme ----
91
+ st.markdown(DARK_THEME_CSS, unsafe_allow_html=True)
92
+
93
+ # Initialize news monitors (with caching)
94
+ if 'rss_monitor' not in st.session_state and RSS_AVAILABLE:
95
+ st.session_state.rss_monitor = FinanceNewsScraper()
96
+
97
+ if 'twitter_monitor' not in st.session_state and TWITTER_AVAILABLE:
98
+ st.session_state.twitter_monitor = TwitterFinanceMonitor()
99
+
100
+ if 'reddit_monitor' not in st.session_state and REDDIT_AVAILABLE:
101
+ st.session_state.reddit_monitor = RedditFinanceMonitor()
102
+
103
+ if 'ai_tech_monitor' not in st.session_state and AI_TECH_AVAILABLE:
104
+ st.session_state.ai_tech_monitor = AITechNewsScraper()
105
+
106
+ if 'prediction_markets_monitor' not in st.session_state and PREDICTIONS_AVAILABLE:
107
+ st.session_state.prediction_markets_monitor = PredictionMarketsScraper()
108
+
109
+ if 'sectoral_news_monitor' not in st.session_state and SECTORAL_AVAILABLE:
110
+ st.session_state.sectoral_news_monitor = SectoralNewsScraper()
111
+
112
+ if 'market_events_monitor' not in st.session_state and EVENTS_AVAILABLE:
113
+ st.session_state.market_events_monitor = MarketEventsScraper()
114
+
115
+ if 'economic_calendar_service' not in st.session_state and CALENDAR_AVAILABLE:
116
+ st.session_state.economic_calendar_service = EconomicCalendarService()
117
+
118
+ rss_monitor = st.session_state.get('rss_monitor')
119
+ twitter_monitor = st.session_state.get('twitter_monitor')
120
+ reddit_monitor = st.session_state.get('reddit_monitor')
121
+ ai_tech_monitor = st.session_state.get('ai_tech_monitor')
122
+ prediction_markets_monitor = st.session_state.get('prediction_markets_monitor')
123
+ sectoral_news_monitor = st.session_state.get('sectoral_news_monitor')
124
+ market_events_monitor = st.session_state.get('market_events_monitor')
125
+ economic_calendar_service = st.session_state.get('economic_calendar_service')
126
+
127
+ # Initialize unified cache manager
128
+ if 'news_cache_manager' not in st.session_state:
129
+ from utils.news_cache import NewsCacheManager
130
+ st.session_state.news_cache_manager = NewsCacheManager(default_ttl=180)
131
+
132
+ cache_manager = st.session_state.news_cache_manager
133
+
134
+ # ---- Header ----
135
+ st.markdown("# πŸ€– Live Financial News & AI Dashboard")
136
+ st.markdown("AI-powered market insights with sentiment analysis and trading recommendations. Real-time macro, markets & geopolitical intelligence")
137
+
138
+ st.markdown("---")
139
+
140
+ # ---- Sidebar Filters ----
141
+ with st.sidebar:
142
+ st.markdown("## βš™οΈ News Filters")
143
+
144
+ # Category filter
145
+ category_filter = st.selectbox(
146
+ "Category",
147
+ ["all", "macro", "markets", "geopolitical"],
148
+ format_func=lambda x: x.upper() if x != "all" else "ALL CATEGORIES",
149
+ help="Filter by news category"
150
+ )
151
+
152
+ # Sentiment filter
153
+ sentiment_filter = st.selectbox(
154
+ "Sentiment",
155
+ ["all", "positive", "negative", "neutral"],
156
+ format_func=lambda x: x.upper() if x != "all" else "ALL SENTIMENTS",
157
+ help="Filter by market sentiment"
158
+ )
159
+
160
+ # Impact filter
161
+ impact_filter = st.selectbox(
162
+ "Impact Level",
163
+ ["all", "high", "medium", "low"],
164
+ format_func=lambda x: x.upper() if x != "all" else "ALL IMPACT LEVELS",
165
+ help="Filter by market impact"
166
+ )
167
+
168
+ st.markdown("---")
169
+
170
+ # Refresh controls
171
+ st.markdown("### πŸ”„ Refresh Settings")
172
+
173
+ col1, col2 = st.columns(2)
174
+ with col1:
175
+ if st.button("πŸ”„ Refresh Now", use_container_width=True, type="primary"):
176
+ st.session_state.force_refresh = True
177
+ st.rerun()
178
+
179
+ with col2:
180
+ auto_refresh = st.checkbox("Auto-refresh", value=True, help="Auto-refresh every 3 minutes")
181
+
182
+ if auto_refresh:
183
+ st.info("⏱️ Auto-refresh enabled (3 min)")
184
+
185
+ st.markdown("---")
186
+ st.markdown("### πŸ“Š Feed Statistics")
187
+
188
+ # Get cache statistics from cache manager
189
+ cache_stats = cache_manager.get_statistics()
190
+
191
+ # Calculate totals from cache
192
+ total_stories = (
193
+ cache_stats['twitter']['items'] +
194
+ cache_stats['reddit']['items'] +
195
+ cache_stats['rss']['items'] +
196
+ cache_stats.get('ai_tech', {}).get('items', 0)
197
+ )
198
+
199
+ # Display metrics
200
+ st.metric("Total Stories", total_stories)
201
+ st.metric("Cache Status", "βœ… Active" if total_stories > 0 else "⏳ Loading")
202
+
203
+ # Show cache age for transparency
204
+ if cache_stats['twitter']['is_valid']:
205
+ age = int(cache_stats['twitter']['age_seconds'])
206
+ st.caption(f"πŸ• Cache age: {age}s / 180s")
207
+ else:
208
+ st.caption("πŸ”„ Fetching fresh data...")
209
+
210
+ st.markdown("---")
211
+ st.markdown("### ℹ️ Sources")
212
+
213
+ # Count total sources
214
+ twitter_sources = len(twitter_monitor.SOURCES) if twitter_monitor else 0
215
+ reddit_sources = len(reddit_monitor.SUBREDDITS) if reddit_monitor else 0
216
+ rss_sources = len(rss_monitor.SOURCES) if rss_monitor else 0
217
+ ai_tech_sources = len(ai_tech_monitor.SOURCES) if ai_tech_monitor else 0
218
+ prediction_sources = 3 # Polymarket, Metaculus, CME FedWatch
219
+ sectoral_sources = 7 # 7 sectors
220
+ events_sources = 3 # Earnings, indicators, central banks
221
+ total_sources = twitter_sources + reddit_sources + rss_sources + ai_tech_sources + prediction_sources + sectoral_sources + events_sources
222
+
223
+ st.markdown(f"""
224
+ <div style='font-size: 11px; line-height: 1.6;'>
225
+
226
+ **Twitter/X Accounts ({twitter_sources})**
227
+ β€’ WalterBloomberg β€’ FXHedge β€’ DeItaone
228
+ β€’ Reuters β€’ Bloomberg β€’ FT β€’ WSJ
229
+ β€’ CNBC β€’ BBC β€’ MarketWatch
230
+ β€’ The Economist β€’ AP β€’ AFP
231
+
232
+ **Reddit Communities ({reddit_sources})**
233
+ β€’ r/wallstreetbets β€’ r/stocks β€’ r/investing
234
+ β€’ r/algotrading β€’ r/economics β€’ r/geopolitics
235
+ β€’ r/options β€’ r/SecurityAnalysis
236
+
237
+ **RSS + Web Scraping ({rss_sources})**
238
+ β€’ CNBC β€’ Bloomberg β€’ FT β€’ WSJ
239
+ β€’ BBC β€’ Yahoo Finance β€’ Google News
240
+ β€’ The Economist β€’ Fed (2.0x) β€’ ECB (2.0x) β€’ IMF
241
+
242
+ **AI & Tech Sources ({ai_tech_sources})**
243
+ β€’ OpenAI β€’ Google AI β€’ Microsoft AI β€’ Meta AI
244
+ β€’ DeepMind β€’ Anthropic β€’ AWS AI β€’ NVIDIA
245
+ β€’ TechCrunch β€’ The Verge β€’ VentureBeat
246
+ β€’ MIT Tech Review β€’ Wired β€’ Ars Technica
247
+
248
+ **Prediction Markets ({prediction_sources})**
249
+ β€’ Polymarket β€’ Metaculus β€’ CME FedWatch
250
+
251
+ **Sectoral Coverage ({sectoral_sources})**
252
+ β€’ Finance β€’ Tech β€’ Energy β€’ Healthcare
253
+ β€’ Consumer β€’ Industrials β€’ Real Estate
254
+
255
+ **Market Events ({events_sources})**
256
+ β€’ Earnings Calendar β€’ Economic Indicators
257
+ β€’ Central Bank Events (Fed, ECB, BoE, BoJ)
258
+
259
+ **Total: {total_sources} Premium Sources**
260
+ </div>
261
+ """, unsafe_allow_html=True)
262
+
263
+
264
+ # ---- Main Content Area ----
265
+
266
+ # Check for forced refresh (don't clear yet - wait until after fetching)
267
+ force_refresh = st.session_state.get('force_refresh', False)
268
+
269
+ # Initialize AI summary store/worker (shared across sessions/processes)
270
+ init_storage()
271
+ start_worker_if_needed()
272
+
273
+ # Fetch news from all sources IN PARALLEL for maximum performance
274
+ import pandas as pd
275
+ from concurrent.futures import ThreadPoolExecutor, as_completed
276
+
277
+ twitter_df = pd.DataFrame()
278
+ reddit_df = pd.DataFrame()
279
+ rss_all_df = pd.DataFrame()
280
+ rss_main_df = pd.DataFrame()
281
+ ai_tech_df = pd.DataFrame()
282
+ predictions_df = pd.DataFrame()
283
+ sectoral_news_df = pd.DataFrame()
284
+ market_events_df = pd.DataFrame()
285
+ economic_calendar_df = pd.DataFrame()
286
+
287
def fetch_twitter_news():
    """Fetch Twitter/X news via the shared cache manager.

    Returns:
        tuple: (pandas.DataFrame, str | None) — the news frame and an error
        message. The frame is empty when the monitor is unavailable, the
        cache yields nothing, or fetching fails; the message is None on
        success.
    """
    if not twitter_monitor:
        return pd.DataFrame(), None
    try:
        # Smart caching: cached tweets are reused unless force_refresh is set.
        raw_items = cache_manager.get_news(
            source='twitter',
            fetcher_func=twitter_monitor.scrape_twitter_news,
            force_refresh=force_refresh,
            max_tweets=50
        )
        if not raw_items:
            return pd.DataFrame(), None
        frame = pd.DataFrame(raw_items)
        if not frame.empty:
            # Normalize string timestamps to datetime64 for later sorting.
            frame['timestamp'] = pd.to_datetime(frame['timestamp'])
        return frame, None
    except Exception as e:
        # Best-effort source: report the failure instead of crashing the page.
        return pd.DataFrame(), f"Twitter scraping unavailable: {e}"
306
+
307
def fetch_reddit_news():
    """Fetch Reddit news via the shared cache manager.

    Returns:
        tuple: (pandas.DataFrame, str | None) — the news frame and an error
        message (None on success). Empty frame when the monitor is missing,
        the cache yields nothing, or fetching fails.
    """
    if not reddit_monitor:
        return pd.DataFrame(), None
    try:
        # Smart caching: reuse cached posts unless a refresh was forced.
        raw_posts = cache_manager.get_news(
            source='reddit',
            fetcher_func=reddit_monitor.scrape_reddit_news,
            force_refresh=force_refresh,
            max_posts=50,
            hours=12
        )
        if not raw_posts:
            return pd.DataFrame(), None
        frame = pd.DataFrame(raw_posts)
        if not frame.empty:
            # Convert timestamps so downstream sorting/filtering works.
            frame['timestamp'] = pd.to_datetime(frame['timestamp'])
        return frame, None
    except Exception as e:
        # Best-effort source: surface the error text, never raise.
        return pd.DataFrame(), f"Reddit scraping unavailable: {e}"
327
+
328
def fetch_rss_news():
    """Fetch RSS + web-scraped news via the shared cache manager.

    Returns:
        tuple: (pandas.DataFrame, str | None) — news frame plus error message
        (None on success); the frame is empty on any failure or miss.
    """
    if not rss_monitor:
        return pd.DataFrame(), None
    try:
        # Smart caching: serve cached feed items unless force_refresh is set.
        raw_items = cache_manager.get_news(
            source='rss',
            fetcher_func=rss_monitor.scrape_news,
            force_refresh=force_refresh,
            max_items=100
        )
        if not raw_items:
            return pd.DataFrame(), None
        frame = pd.DataFrame(raw_items)
        if not frame.empty:
            # Parse timestamp strings into datetime64 values.
            frame['timestamp'] = pd.to_datetime(frame['timestamp'])
        return frame, None
    except Exception as e:
        # Degrade gracefully: report, don't propagate.
        return pd.DataFrame(), f"RSS scraping unavailable: {e}"
347
+
348
def fetch_ai_tech_news():
    """Fetch AI/Tech news via the shared cache manager.

    Returns:
        tuple: (pandas.DataFrame, str | None) — news frame and error message
        (None on success); empty frame on monitor absence, cache miss, or
        failure.
    """
    if not ai_tech_monitor:
        return pd.DataFrame(), None
    try:
        # Smart caching: reuse cached stories unless a refresh was forced.
        raw_items = cache_manager.get_news(
            source='ai_tech',
            fetcher_func=ai_tech_monitor.scrape_ai_tech_news,
            force_refresh=force_refresh,
            max_items=100,
            hours=48
        )
        if not raw_items:
            return pd.DataFrame(), None
        frame = pd.DataFrame(raw_items)
        if not frame.empty:
            # Normalize timestamps for consistent downstream handling.
            frame['timestamp'] = pd.to_datetime(frame['timestamp'])
        return frame, None
    except Exception as e:
        # Best-effort source: return the failure as a message.
        return pd.DataFrame(), f"AI/Tech news unavailable: {e}"
368
+
369
def fetch_prediction_markets():
    """Fetch prediction-market data via the shared cache manager.

    Returns:
        tuple: (pandas.DataFrame, str | None) — predictions frame and error
        message (None on success); empty frame on any miss or failure.
    """
    if not prediction_markets_monitor:
        return pd.DataFrame(), None
    try:
        # Cached prediction data is reused unless force_refresh is set.
        raw_predictions = cache_manager.get_news(
            source='predictions',
            fetcher_func=prediction_markets_monitor.scrape_predictions,
            force_refresh=force_refresh,
            max_items=50
        )
        if not raw_predictions:
            return pd.DataFrame(), None
        frame = pd.DataFrame(raw_predictions)
        if not frame.empty:
            # Timestamps arrive as strings; convert to datetime64.
            frame['timestamp'] = pd.to_datetime(frame['timestamp'])
        return frame, None
    except Exception as e:
        # Never raise from a feed fetcher; report instead.
        return pd.DataFrame(), f"Prediction markets unavailable: {e}"
387
+
388
def fetch_sectoral_news():
    """Fetch sector-specific news via the shared cache manager.

    Returns:
        tuple: (pandas.DataFrame, str | None) — sectoral news frame and error
        message (None on success); empty frame on any miss or failure.
    """
    if not sectoral_news_monitor:
        return pd.DataFrame(), None
    try:
        # Cached sector stories are reused unless a refresh was forced.
        raw_items = cache_manager.get_news(
            source='sectoral_news',
            fetcher_func=sectoral_news_monitor.scrape_sectoral_news,
            force_refresh=force_refresh,
            max_items=50,
            hours=24
        )
        if not raw_items:
            return pd.DataFrame(), None
        frame = pd.DataFrame(raw_items)
        if not frame.empty:
            # Normalize timestamps for sorting/filtering downstream.
            frame['timestamp'] = pd.to_datetime(frame['timestamp'])
        return frame, None
    except Exception as e:
        # Best-effort source: degrade to an error message.
        return pd.DataFrame(), f"Sectoral news unavailable: {e}"
407
+
408
def fetch_market_events():
    """Fetch upcoming market events via the shared cache manager.

    Returns:
        tuple: (pandas.DataFrame, str | None) — events frame and error message
        (None on success); empty frame on monitor absence, miss, or failure.
    """
    if not market_events_monitor:
        return pd.DataFrame(), None
    try:
        # Cached event data is reused unless force_refresh is set.
        raw_events = cache_manager.get_news(
            source='market_events',
            fetcher_func=market_events_monitor.scrape_market_events,
            force_refresh=force_refresh,
            max_items=50,
            days_ahead=14
        )
        if not raw_events:
            return pd.DataFrame(), None
        frame = pd.DataFrame(raw_events)
        if not frame.empty:
            # Event timestamps arrive as strings; parse to datetime64.
            frame['timestamp'] = pd.to_datetime(frame['timestamp'])
        return frame, None
    except Exception as e:
        # Report failures rather than breaking the dashboard.
        return pd.DataFrame(), f"Market events unavailable: {e}"
427
+
428
def fetch_economic_calendar():
    """Fetch the economic calendar via the shared cache manager.

    Returns:
        tuple: (pandas.DataFrame, str | None) — calendar frame and error
        message (None on success); empty frame on service absence, miss,
        or failure.
    """
    if not economic_calendar_service:
        return pd.DataFrame(), None
    try:
        # Cached calendar entries are reused unless a refresh was forced.
        raw_events = cache_manager.get_news(
            source='economic_calendar',
            fetcher_func=economic_calendar_service.get_upcoming_events,
            force_refresh=force_refresh,
            days_ahead=7,
            min_importance='medium'
        )
        if not raw_events:
            return pd.DataFrame(), None
        frame = pd.DataFrame(raw_events)
        if not frame.empty:
            # Normalize event timestamps for the calendar widget.
            frame['timestamp'] = pd.to_datetime(frame['timestamp'])
        return frame, None
    except Exception as e:
        # Degrade gracefully: return the error text to the caller.
        return pd.DataFrame(), f"Economic calendar unavailable: {e}"
447
+
448
+ # Progressive loading: Display results as they arrive
449
+ # Create a status placeholder to show progress
450
+ status_placeholder = st.empty()
451
+
452
+ # Execute all news fetching operations in parallel using ThreadPoolExecutor
453
+ with st.spinner("Loading news from 8 sources..."):
454
+ with ThreadPoolExecutor(max_workers=8) as executor:
455
+ # Submit all tasks with source name attached
456
+ futures_map = {
457
+ executor.submit(fetch_twitter_news): 'twitter',
458
+ executor.submit(fetch_reddit_news): 'reddit',
459
+ executor.submit(fetch_rss_news): 'rss',
460
+ executor.submit(fetch_ai_tech_news): 'ai_tech',
461
+ executor.submit(fetch_prediction_markets): 'predictions',
462
+ executor.submit(fetch_sectoral_news): 'sectoral_news',
463
+ executor.submit(fetch_market_events): 'market_events',
464
+ executor.submit(fetch_economic_calendar): 'economic_calendar'
465
+ }
466
+
467
+ # Track errors and completion
468
+ fetch_errors = []
469
+ completed_sources = []
470
+
471
+ # Process results as they complete (progressive loading)
472
+ try:
473
+ for future in as_completed(futures_map, timeout=90):
474
+ source_name = futures_map[future]
475
+
476
+ try:
477
+ result_df, error = future.result()
478
+
479
+ # Update status
480
+ completed_sources.append(source_name)
481
+ status_placeholder.info(f"πŸ” Loaded {len(completed_sources)}/8 sources ({', '.join(completed_sources)})")
482
+
483
+ if source_name == 'twitter':
484
+ twitter_df = result_df
485
+ if error:
486
+ fetch_errors.append(error)
487
+ elif source_name == 'reddit':
488
+ reddit_df = result_df
489
+ if error:
490
+ fetch_errors.append(error)
491
+ elif source_name == 'rss':
492
+ rss_all_df = result_df
493
+ if error:
494
+ fetch_errors.append(error)
495
+ # Get main page news subset for RSS
496
+ if not rss_all_df.empty and 'from_web' in rss_all_df.columns:
497
+ rss_main_df = rss_all_df[rss_all_df['from_web'] == True].copy()
498
+ elif source_name == 'ai_tech':
499
+ ai_tech_df = result_df
500
+ if error:
501
+ fetch_errors.append(error)
502
+ elif source_name == 'predictions':
503
+ predictions_df = result_df
504
+ if error:
505
+ fetch_errors.append(error)
506
+ elif source_name == 'sectoral_news':
507
+ sectoral_news_df = result_df
508
+ if error:
509
+ fetch_errors.append(error)
510
+ elif source_name == 'market_events':
511
+ market_events_df = result_df
512
+ if error:
513
+ fetch_errors.append(error)
514
+ elif source_name == 'economic_calendar':
515
+ economic_calendar_df = result_df
516
+ if error:
517
+ fetch_errors.append(error)
518
+
519
+ except Exception as e:
520
+ fetch_errors.append(f"Error fetching {source_name} news: {e}")
521
+ completed_sources.append(f"{source_name} (error)")
522
+ status_placeholder.warning(f"⚠️ {source_name} failed, continuing with other sources...")
523
+
524
+ except TimeoutError:
525
+ # Handle timeout gracefully - continue with whatever results we have
526
+ fetch_errors.append("⏱️ Some sources timed out after 90 seconds - displaying available results")
527
+ status_placeholder.warning(f"⚠️ {len(completed_sources)}/8 sources loaded (some timed out)")
528
+
529
+ # Mark incomplete sources
530
+ all_sources = set(futures_map.values())
531
+ incomplete_sources = all_sources - set(completed_sources)
532
+ for source in incomplete_sources:
533
+ fetch_errors.append(f"{source} timed out - skipped")
534
+ completed_sources.append(f"{source} (timeout)")
535
+
536
+ # Clear the status message after all sources complete
537
+ status_placeholder.success(f"βœ… Loaded {len(completed_sources)}/8 sources successfully")
538
+
539
+ # Debug logging (console only, not displayed on page)
540
+ import logging
541
+ logger = logging.getLogger(__name__)
542
+ logger.info(f"News Fetch Results: Twitter={len(twitter_df)}, Reddit={len(reddit_df)}, RSS={len(rss_all_df)}, AI/Tech={len(ai_tech_df)}, Predictions={len(predictions_df)}, Sectoral={len(sectoral_news_df)}, Events={len(market_events_df)}, Calendar={len(economic_calendar_df)}")
543
+ logger.info(f"Availability: Predictions={PREDICTIONS_AVAILABLE}, Sectoral={SECTORAL_AVAILABLE}, Events={EVENTS_AVAILABLE}, Calendar={CALENDAR_AVAILABLE}")
544
+ if fetch_errors:
545
+ for err in fetch_errors:
546
+ logger.warning(f"Fetch error: {err}")
547
+
548
+ # Batch AI summarization after all sources are collected
549
+ ai_summary_dfs = [
550
+ twitter_df,
551
+ reddit_df,
552
+ rss_all_df,
553
+ ai_tech_df,
554
+ sectoral_news_df,
555
+ market_events_df,
556
+ economic_calendar_df,
557
+ predictions_df,
558
+ ]
559
+
560
+ all_items = []
561
+ for df in ai_summary_dfs:
562
+ if df.empty:
563
+ continue
564
+ all_items.extend(df.to_dict("records"))
565
+
566
+ if all_items:
567
+ enqueue_items(all_items)
568
+
569
+ # Clear force refresh flag after fetching is complete
570
+ if force_refresh:
571
+ st.session_state.force_refresh = False
572
+
573
+ # Apply filters using cache manager (with filter result caching)
574
+ filters = {
575
+ 'category': category_filter,
576
+ 'sentiment': sentiment_filter,
577
+ 'impact': impact_filter
578
+ }
579
+
580
+ twitter_filtered = cache_manager.get_filtered_news(twitter_df, filters, 'twitter') if not twitter_df.empty else twitter_df
581
+ reddit_filtered = cache_manager.get_filtered_news(reddit_df, filters, 'reddit') if not reddit_df.empty else reddit_df
582
+ rss_main_filtered = cache_manager.get_filtered_news(rss_main_df, filters, 'rss_main') if not rss_main_df.empty else rss_main_df
583
+ rss_all_filtered = cache_manager.get_filtered_news(rss_all_df, filters, 'rss_all') if not rss_all_df.empty else rss_all_df
584
+
585
+ # Combine Twitter and Reddit for first column
586
+ twitter_reddit_df = pd.concat([twitter_filtered, reddit_filtered], ignore_index=True) if not twitter_filtered.empty or not reddit_filtered.empty else pd.DataFrame()
587
+ if not twitter_reddit_df.empty:
588
+ twitter_reddit_df = twitter_reddit_df.sort_values('timestamp', ascending=False)
589
+
590
+ # Combine all for breaking news banner
591
+ all_news_df = pd.concat([twitter_filtered, reddit_filtered, rss_all_filtered], ignore_index=True) if not twitter_filtered.empty or not reddit_filtered.empty or not rss_all_filtered.empty else pd.DataFrame()
592
+
593
+ # Display breaking news banner with ML-based scoring
594
+ if not all_news_df.empty:
595
+ # Initialize the breaking news scorer
596
+ scorer = get_breaking_news_scorer()
597
+
598
+ # Convert DataFrame to list of dicts for scoring
599
+ all_news_list = all_news_df.to_dict('records')
600
+
601
+ # Get top breaking news using multi-factor impact scoring
602
+ # Only show news with impact score >= 40 (medium-high impact threshold)
603
+ breaking_news_items = scorer.get_breaking_news(all_news_list, top_n=1)
604
+
605
+ if breaking_news_items and breaking_news_items[0]['breaking_score'] >= 40.0:
606
+ # Display the highest-impact news in the banner
607
+ breaking_df = pd.DataFrame([breaking_news_items[0]])
608
+ display_breaking_news_banner(breaking_df)
609
+ else:
610
+ # If no high-impact news found, show informational message with score
611
+ if breaking_news_items:
612
+ top_score = breaking_news_items[0]['breaking_score']
613
+ st.info(f"πŸ“Š Monitoring financial markets - highest impact score: {top_score:.1f}/100 (threshold: 40)")
614
+ else:
615
+ st.info("πŸ“Š Monitoring financial markets - no news items available for scoring")
616
+ else:
617
+ # No news data available at all
618
+ st.info("πŸ“Š Loading financial news - breaking news banner will appear when data is available")
619
+
620
+ st.markdown("---")
621
+
622
+ # ---- ECONOMIC CALENDAR WIDGET ----
623
+ if not economic_calendar_df.empty:
624
+ display_economic_calendar_widget(economic_calendar_df)
625
+ st.markdown("---")
626
+
627
+ # ---- FOUR-COLUMN SCROLLABLE NEWS LAYOUT (TradingView Style) ----
628
+
629
+ col1, col2, col3, col4 = st.columns(4)
630
+
631
+ with col1:
632
+ # SECTION 1: Twitter/X & Reddit Breaking News
633
+ if not twitter_reddit_df.empty:
634
+ display_scrollable_news_section(
635
+ twitter_reddit_df,
636
+ section_title="Twitter/X & Reddit News",
637
+ section_icon="🌐",
638
+ section_subtitle="Real-time news from premium accounts & communities (last 12h)",
639
+ max_items=100,
640
+ height="700px"
641
+ )
642
+ elif not twitter_df.empty or not reddit_df.empty:
643
+ st.markdown("""
644
+ <div style="background: linear-gradient(135deg, #1E222D 0%, #131722 100%); border: 1px solid #2A2E39; border-radius: 8px; padding: 30px; text-align: center;">
645
+ <div style="font-size: 48px; margin-bottom: 16px;">πŸ“­</div>
646
+ <div style="color: #D1D4DC; font-size: 16px; font-weight: 600; margin-bottom: 8px;">No matches found</div>
647
+ <div style="color: #787B86; font-size: 13px;">Try adjusting your filters to see Twitter/X & Reddit news</div>
648
+ </div>
649
+ """, unsafe_allow_html=True)
650
+ else:
651
+ st.markdown("""
652
+ <div style="background: linear-gradient(135deg, #1E222D 0%, #131722 100%); border: 1px solid #2A2E39; border-radius: 8px; padding: 30px; text-align: center;">
653
+ <div style="font-size: 48px; margin-bottom: 16px; animation: pulse 2s ease-in-out infinite;">⏳</div>
654
+ <div style="color: #D1D4DC; font-size: 16px; font-weight: 600; margin-bottom: 8px;">Loading Twitter/X & Reddit News</div>
655
+ <div style="color: #787B86; font-size: 13px;">Fetching real-time news from premium sources...</div>
656
+ <div style="color: #787B86; font-size: 12px; margin-top: 8px; opacity: 0.7;">This may take 30-60 seconds on first load</div>
657
+ </div>
658
+ <style>
659
+ @keyframes pulse {
660
+ 0%, 100% { opacity: 1; transform: scale(1); }
661
+ 50% { opacity: 0.6; transform: scale(1.1); }
662
+ }
663
+ </style>
664
+ """, unsafe_allow_html=True)
665
+
666
+ with col2:
667
+ # SECTION 2: Main Page News (Web-Scraped)
668
+ if not rss_main_filtered.empty:
669
+ display_scrollable_news_section(
670
+ rss_main_filtered,
671
+ section_title="Top Headlines",
672
+ section_icon="πŸ”₯",
673
+ section_subtitle="Latest from main pages",
674
+ max_items=50,
675
+ height="700px"
676
+ )
677
+ elif not rss_main_df.empty:
678
+ st.markdown("""
679
+ <div style="background: linear-gradient(135deg, #1E222D 0%, #131722 100%); border: 1px solid #2A2E39; border-radius: 8px; padding: 30px; text-align: center;">
680
+ <div style="font-size: 48px; margin-bottom: 16px;">πŸ“­</div>
681
+ <div style="color: #D1D4DC; font-size: 16px; font-weight: 600; margin-bottom: 8px;">No matches found</div>
682
+ <div style="color: #787B86; font-size: 13px;">Try adjusting your filters to see top headlines</div>
683
+ </div>
684
+ """, unsafe_allow_html=True)
685
+ else:
686
+ st.markdown("""
687
+ <div style="background: linear-gradient(135deg, #1E222D 0%, #131722 100%); border: 1px solid #2A2E39; border-radius: 8px; padding: 30px; text-align: center;">
688
+ <div style="font-size: 48px; margin-bottom: 16px; animation: pulse 2s ease-in-out infinite;">⏳</div>
689
+ <div style="color: #D1D4DC; font-size: 16px; font-weight: 600; margin-bottom: 8px;">Loading Top Headlines</div>
690
+ <div style="color: #787B86; font-size: 13px;">Fetching latest news from major outlets...</div>
691
+ <div style="color: #787B86; font-size: 12px; margin-top: 8px; opacity: 0.7;">Web scraping main pages</div>
692
+ </div>
693
+ <style>
694
+ @keyframes pulse {
695
+ 0%, 100% { opacity: 1; transform: scale(1); }
696
+ 50% { opacity: 0.6; transform: scale(1.1); }
697
+ }
698
+ </style>
699
+ """, unsafe_allow_html=True)
700
+
701
+ with col3:
702
+ # SECTION 3: RSS Feed News
703
+ if not rss_all_filtered.empty:
704
+ display_scrollable_news_section(
705
+ rss_all_filtered,
706
+ section_title="RSS Feed",
707
+ section_icon="πŸ“°",
708
+ section_subtitle="Aggregated from all sources",
709
+ max_items=100,
710
+ height="700px"
711
+ )
712
+ elif not rss_all_df.empty:
713
+ st.markdown("""
714
+ <div style="background: linear-gradient(135deg, #1E222D 0%, #131722 100%); border: 1px solid #2A2E39; border-radius: 8px; padding: 30px; text-align: center;">
715
+ <div style="font-size: 48px; margin-bottom: 16px;">πŸ“­</div>
716
+ <div style="color: #D1D4DC; font-size: 16px; font-weight: 600; margin-bottom: 8px;">No matches found</div>
717
+ <div style="color: #787B86; font-size: 13px;">Try adjusting your filters to see RSS feed news</div>
718
+ </div>
719
+ """, unsafe_allow_html=True)
720
+ else:
721
+ st.markdown("""
722
+ <div style="background: linear-gradient(135deg, #1E222D 0%, #131722 100%); border: 1px solid #2A2E39; border-radius: 8px; padding: 30px; text-align: center;">
723
+ <div style="font-size: 48px; margin-bottom: 16px; animation: pulse 2s ease-in-out infinite;">⏳</div>
724
+ <div style="color: #D1D4DC; font-size: 16px; font-weight: 600; margin-bottom: 8px;">Loading RSS Feed</div>
725
+ <div style="color: #787B86; font-size: 13px;">Aggregating news from all RSS sources...</div>
726
+ <div style="color: #787B86; font-size: 12px; margin-top: 8px; opacity: 0.7;">Bloomberg, Reuters, FT, WSJ & more</div>
727
+ </div>
728
+ <style>
729
+ @keyframes pulse {
730
+ 0%, 100% { opacity: 1; transform: scale(1); }
731
+ 50% { opacity: 0.6; transform: scale(1.1); }
732
+ }
733
+ </style>
734
+ """, unsafe_allow_html=True)
735
+
736
+ with col4:
737
+ # SECTION 4: AI & Tech News
738
+ if not ai_tech_df.empty:
739
+ display_scrollable_news_section(
740
+ ai_tech_df,
741
+ section_title="AI & Tech News",
742
+ section_icon="πŸ€–",
743
+ section_subtitle="Latest from tech giants & AI research",
744
+ max_items=100,
745
+ height="700px"
746
+ )
747
+ else:
748
+ # Debug: Check if there's an AI/Tech specific error
749
+ ai_tech_error = next((err for err in fetch_errors if 'ai_tech' in err.lower() or 'AI/Tech' in err), None) if 'fetch_errors' in locals() else None
750
+
751
+ if ai_tech_error:
752
+ # Show error message
753
+ st.markdown(f"""
754
+ <div style="background: linear-gradient(135deg, #1E222D 0%, #131722 100%); border: 1px solid #2A2E39; border-radius: 8px; padding: 30px; text-align: center;">
755
+ <div style="font-size: 48px; margin-bottom: 16px;">⚠️</div>
756
+ <div style="color: #D1D4DC; font-size: 16px; font-weight: 600; margin-bottom: 8px;">AI & Tech News Unavailable</div>
757
+ <div style="color: #787B86; font-size: 13px;">{ai_tech_error}</div>
758
+ </div>
759
+ """, unsafe_allow_html=True)
760
+ else:
761
+ # Show loading message
762
+ st.markdown("""
763
+ <div style="background: linear-gradient(135deg, #1E222D 0%, #131722 100%); border: 1px solid #2A2E39; border-radius: 8px; padding: 30px; text-align: center;">
764
+ <div style="font-size: 48px; margin-bottom: 16px; animation: pulse 2s ease-in-out infinite;">⏳</div>
765
+ <div style="color: #D1D4DC; font-size: 16px; font-weight: 600; margin-bottom: 8px;">Loading AI & Tech News</div>
766
+ <div style="color: #787B86; font-size: 13px;">Aggregating from tech blogs & research...</div>
767
+ <div style="color: #787B86; font-size: 12px; margin-top: 8px; opacity: 0.7;">OpenAI, Google AI, Microsoft, Meta & more</div>
768
+ <div style="color: #FF9500; font-size: 12px; margin-top: 12px;">If this persists, check the "Source Fetch Warnings" section below</div>
769
+ </div>
770
+ <style>
771
+ @keyframes pulse {
772
+ 0%, 100% { opacity: 1; transform: scale(1); }
773
+ 50% { opacity: 0.6; transform: scale(1.1); }
774
+ }
775
+ </style>
776
+ """, unsafe_allow_html=True)
777
+
778
+ # ---- SECOND ROW: MARKET INTELLIGENCE (3 COLUMNS) ----
779
+ st.markdown("---")
780
+ st.markdown("## πŸ“Š Market Intelligence - Predictions, Sectors & Events")
781
+
782
+ col5, col6, col7 = st.columns(3)
783
+
784
+ with col5:
785
+ # Prediction Markets Column
786
+ if not predictions_df.empty:
787
+ display_scrollable_news_section(
788
+ predictions_df,
789
+ section_title="Prediction Markets",
790
+ section_icon="🎲",
791
+ section_subtitle="Polymarket, Metaculus & CME FedWatch",
792
+ max_items=50,
793
+ height="600px"
794
+ )
795
+ else:
796
+ st.markdown("""
797
+ <div style="background: linear-gradient(135deg, #1E222D 0%, #131722 100%); border: 1px solid #2A2E39; border-radius: 8px; padding: 30px; text-align: center;">
798
+ <div style="font-size: 48px; margin-bottom: 16px; animation: pulse 2s ease-in-out infinite;">⏳</div>
799
+ <div style="color: #D1D4DC; font-size: 16px; font-weight: 600; margin-bottom: 8px;">Loading Prediction Markets</div>
800
+ <div style="color: #787B86; font-size: 13px;">Fetching market forecasts...</div>
801
+ </div>
802
+ <style>
803
+ @keyframes pulse {
804
+ 0%, 100% { opacity: 1; transform: scale(1); }
805
+ 50% { opacity: 0.6; transform: scale(1.1); }
806
+ }
807
+ </style>
808
+ """, unsafe_allow_html=True)
809
+
810
+ with col6:
811
+ # Sectoral News Column
812
+ if not sectoral_news_df.empty:
813
+ display_scrollable_news_section(
814
+ sectoral_news_df,
815
+ section_title="Sectoral News",
816
+ section_icon="🏭",
817
+ section_subtitle="7 sectors: Finance, Tech, Energy & more",
818
+ max_items=50,
819
+ height="600px"
820
+ )
821
+ else:
822
+ st.markdown("""
823
+ <div style="background: linear-gradient(135deg, #1E222D 0%, #131722 100%); border: 1px solid #2A2E39; border-radius: 8px; padding: 30px; text-align: center;">
824
+ <div style="font-size: 48px; margin-bottom: 16px; animation: pulse 2s ease-in-out infinite;">⏳</div>
825
+ <div style="color: #D1D4DC; font-size: 16px; font-weight: 600; margin-bottom: 8px;">Loading Sectoral News</div>
826
+ <div style="color: #787B86; font-size: 13px;">Aggregating sector-specific news...</div>
827
+ </div>
828
+ <style>
829
+ @keyframes pulse {
830
+ 0%, 100% { opacity: 1; transform: scale(1); }
831
+ 50% { opacity: 0.6; transform: scale(1.1); }
832
+ }
833
+ </style>
834
+ """, unsafe_allow_html=True)
835
+
836
+ with col7:
837
+ # Market Events Column
838
+ if not market_events_df.empty:
839
+ display_scrollable_news_section(
840
+ market_events_df,
841
+ section_title="Market Events",
842
+ section_icon="πŸ“ˆ",
843
+ section_subtitle="Earnings, indicators & central banks",
844
+ max_items=50,
845
+ height="600px"
846
+ )
847
+ else:
848
+ st.markdown("""
849
+ <div style="background: linear-gradient(135deg, #1E222D 0%, #131722 100%); border: 1px solid #2A2E39; border-radius: 8px; padding: 30px; text-align: center;">
850
+ <div style="font-size: 48px; margin-bottom: 16px; animation: pulse 2s ease-in-out infinite;">⏳</div>
851
+ <div style="color: #D1D4DC; font-size: 16px; font-weight: 600; margin-bottom: 8px;">Loading Market Events</div>
852
+ <div style="color: #787B86; font-size: 13px;">Fetching earnings & economic indicators...</div>
853
+ </div>
854
+ <style>
855
+ @keyframes pulse {
856
+ 0%, 100% { opacity: 1; transform: scale(1); }
857
+ 50% { opacity: 0.6; transform: scale(1.1); }
858
+ }
859
+ </style>
860
+ """, unsafe_allow_html=True)
861
+
862
+ # Display fetch errors in expander (less intrusive)
863
+ if 'fetch_errors' in locals() and fetch_errors:
864
+ with st.expander("⚠️ Source Fetch Warnings", expanded=False):
865
+ for error in fetch_errors:
866
+ st.caption(f"β€’ {error}")
867
+
868
+ # ---- AI SUMMARY METRICS ----
869
+ total_items = sum(len(df) for df in ai_summary_dfs if not df.empty)
870
+ ai_summarized = 0
871
+ for df in ai_summary_dfs:
872
+ if df.empty or "summary_ai" not in df.columns:
873
+ continue
874
+ ai_summarized += df["summary_ai"].fillna("").astype(str).str.strip().ne("").sum()
875
+
876
+ ai_summary_pct = (ai_summarized / total_items * 100) if total_items else 0.0
877
+
878
+ st.markdown("---")
879
# Streamlit fragment: re-runs only this section every 60 s without
# re-executing the whole page script.
@st.fragment(run_every=60)
def render_ai_summary_section():
    # NOTE(review): relies on closure variables from the enclosing page
    # script (ai_summarized, total_items, ai_summary_pct) and on the
    # fetch_summaries/get_status helpers defined elsewhere in this file.
    summaries = fetch_summaries(limit=50)
    status = get_status()
    # Fall back to "N/A" when the summarizer has not reported yet.
    last_update_text = status.get("last_update") or "N/A"
    buffer_remaining = status.get("buffer_remaining_seconds")
    buffer_text = "N/A"
    if buffer_remaining is not None:
        # Render remaining buffer time as whole seconds, e.g. "42s".
        buffer_text = f"{int(buffer_remaining)}s"

    st.markdown("## πŸ€– AI Summary")
    # Header card: coverage metrics plus summarizer buffer/cache status.
    st.markdown(
        f"""
        <div style="background: linear-gradient(135deg, #1E222D 0%, #131722 100%); border: 1px solid #2A2E39; border-radius: 8px; padding: 20px; margin-bottom: 12px;">
            <div style="color: #E0E3EB; font-size: 16px; font-weight: 600; margin-bottom: 6px;">Current AI Summarizations</div>
            <div style="color: #D1D4DC; font-size: 14px; line-height: 1.6;">
                {ai_summarized} / {total_items} items summarized
                <span style="color: #787B86; font-size: 12px; margin-left: 8px;">({ai_summary_pct:.1f}% coverage)</span>
            </div>
            <div style="color: #787B86; font-size: 12px; margin-top: 6px;">Last update: {last_update_text}</div>
            <div style="color: #787B86; font-size: 12px;">Buffer: {status.get("buffer_size", 0)} items, next flush in {buffer_text}</div>
            <div style="color: #787B86; font-size: 12px;">Cache: {status.get("total_summaries", 0)} summaries, batch max ~{status.get("batch_max_chars", 0)} chars</div>
        </div>
        """,
        unsafe_allow_html=True,
    )

    if summaries:
        # One card per summarized item: "source β€” title" header + body text.
        for item in summaries:
            source = item.get("source", "")
            summary = item.get("summary", "")
            title = item.get("title", "")
            st.markdown(
                f"""
                <div style="background: #131722; border: 1px solid #2A2E39; border-radius: 6px; padding: 10px; margin-bottom: 8px;">
                    <div style="color: #E0E3EB; font-size: 13px; font-weight: 600;">{source} β€” {title}</div>
                    <div style="color: #D1D4DC; font-size: 13px; margin-top: 4px;">{summary}</div>
                </div>
                """,
                unsafe_allow_html=True,
            )
    else:
        st.info("AI summaries will appear after the 2-minute buffering window completes.")

# Initial render; subsequent reruns are driven by the fragment timer.
render_ai_summary_section()
924
+
925
+ # Auto-refresh logic
926
+ if auto_refresh:
927
+ import time
928
+ time.sleep(180) # 3 minutes
929
+ st.rerun()
930
+
931
+ # ---- Footer with Instructions ----
932
+ st.markdown("---")
933
+ st.markdown("""
934
+ ### πŸ’‘ How to Use This Dashboard
935
+
936
+ **For Traders:**
937
+ - Monitor breaking news in real-time for market-moving events
938
+ - Filter by category to focus on macro, markets, or geopolitical news
939
+ - Use sentiment analysis to gauge market mood
940
+ - High-impact news items require immediate attention
941
+
942
+ **Tips:**
943
+ - Enable auto-refresh for continuous monitoring during trading hours
944
+ - Focus on "HIGH IMPACT" news for potential volatility
945
+ - Breaking news (πŸ”΄) indicates urgent market-moving information
946
+ - Check engagement metrics (likes + retweets) for news importance
947
+
948
+ **Data Source:** Dual-mode scraping - RSS feeds + direct web page parsing from Reuters, Bloomberg, FT, WSJ, CNBC, Google News, Yahoo Finance, Fed, ECB and more
949
+ **Update Frequency:** 3-minute cache for low-latency delivery
950
+ **No Authentication Required:** Public sources - works out of the box
951
+ """)
app/services/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Services package for financial platform."""
app/services/ai_tech_news.py ADDED
@@ -0,0 +1,293 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ AI & Tech News Scraper
3
+ Fetches news from popular tech resources and big tech company blogs
4
+ """
5
+
6
import logging
from datetime import datetime, timedelta, timezone
from typing import List, Dict

import feedparser
import requests
from bs4 import BeautifulSoup
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
class AITechNewsScraper:
    """Scraper for AI and tech news from major sources and company blogs.

    Aggregates RSS feeds from tech media, big-tech/AI-lab blogs and
    research groups, filters entries to a recent time window, and
    normalises each entry into a flat dict tagged with keyword-based
    impact/sentiment classifications.
    """

    # AI/Tech News Sources (RSS + Web). Each value carries the feed URL,
    # the fetch mechanism ('rss') and a coarse category label that is
    # copied onto every item produced from that source.
    SOURCES = {
        # Major Tech News
        'TechCrunch AI': {
            'url': 'https://techcrunch.com/category/artificial-intelligence/feed/',
            'type': 'rss',
            'category': 'ai'
        },
        'The Verge AI': {
            'url': 'https://www.theverge.com/ai-artificial-intelligence/rss/index.xml',
            'type': 'rss',
            'category': 'ai'
        },
        'VentureBeat AI': {
            'url': 'https://venturebeat.com/category/ai/feed/',
            'type': 'rss',
            'category': 'ai'
        },
        'MIT Technology Review AI': {
            'url': 'https://www.technologyreview.com/topic/artificial-intelligence/feed',
            'type': 'rss',
            'category': 'ai'
        },
        'Ars Technica AI': {
            'url': 'https://feeds.arstechnica.com/arstechnica/technology-lab',
            'type': 'rss',
            'category': 'tech'
        },
        'Wired AI': {
            'url': 'https://www.wired.com/feed/tag/ai/latest/rss',
            'type': 'rss',
            'category': 'ai'
        },

        # Big Tech Company Blogs
        'OpenAI Blog': {
            'url': 'https://openai.com/blog/rss.xml',
            'type': 'rss',
            'category': 'ai'
        },
        'Google AI Blog': {
            'url': 'https://blog.google/technology/ai/rss/',
            'type': 'rss',
            'category': 'ai'
        },
        'Microsoft AI Blog': {
            'url': 'https://blogs.microsoft.com/ai/feed/',
            'type': 'rss',
            'category': 'ai'
        },
        'Meta AI Blog': {
            'url': 'https://ai.meta.com/blog/rss/',
            'type': 'rss',
            'category': 'ai'
        },
        'DeepMind Blog': {
            'url': 'https://deepmind.google/blog/rss.xml',
            'type': 'rss',
            'category': 'ai'
        },
        'Anthropic News': {
            'url': 'https://www.anthropic.com/news/rss.xml',
            'type': 'rss',
            'category': 'ai'
        },
        'AWS AI Blog': {
            'url': 'https://aws.amazon.com/blogs/machine-learning/feed/',
            'type': 'rss',
            'category': 'ai'
        },
        'NVIDIA AI Blog': {
            'url': 'https://blogs.nvidia.com/feed/',
            'type': 'rss',
            'category': 'ai'
        },

        # Research & Academia
        'Stanford HAI': {
            'url': 'https://hai.stanford.edu/news/rss.xml',
            'type': 'rss',
            'category': 'research'
        },
        'Berkeley AI Research': {
            'url': 'https://bair.berkeley.edu/blog/feed.xml',
            'type': 'rss',
            'category': 'research'
        },
    }

    def __init__(self):
        """Initialize the AI/Tech news scraper with a shared HTTP session."""
        self.session = requests.Session()
        # Browser-like User-Agent: some feed hosts reject the default
        # python-requests UA. The session is used for all feed fetches.
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'
        })

    def scrape_ai_tech_news(self, max_items: int = 100, hours: int = 48) -> List[Dict]:
        """
        Scrape AI and tech news from all sources.

        Args:
            max_items: Maximum number of news items to return
            hours: Only include news from the last N hours

        Returns:
            List of news items with standardized format, newest first
        """
        all_news: List[Dict] = []
        # feedparser exposes published/updated times as UTC struct_times,
        # so the cutoff must be naive UTC as well; computing it with the
        # local clock would shift the window by the machine's UTC offset.
        cutoff_time = datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(hours=hours)

        for source_name, source_config in self.SOURCES.items():
            try:
                if source_config['type'] == 'rss':
                    news_items = self._scrape_rss_feed(
                        source_name,
                        source_config['url'],
                        source_config['category'],
                        cutoff_time
                    )
                    all_news.extend(news_items)
                    logger.info(f"Scraped {len(news_items)} items from {source_name}")

            except Exception as e:
                # A single broken feed must not abort the whole scrape.
                logger.error(f"Error scraping {source_name}: {e}")
                continue

        # Sort by timestamp (newest first) and cap the result size.
        all_news.sort(key=lambda x: x['timestamp'], reverse=True)
        return all_news[:max_items]

    def _scrape_rss_feed(self, source_name: str, feed_url: str,
                         category: str, cutoff_time: datetime) -> List[Dict]:
        """Fetch and normalise a single RSS feed.

        Returns a list of standardized news dicts; errors are logged and
        yield an empty/partial list rather than raising.
        """
        news_items: List[Dict] = []

        try:
            # Fetch through the shared session so the browser User-Agent is
            # actually sent and a timeout applies (feedparser.parse(url)
            # would bypass both). Fall back to feedparser's own fetching if
            # the direct request fails.
            try:
                response = self.session.get(feed_url, timeout=10)
                response.raise_for_status()
                feed = feedparser.parse(response.content)
            except Exception:
                feed = feedparser.parse(feed_url)

            for entry in feed.entries:
                try:
                    # Parse timestamp (feedparser's *_parsed are UTC).
                    if hasattr(entry, 'published_parsed') and entry.published_parsed:
                        timestamp = datetime(*entry.published_parsed[:6])
                    elif hasattr(entry, 'updated_parsed') and entry.updated_parsed:
                        timestamp = datetime(*entry.updated_parsed[:6])
                    else:
                        # No date in the entry: treat it as "just published"
                        # (naive UTC, consistent with the parsed branches).
                        timestamp = datetime.now(timezone.utc).replace(tzinfo=None)

                    # Skip news older than the requested window.
                    if timestamp < cutoff_time:
                        continue

                    # Extract title and summary.
                    title = entry.get('title', 'No title')
                    summary = entry.get('summary', entry.get('description', ''))

                    # Strip HTML markup from the summary and cap its length.
                    if summary:
                        soup = BeautifulSoup(summary, 'html.parser')
                        summary = soup.get_text().strip()
                        if len(summary) > 300:
                            summary = summary[:297] + '...'

                    # Keyword-based impact/sentiment tagging.
                    impact = self._determine_impact(title, summary)
                    sentiment = self._determine_sentiment(title, summary)

                    news_item = {
                        'title': title,
                        'summary': summary or title,
                        'source': source_name,
                        'url': entry.get('link', ''),
                        'timestamp': timestamp,
                        'category': category,
                        'impact': impact,
                        'sentiment': sentiment,
                        'is_breaking': self._is_breaking_news(title, summary),
                        'likes': 0,  # No engagement data for RSS
                        'retweets': 0,
                        'reddit_score': 0,
                        'reddit_comments': 0
                    }

                    news_items.append(news_item)

                except Exception as e:
                    # Skip a malformed entry; keep the rest of the feed.
                    logger.error(f"Error parsing entry from {source_name}: {e}")
                    continue

        except Exception as e:
            logger.error(f"Error fetching RSS feed {feed_url}: {e}")

        return news_items

    def _determine_impact(self, title: str, summary: str) -> str:
        """Classify impact as 'high'/'medium'/'low' via substring keywords.

        NOTE(review): matching is plain substring, so short keywords can
        fire inside longer words (e.g. 'gpt' inside 'Egypt') β€” accepted
        trade-off of the original heuristic, kept as-is.
        """
        text = f"{title} {summary}".lower()

        high_impact_keywords = [
            'breakthrough', 'announce', 'launch', 'release', 'new model',
            'gpt', 'claude', 'gemini', 'llama', 'chatgpt',
            'billion', 'trillion', 'acquisition', 'merger',
            'regulation', 'ban', 'lawsuit', 'security breach',
            'major', 'significant', 'revolutionary', 'first-ever'
        ]

        medium_impact_keywords = [
            'update', 'improve', 'enhance', 'study', 'research',
            'partnership', 'collaboration', 'funding', 'investment',
            'expands', 'grows', 'adopts', 'implements'
        ]

        # High-impact keywords take precedence over medium ones.
        if any(keyword in text for keyword in high_impact_keywords):
            return 'high'
        if any(keyword in text for keyword in medium_impact_keywords):
            return 'medium'
        return 'low'

    def _determine_sentiment(self, title: str, summary: str) -> str:
        """Classify sentiment by counting positive vs negative keywords."""
        text = f"{title} {summary}".lower()

        positive_keywords = [
            'breakthrough', 'success', 'achieve', 'improve', 'advance',
            'innovative', 'revolutionary', 'launch', 'release', 'win',
            'growth', 'expand', 'partnership', 'collaboration'
        ]

        negative_keywords = [
            'fail', 'issue', 'problem', 'concern', 'worry', 'risk',
            'ban', 'lawsuit', 'breach', 'hack', 'leak', 'crisis',
            'decline', 'loss', 'shutdown', 'controversy'
        ]

        positive_count = sum(1 for kw in positive_keywords if kw in text)
        negative_count = sum(1 for kw in negative_keywords if kw in text)

        if positive_count > negative_count:
            return 'positive'
        elif negative_count > positive_count:
            return 'negative'
        else:
            # Tie (including 0-0) is neutral.
            return 'neutral'

    def _is_breaking_news(self, title: str, summary: str) -> bool:
        """Return True when the text contains a breaking-news indicator."""
        text = f"{title} {summary}".lower()

        breaking_indicators = [
            'breaking', 'just announced', 'just released', 'just launched',
            'alert', 'urgent', 'developing', 'live', 'now:'
        ]

        return any(indicator in text for indicator in breaking_indicators)

    def get_statistics(self) -> Dict:
        """Get statistics - returns an all-zero placeholder.

        Kept for backward compatibility; real counters are managed by the
        caching layer.
        """
        return {
            'total': 0,
            'high_impact': 0,
            'breaking': 0,
            'last_update': 'Managed by cache',
            'by_category': {
                'ai': 0,
                'tech': 0,
                'research': 0
            }
        }
app/services/economic_calendar.py ADDED
@@ -0,0 +1,385 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Economic Calendar Scraper - Investing.com
3
+ Scrapes upcoming economic events, indicators, and releases
4
+ No API key required - web scraping approach
5
+ """
6
+
7
+ from datetime import datetime, timedelta
8
+ from typing import List, Dict, Optional
9
+ import logging
10
+ import re
11
+
12
+ import requests
13
+ from bs4 import BeautifulSoup
14
+
15
+ # Configure logging
16
+ logging.basicConfig(level=logging.INFO)
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
class EconomicCalendarService:
    """
    Scrapes economic calendar data from Investing.com.

    Focus: high and medium importance events. Falls back to a static mock
    event list whenever scraping fails or yields nothing, so callers
    always receive a well-formed list.
    """

    def __init__(self):
        """Initialize scraper with a browser-like HTTP session."""
        self.session = requests.Session()
        # Full browser header set: Investing.com tends to block bare
        # python-requests clients.
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.9',
            'Accept-Encoding': 'gzip, deflate, br',
            'Referer': 'https://www.google.com/',
            'DNT': '1',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'Sec-Fetch-Dest': 'document',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-Site': 'none',
            'Cache-Control': 'max-age=0'
        })

    def get_upcoming_events(self, days_ahead: int = 7, min_importance: str = 'medium') -> List[Dict]:
        """
        Get upcoming economic events.

        Args:
            days_ahead: Include events at most this many days ahead.
            min_importance: Minimum importance: 'low', 'medium' or 'high'.

        Returns:
            List of events in the standardized dict format; mock data when
            scraping fails or returns nothing.
        """
        try:
            # Try to scrape from Investing.com first.
            events = self._scrape_investing_com(days_ahead, min_importance)

            if events:
                logger.info(f"Scraped {len(events)} economic events from Investing.com")
                return events
            else:
                logger.warning("No events scraped - using mock data")
                return self._get_mock_events()

        except Exception as e:
            logger.error(f"Error fetching economic calendar: {e}")
            return self._get_mock_events()

    def _scrape_investing_com(self, days_ahead: int, min_importance: str) -> List[Dict]:
        """
        Scrape the economic calendar table from Investing.com.

        Note: this is HTML scraping and may break if the site changes its
        markup; failures are logged and an empty list is returned.
        """
        try:
            url = 'https://www.investing.com/economic-calendar/'
            response = self.session.get(url, timeout=10)
            response.raise_for_status()

            soup = BeautifulSoup(response.content, 'html.parser')
            events: List[Dict] = []

            # The calendar is a table with id 'economicCalendarData';
            # each event is a <tr class="js-event-item">.
            calendar_table = soup.find('table', {'id': 'economicCalendarData'})
            if not calendar_table:
                logger.warning("Could not find economic calendar table on Investing.com")
                return []

            rows = calendar_table.find_all('tr', {'class': 'js-event-item'})

            for row in rows[:50]:  # Limit to 50 events
                try:
                    event_data = self._parse_event_row(row)
                    if event_data and self._should_include_event(event_data, days_ahead, min_importance):
                        events.append(event_data)
                except Exception as e:
                    # One unparsable row should not drop the whole page.
                    logger.debug(f"Error parsing event row: {e}")
                    continue

            return events

        except Exception as e:
            logger.error(f"Error scraping Investing.com: {e}")
            return []

    def _parse_event_row(self, row) -> Optional[Dict]:
        """Parse one <tr> from the Investing.com calendar into an event dict.

        Returns None when the row cannot be parsed.
        """
        try:
            # Event time cell, e.g. "10:00" or "All Day".
            timestamp_elem = row.find('td', {'class': 'first left time'})
            time_str = timestamp_elem.get_text(strip=True) if timestamp_elem else ''

            # Country code is stored in the flag cell's title attribute.
            country_elem = row.find('td', {'class': 'flagCur'})
            country = country_elem.get('title', 'US') if country_elem else 'US'

            # Importance is encoded as 1-3 bull icons.
            importance_elem = row.find('td', {'class': 'sentiment'})
            importance = self._parse_importance(importance_elem) if importance_elem else 'low'

            event_elem = row.find('td', {'class': 'left event'})
            event_name = event_elem.get_text(strip=True) if event_elem else ''

            # Actual / forecast / previous cells have generated ids.
            actual_elem = row.find('td', {'id': re.compile('eventActual_')})
            forecast_elem = row.find('td', {'id': re.compile('eventForecast_')})
            previous_elem = row.find('td', {'id': re.compile('eventPrevious_')})

            actual = self._parse_value(actual_elem.get_text(strip=True) if actual_elem else '')
            forecast = self._parse_value(forecast_elem.get_text(strip=True) if forecast_elem else '')
            previous = self._parse_value(previous_elem.get_text(strip=True) if previous_elem else '')

            event_date = self._parse_event_time(time_str)
            time_to_event = self._calculate_time_to_event(event_date)

            return {
                # NOTE(review): hash() of a str is salted per Python process,
                # so this id is not stable across runs β€” fine for in-process
                # display, but do not persist it.
                'id': hash(f"{event_name}_{event_date}_{country}"),
                'title': f"{country} - {event_name}",
                'event_name': event_name,
                'event_date': event_date,
                'country': country,
                'category': self._categorize_event(event_name),
                'importance': importance,
                'forecast': forecast,
                'previous': previous,
                'actual': actual,
                'time_to_event': time_to_event,
                'timestamp': datetime.now(),
                'source': 'Investing.com',
                'url': 'https://www.investing.com/economic-calendar/',
                'impact': importance,  # Map importance to impact
                'sentiment': self._determine_sentiment(actual, forecast, previous)
            }

        except Exception as e:
            logger.debug(f"Error parsing event row: {e}")
            return None

    def _parse_importance(self, importance_elem) -> str:
        """Map the number of filled bull icons (1-3) to low/medium/high."""
        if not importance_elem:
            return 'low'

        bulls = importance_elem.find_all('i', {'class': 'grayFullBullishIcon'})
        num_bulls = len(bulls)

        if num_bulls >= 3:
            return 'high'
        elif num_bulls == 2:
            return 'medium'
        else:
            return 'low'

    def _parse_value(self, value_str: str) -> Optional[float]:
        """Parse a numeric cell value; return None for empty/placeholder cells.

        NOTE(review): K/M/B suffixes are stripped without scaling, so
        "180K" parses as 180.0 β€” kept to match how forecasts are compared
        elsewhere; confirm against upstream expectations before changing.
        """
        if not value_str or value_str == '-':
            return None

        try:
            # Remove % sign, thousand separators and K/M/B suffixes.
            cleaned = (value_str.replace('%', '')
                                .replace('K', '')
                                .replace('M', '')
                                .replace('B', '')
                                .replace(',', ''))
            return float(cleaned)
        except (TypeError, ValueError):
            # Was a bare `except:` β€” narrowed so real errors (e.g.
            # KeyboardInterrupt) are no longer swallowed.
            return None

    def _parse_event_time(self, time_str: str) -> datetime:
        """Parse an "HH:MM" (or "All Day") string into a datetime.

        Assumes today's date; a time already in the past rolls over to
        tomorrow. Falls back to now+2h on parse failure.
        """
        try:
            if 'All Day' in time_str or not time_str:
                # Default all-day events to noon today.
                return datetime.now().replace(hour=12, minute=0, second=0, microsecond=0)

            time_parts = time_str.split(':')
            hour = int(time_parts[0])
            minute = int(time_parts[1]) if len(time_parts) > 1 else 0

            event_time = datetime.now().replace(hour=hour, minute=minute, second=0, microsecond=0)

            # If the time has passed today, assume it is tomorrow.
            if event_time < datetime.now():
                event_time += timedelta(days=1)

            return event_time

        except Exception as e:
            logger.debug(f"Error parsing time: {e}")
            return datetime.now() + timedelta(hours=2)

    def _calculate_time_to_event(self, event_date: datetime) -> str:
        """Return a human-readable countdown, e.g. "in 2d 8h" / "In progress"."""
        delta = event_date - datetime.now()

        if delta.total_seconds() < 0:
            return "In progress"

        days = delta.days
        hours = delta.seconds // 3600
        minutes = (delta.seconds % 3600) // 60

        if days > 0:
            return f"in {days}d {hours}h"
        elif hours > 0:
            return f"in {hours}h {minutes}m"
        else:
            return f"in {minutes}m"

    def _categorize_event(self, event_name: str) -> str:
        """Bucket an event name into a coarse category by keyword match."""
        event_lower = event_name.lower()

        if any(kw in event_lower for kw in ['cpi', 'inflation', 'pce', 'price']):
            return 'inflation'
        elif any(kw in event_lower for kw in ['employment', 'jobs', 'unemployment', 'nfp', 'payroll']):
            return 'employment'
        elif any(kw in event_lower for kw in ['gdp', 'growth']):
            return 'gdp'
        elif any(kw in event_lower for kw in ['fed', 'fomc', 'ecb', 'rate', 'boe', 'boj']):
            return 'central_bank'
        elif any(kw in event_lower for kw in ['pmi', 'manufacturing', 'services']):
            return 'pmi'
        else:
            return 'other'

    def _determine_sentiment(self, actual: Optional[float], forecast: Optional[float], previous: Optional[float]) -> str:
        """Sentiment from actual vs forecast: beat=positive, miss=negative."""
        if actual is None or forecast is None:
            return 'neutral'

        if actual > forecast:
            return 'positive'  # Beat forecast
        elif actual < forecast:
            return 'negative'  # Missed forecast
        else:
            return 'neutral'

    def _should_include_event(self, event: Dict, days_ahead: int, min_importance: str) -> bool:
        """Filter by minimum importance and by the days_ahead horizon."""
        # Rank lookup with a safe default: list.index() would raise
        # ValueError on any unexpected importance string and abort the
        # whole scrape; unknown values now rank as lowest.
        importance_rank = {'low': 0, 'medium': 1, 'high': 2}
        min_level = importance_rank.get(min_importance, 0)
        event_level = importance_rank.get(event['importance'], 0)

        if event_level < min_level:
            return False

        # Filter by date range.
        days_until = (event['event_date'] - datetime.now()).days
        if days_until > days_ahead:
            return False

        return True

    def _get_mock_events(self) -> List[Dict]:
        """Static mock events for development/testing and scrape fallback."""
        now = datetime.now()

        return [
            {
                'id': 1,
                'title': 'US - Consumer Price Index (CPI)',
                'event_name': 'Consumer Price Index',
                'event_date': now + timedelta(hours=2),
                'country': 'US',
                'category': 'inflation',
                'importance': 'high',
                'forecast': 2.5,
                'previous': 2.3,
                'actual': None,
                'time_to_event': 'in 2h 0m',
                'timestamp': now,
                'source': 'Economic Calendar',
                'url': 'https://www.investing.com/economic-calendar/',
                'impact': 'high',
                'sentiment': 'neutral'
            },
            {
                'id': 2,
                'title': 'US - Non-Farm Payrolls (NFP)',
                'event_name': 'Non-Farm Payrolls',
                'event_date': now + timedelta(days=2, hours=8, minutes=30),
                'country': 'US',
                'category': 'employment',
                'importance': 'high',
                'forecast': 180.0,
                'previous': 175.0,
                'actual': None,
                'time_to_event': 'in 2d 8h',
                'timestamp': now,
                'source': 'Economic Calendar',
                'url': 'https://www.investing.com/economic-calendar/',
                'impact': 'high',
                'sentiment': 'neutral'
            },
            {
                'id': 3,
                'title': 'EU - ECB Interest Rate Decision',
                'event_name': 'ECB Interest Rate Decision',
                'event_date': now + timedelta(days=3, hours=12),
                'country': 'EU',
                'category': 'central_bank',
                'importance': 'high',
                'forecast': 3.75,
                'previous': 4.00,
                'actual': None,
                'time_to_event': 'in 3d 12h',
                'timestamp': now,
                'source': 'Economic Calendar',
                'url': 'https://www.investing.com/economic-calendar/',
                'impact': 'high',
                'sentiment': 'neutral'
            },
            {
                'id': 4,
                'title': 'US - GDP Growth Rate',
                'event_name': 'GDP Growth Rate',
                'event_date': now + timedelta(days=5, hours=8, minutes=30),
                'country': 'US',
                'category': 'gdp',
                'importance': 'high',
                'forecast': 2.8,
                'previous': 2.5,
                'actual': None,
                'time_to_event': 'in 5d 8h',
                'timestamp': now,
                'source': 'Economic Calendar',
                'url': 'https://www.investing.com/economic-calendar/',
                'impact': 'high',
                'sentiment': 'neutral'
            },
            {
                'id': 5,
                'title': 'US - Manufacturing PMI',
                'event_name': 'Manufacturing PMI',
                'event_date': now + timedelta(days=1, hours=10),
                'country': 'US',
                'category': 'pmi',
                'importance': 'medium',
                'forecast': 51.5,
                'previous': 50.8,
                'actual': None,
                'time_to_event': 'in 1d 10h',
                'timestamp': now,
                'source': 'Economic Calendar',
                'url': 'https://www.investing.com/economic-calendar/',
                'impact': 'medium',
                'sentiment': 'neutral'
            }
        ]

    def get_todays_events(self) -> List[Dict]:
        """Return only the upcoming events whose date falls on today."""
        all_events = self.get_upcoming_events(days_ahead=1)
        today = datetime.now().date()

        todays_events = [
            event for event in all_events
            if event['event_date'].date() == today
        ]

        return todays_events
app/services/market_events.py ADDED
@@ -0,0 +1,391 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Market Events Scraper - Earnings, Economic Indicators & Central Bank Events
3
+ Aggregates upcoming and recent market-moving events
4
+ Web scraping approach - no API keys required
5
+ """
6
+
7
+ from datetime import datetime, timedelta
8
+ from typing import List, Dict, Optional
9
+ import logging
10
+ import re
11
+ from concurrent.futures import ThreadPoolExecutor
12
+
13
+ import requests
14
+ import feedparser
15
+ from bs4 import BeautifulSoup
16
+
17
+ # Configure logging
18
+ logging.basicConfig(level=logging.INFO)
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
class MarketEventsScraper:
    """
    Scrapes market events from multiple sources
    Focus: Earnings, economic indicators, central bank announcements
    """

    # Central bank RSS feeds (already in use for news).
    # 'weight' is a source-credibility multiplier attached to each emitted
    # event as 'source_weight' (higher = treated as more market-moving).
    CENTRAL_BANKS = {
        'fed': {
            'name': 'Federal Reserve',
            'rss': 'https://www.federalreserve.gov/feeds/press_all.xml',
            'weight': 2.0
        },
        'ecb': {
            'name': 'European Central Bank',
            'rss': 'https://www.ecb.europa.eu/rss/press.xml',
            'weight': 2.0
        }
    }

    def __init__(self):
        """Initialize scraper with a shared HTTP session and browser-like headers."""
        # A desktop User-Agent lowers the chance of scraped sites serving
        # bot-blocking pages; the Session also reuses connections.
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.9',
        })
50
+
51
def scrape_market_events(self, max_items: int = 50, days_ahead: int = 14) -> List[Dict]:
    """
    Scrape market events from all sources in parallel.

    Args:
        max_items: Maximum number of events to return.
        days_ahead: Only keep events scheduled up to this many days in the
            future (past/recent events are always kept).

    Returns:
        Unified event list sorted by event date, high-impact first on ties.
        Falls back to mock data when every source fails or nothing remains.

    Fix: the ``days_ahead`` parameter was previously accepted but ignored;
    it now bounds the forward-looking window as documented.
    """
    all_events = []
    seen_urls = set()

    # Fetch all three source families concurrently
    with ThreadPoolExecutor(max_workers=3) as executor:
        futures = [
            (executor.submit(self._fetch_earnings), 'earnings'),
            (executor.submit(self._fetch_economic_indicators), 'indicators'),
            (executor.submit(self._fetch_central_bank_events), 'central_banks'),
        ]

        for future, source_type in futures:
            try:
                events = future.result(timeout=35)

                # Deduplicate by URL across sources
                for event in events:
                    if event['url'] not in seen_urls:
                        seen_urls.add(event['url'])
                        all_events.append(event)

                logger.info(f"Fetched {len(events)} events from {source_type}")

            except Exception as e:
                logger.error(f"Error fetching {source_type}: {e}")

    # Honor days_ahead: drop events scheduled beyond the horizon.
    # Past events (e.g. fresh central-bank announcements) are kept.
    horizon = datetime.now() + timedelta(days=days_ahead)
    all_events = [
        e for e in all_events
        if e.get('event_date', e['timestamp']) <= horizon
    ]

    # If no events fetched, use mock data
    if not all_events:
        logger.warning("No market events fetched - using mock data")
        return self._get_mock_events()

    # Sort by event date; among equal dates, 'high' impact sorts first
    # (False < True, so `impact != 'high'` puts high-impact earlier)
    all_events.sort(
        key=lambda x: (x.get('event_date', x['timestamp']), x['impact'] != 'high'),
    )

    return all_events[:max_items]
94
+
95
def _fetch_earnings(self) -> List[Dict]:
    """
    Fetch earnings calendar from Yahoo Finance
    Web scraping approach

    Returns a list of normalized event dicts; falls back to mock earnings
    whenever the request fails, the table is missing, or no rows parse.
    """
    try:
        url = 'https://finance.yahoo.com/calendar/earnings'
        response = self.session.get(url, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')
        events = []

        # Yahoo Finance uses a table for earnings.
        # NOTE(review): matching on a class containing "earnings" is fragile —
        # Yahoo's markup changes frequently; verify this selector still works.
        table = soup.find('table', {'class': re.compile('earnings')})

        if not table:
            logger.warning("Could not find earnings table on Yahoo Finance")
            return self._get_mock_earnings()

        rows = table.find_all('tr')[1:20]  # Skip header, limit to 20

        for row in rows:
            try:
                cells = row.find_all('td')
                if len(cells) < 4:
                    continue

                # Parse cells — assumed column order is ticker, company,
                # EPS estimate, reported EPS, report time (TODO confirm)
                ticker = cells[0].get_text(strip=True)
                company = cells[1].get_text(strip=True) if len(cells) > 1 else ticker
                eps_estimate = cells[2].get_text(strip=True) if len(cells) > 2 else 'N/A'
                reported_eps = cells[3].get_text(strip=True) if len(cells) > 3 else None
                event_time = cells[4].get_text(strip=True) if len(cells) > 4 else 'N/A'

                # Create event
                event_date = self._parse_earnings_date(event_time)

                events.append({
                    # NOTE(review): hash() of a str is salted per process in
                    # Python 3, so these ids are not stable across runs.
                    'id': hash(f"earnings_{ticker}_{event_date}"),
                    'title': f"{company} ({ticker}) Earnings Report",
                    'summary': f"Expected EPS: {eps_estimate}" + (f", Reported: {reported_eps}" if reported_eps and reported_eps != 'N/A' else ''),
                    'source': 'Yahoo Finance',
                    'category': 'earnings',
                    'timestamp': datetime.now(),
                    'event_date': event_date,
                    'url': f"https://finance.yahoo.com/quote/{ticker}",
                    'event_type': 'earnings',
                    'ticker': ticker,
                    'expected_value': self._parse_float(eps_estimate),
                    'actual_value': self._parse_float(reported_eps) if reported_eps else None,
                    'previous_value': None,
                    'impact': 'medium',  # Earnings are generally medium impact
                    'sentiment': self._determine_earnings_sentiment(eps_estimate, reported_eps),
                    'is_breaking': False,
                    'source_weight': 1.3,
                    'likes': 0,
                    'retweets': 0
                })

            except Exception as e:
                # Malformed rows are skipped rather than aborting the scrape
                logger.debug(f"Error parsing earnings row: {e}")
                continue

        return events if events else self._get_mock_earnings()

    except Exception as e:
        logger.error(f"Error fetching earnings: {e}")
        return self._get_mock_earnings()
164
+
165
def _fetch_economic_indicators(self) -> List[Dict]:
    """
    Fetch economic indicators.

    FRED's RSS feeds are primarily historical data, so a real release-
    calendar scraper is not implemented yet; mock indicator events are
    served instead.
    """
    try:
        indicator_events: List[Dict] = []
        # Placeholder until a FRED release-calendar scraper exists
        indicator_events.extend(self._get_mock_indicators())
        return indicator_events

    except Exception as e:
        logger.error(f"Error fetching economic indicators: {e}")
        return self._get_mock_indicators()
184
+
185
def _fetch_central_bank_events(self) -> List[Dict]:
    """
    Fetch central bank announcements from RSS feeds

    Iterates over CENTRAL_BANKS; a failure in one feed (or one entry)
    is logged and does not stop the remaining feeds/entries.
    """
    events = []

    for bank_id, bank_info in self.CENTRAL_BANKS.items():
        try:
            feed = feedparser.parse(bank_info['rss'])

            for entry in feed.entries[:10]:
                try:
                    # Parse timestamp; fall back to "now" when the entry
                    # carries no parsed publication date
                    if hasattr(entry, 'published_parsed') and entry.published_parsed:
                        timestamp = datetime(*entry.published_parsed[:6])
                    else:
                        timestamp = datetime.now()

                    # Skip old events (>7 days)
                    if (datetime.now() - timestamp).days > 7:
                        continue

                    title = entry.get('title', '')
                    summary = entry.get('summary', '') or title
                    url = entry.get('link', '')

                    # Clean HTML from summary and cap it at ~200 chars
                    if summary:
                        summary = BeautifulSoup(summary, 'html.parser').get_text()
                        summary = summary[:200] + '...' if len(summary) > 200 else summary

                    events.append({
                        # NOTE(review): hash() of a str is salted per process
                        # in Python 3 — ids are not stable across runs
                        'id': hash(url),
                        'title': f"{bank_info['name']}: {title}",
                        'summary': summary,
                        'source': bank_info['name'],
                        'category': 'central_bank',
                        'timestamp': timestamp,
                        'event_date': timestamp,
                        'url': url,
                        'event_type': 'central_bank_announcement',
                        'ticker': None,
                        'expected_value': None,
                        'actual_value': None,
                        'previous_value': None,
                        'impact': 'high',  # Central bank events are high impact
                        'sentiment': 'neutral',
                        # Announcements younger than a day count as breaking
                        'is_breaking': (datetime.now() - timestamp).days < 1,
                        'source_weight': bank_info['weight'],
                        'likes': 0,
                        'retweets': 0
                    })

                except Exception as e:
                    logger.debug(f"Error parsing {bank_id} entry: {e}")
                    continue

        except Exception as e:
            logger.error(f"Error fetching {bank_id} RSS: {e}")

    return events
246
+
247
+ def _parse_earnings_date(self, time_str: str) -> datetime:
248
+ """Parse earnings report time"""
249
+ # Yahoo Finance uses "Before Market Open", "After Market Close", or specific dates
250
+ now = datetime.now()
251
+
252
+ if 'Before Market' in time_str or 'BMO' in time_str:
253
+ return now.replace(hour=7, minute=0, second=0, microsecond=0)
254
+ elif 'After Market' in time_str or 'AMC' in time_str:
255
+ return now.replace(hour=16, minute=0, second=0, microsecond=0)
256
+ else:
257
+ # Default to tomorrow morning
258
+ return (now + timedelta(days=1)).replace(hour=7, minute=0, second=0, microsecond=0)
259
+
260
+ def _parse_float(self, value_str: str) -> Optional[float]:
261
+ """Parse float from string"""
262
+ if not value_str or value_str == 'N/A' or value_str == '-':
263
+ return None
264
+
265
+ try:
266
+ # Remove $ and other non-numeric characters except . and -
267
+ cleaned = re.sub(r'[^\d.-]', '', value_str)
268
+ return float(cleaned)
269
+ except:
270
+ return None
271
+
272
+ def _determine_earnings_sentiment(self, expected: str, actual: Optional[str]) -> str:
273
+ """Determine sentiment based on earnings beat/miss"""
274
+ if not actual or actual == 'N/A':
275
+ return 'neutral'
276
+
277
+ exp_val = self._parse_float(expected)
278
+ act_val = self._parse_float(actual)
279
+
280
+ if exp_val is None or act_val is None:
281
+ return 'neutral'
282
+
283
+ if act_val > exp_val:
284
+ return 'positive' # Beat
285
+ elif act_val < exp_val:
286
+ return 'negative' # Miss
287
+ else:
288
+ return 'neutral' # In-line
289
+
290
+ def _get_mock_earnings(self) -> List[Dict]:
291
+ """Mock earnings data"""
292
+ now = datetime.now()
293
+
294
+ return [
295
+ {
296
+ 'id': 1,
297
+ 'title': 'Apple Inc. (AAPL) Earnings Report',
298
+ 'summary': 'Expected EPS: $2.10',
299
+ 'source': 'Yahoo Finance',
300
+ 'category': 'earnings',
301
+ 'timestamp': now,
302
+ 'event_date': now + timedelta(days=2, hours=16),
303
+ 'url': 'https://finance.yahoo.com/quote/AAPL',
304
+ 'event_type': 'earnings',
305
+ 'ticker': 'AAPL',
306
+ 'expected_value': 2.10,
307
+ 'actual_value': None,
308
+ 'previous_value': 1.95,
309
+ 'impact': 'high',
310
+ 'sentiment': 'neutral',
311
+ 'is_breaking': False,
312
+ 'source_weight': 1.5,
313
+ 'likes': 0,
314
+ 'retweets': 0
315
+ },
316
+ {
317
+ 'id': 2,
318
+ 'title': 'Microsoft Corporation (MSFT) Earnings Report',
319
+ 'summary': 'Expected EPS: $2.75',
320
+ 'source': 'Yahoo Finance',
321
+ 'category': 'earnings',
322
+ 'timestamp': now,
323
+ 'event_date': now + timedelta(days=3, hours=16),
324
+ 'url': 'https://finance.yahoo.com/quote/MSFT',
325
+ 'event_type': 'earnings',
326
+ 'ticker': 'MSFT',
327
+ 'expected_value': 2.75,
328
+ 'actual_value': None,
329
+ 'previous_value': 2.50,
330
+ 'impact': 'high',
331
+ 'sentiment': 'neutral',
332
+ 'is_breaking': False,
333
+ 'source_weight': 1.5,
334
+ 'likes': 0,
335
+ 'retweets': 0
336
+ }
337
+ ]
338
+
339
+ def _get_mock_indicators(self) -> List[Dict]:
340
+ """Mock economic indicator data"""
341
+ now = datetime.now()
342
+
343
+ return [
344
+ {
345
+ 'id': 3,
346
+ 'title': 'US Retail Sales Data Release',
347
+ 'summary': 'Monthly retail sales figures',
348
+ 'source': 'US Census Bureau',
349
+ 'category': 'economic_indicator',
350
+ 'timestamp': now,
351
+ 'event_date': now + timedelta(days=1, hours=8, minutes=30),
352
+ 'url': 'https://www.census.gov/retail/',
353
+ 'event_type': 'retail_sales',
354
+ 'ticker': None,
355
+ 'expected_value': 0.5,
356
+ 'actual_value': None,
357
+ 'previous_value': 0.3,
358
+ 'impact': 'medium',
359
+ 'sentiment': 'neutral',
360
+ 'is_breaking': False,
361
+ 'source_weight': 1.6,
362
+ 'likes': 0,
363
+ 'retweets': 0
364
+ }
365
+ ]
366
+
367
+ def _get_mock_events(self) -> List[Dict]:
368
+ """Combined mock data"""
369
+ return self._get_mock_earnings() + self._get_mock_indicators() + [
370
+ {
371
+ 'id': 4,
372
+ 'title': 'Federal Reserve: FOMC Meeting Minutes Released',
373
+ 'summary': 'Minutes from the latest Federal Open Market Committee meeting',
374
+ 'source': 'Federal Reserve',
375
+ 'category': 'central_bank',
376
+ 'timestamp': datetime.now() - timedelta(hours=2),
377
+ 'event_date': datetime.now() - timedelta(hours=2),
378
+ 'url': 'https://www.federalreserve.gov/',
379
+ 'event_type': 'central_bank_announcement',
380
+ 'ticker': None,
381
+ 'expected_value': None,
382
+ 'actual_value': None,
383
+ 'previous_value': None,
384
+ 'impact': 'high',
385
+ 'sentiment': 'neutral',
386
+ 'is_breaking': True,
387
+ 'source_weight': 2.0,
388
+ 'likes': 0,
389
+ 'retweets': 0
390
+ }
391
+ ]
app/services/news_monitor.py ADDED
@@ -0,0 +1,593 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Professional Finance News Monitor using snscrape
3
+ Real-time tracking: Macro, Markets, Geopolitical intelligence
4
+ Optimized for low-latency trading decisions
5
+ """
6
+
7
+ import pandas as pd
8
+ from datetime import datetime, timedelta
9
+ from typing import List, Dict, Optional
10
+ import streamlit as st
11
+ import time
12
+ import logging
13
+ import re
14
+
15
+ # Configure logging
16
+ logging.basicConfig(level=logging.INFO)
17
+ logger = logging.getLogger(__name__)
18
+
19
+ try:
20
+ import snscrape.modules.twitter as sntwitter
21
+ SNSCRAPE_AVAILABLE = True
22
+ except ImportError:
23
+ SNSCRAPE_AVAILABLE = False
24
+ logger.warning("snscrape not available. Install with: pip install snscrape")
25
+
26
+
27
+ class FinanceNewsMonitor:
28
+ """
29
+ Professional-grade financial news aggregator
30
+ Sources: Bloomberg, Reuters, WSJ, FT, CNBC, ZeroHedge
31
+ """
32
+
33
+ # Premium financial sources - expanded coverage
34
+ SOURCES = {
35
+ # ===== TIER 1: Major Financial News =====
36
+ 'reuters': {
37
+ 'handle': '@Reuters',
38
+ 'weight': 1.5,
39
+ 'specialization': ['macro', 'geopolitical', 'markets']
40
+ },
41
+ 'bloomberg': {
42
+ 'handle': '@business',
43
+ 'weight': 1.5,
44
+ 'specialization': ['macro', 'markets']
45
+ },
46
+ 'ft': {
47
+ 'handle': '@FT',
48
+ 'weight': 1.4,
49
+ 'specialization': ['macro', 'markets']
50
+ },
51
+ 'economist': {
52
+ 'handle': '@TheEconomist',
53
+ 'weight': 1.3,
54
+ 'specialization': ['macro', 'geopolitical']
55
+ },
56
+ 'wsj': {
57
+ 'handle': '@WSJ',
58
+ 'weight': 1.4,
59
+ 'specialization': ['markets', 'macro']
60
+ },
61
+ 'bloomberg_terminal': {
62
+ 'handle': '@Bloomberg',
63
+ 'weight': 1.5,
64
+ 'specialization': ['macro', 'markets']
65
+ },
66
+ 'cnbc': {
67
+ 'handle': '@CNBC',
68
+ 'weight': 1.2,
69
+ 'specialization': ['markets']
70
+ },
71
+ 'marketwatch': {
72
+ 'handle': '@MarketWatch',
73
+ 'weight': 1.1,
74
+ 'specialization': ['markets']
75
+ },
76
+
77
+ # ===== TIER 2: Geopolitical Intelligence =====
78
+ 'bbc_world': {
79
+ 'handle': '@BBCWorld',
80
+ 'weight': 1.4,
81
+ 'specialization': ['geopolitical']
82
+ },
83
+ 'afp': {
84
+ 'handle': '@AFP',
85
+ 'weight': 1.3,
86
+ 'specialization': ['geopolitical']
87
+ },
88
+ 'aljazeera': {
89
+ 'handle': '@AlJazeera',
90
+ 'weight': 1.2,
91
+ 'specialization': ['geopolitical']
92
+ },
93
+ 'politico': {
94
+ 'handle': '@politico',
95
+ 'weight': 1.2,
96
+ 'specialization': ['geopolitical', 'macro']
97
+ },
98
+ 'dw_news': {
99
+ 'handle': '@dwnews',
100
+ 'weight': 1.2,
101
+ 'specialization': ['geopolitical']
102
+ },
103
+
104
+ # ===== TIER 3: Central Banks & Official Sources =====
105
+ 'federal_reserve': {
106
+ 'handle': '@federalreserve',
107
+ 'weight': 2.0, # Highest priority
108
+ 'specialization': ['macro']
109
+ },
110
+ 'ecb': {
111
+ 'handle': '@ecb',
112
+ 'weight': 2.0,
113
+ 'specialization': ['macro']
114
+ },
115
+ 'lagarde': {
116
+ 'handle': '@Lagarde',
117
+ 'weight': 1.9, # ECB President
118
+ 'specialization': ['macro']
119
+ },
120
+ 'bank_of_england': {
121
+ 'handle': '@bankofengland',
122
+ 'weight': 1.8,
123
+ 'specialization': ['macro']
124
+ },
125
+ 'imf': {
126
+ 'handle': '@IMFNews',
127
+ 'weight': 1.7,
128
+ 'specialization': ['macro', 'geopolitical']
129
+ },
130
+ 'world_bank': {
131
+ 'handle': '@worldbank',
132
+ 'weight': 1.6,
133
+ 'specialization': ['macro', 'geopolitical']
134
+ },
135
+ 'us_treasury': {
136
+ 'handle': '@USTreasury',
137
+ 'weight': 1.8,
138
+ 'specialization': ['macro']
139
+ },
140
+
141
+ # ===== TIER 4: Alpha Accounts (Fast Breaking News) =====
142
+ 'zerohedge': {
143
+ 'handle': '@zerohedge',
144
+ 'weight': 1.0,
145
+ 'specialization': ['markets', 'macro']
146
+ },
147
+ 'first_squawk': {
148
+ 'handle': '@FirstSquawk',
149
+ 'weight': 1.1, # Fast alerts
150
+ 'specialization': ['markets', 'macro']
151
+ },
152
+ 'live_squawk': {
153
+ 'handle': '@LiveSquawk',
154
+ 'weight': 1.1, # Real-time market squawks
155
+ 'specialization': ['markets', 'macro']
156
+ }
157
+ }
158
+
159
+ # Enhanced keyword detection for professional traders
160
+ MACRO_KEYWORDS = [
161
+ # Central Banks & Policy
162
+ 'Fed', 'ECB', 'BoE', 'BoJ', 'FOMC', 'Powell', 'Lagarde',
163
+ 'interest rate', 'rate cut', 'rate hike', 'QE', 'quantitative',
164
+ 'monetary policy', 'dovish', 'hawkish',
165
+ # Economic Indicators
166
+ 'GDP', 'inflation', 'CPI', 'PPI', 'PCE', 'NFP', 'payroll',
167
+ 'unemployment', 'jobless', 'retail sales', 'PMI', 'ISM',
168
+ 'consumer confidence', 'durable goods', 'housing starts',
169
+ # Fiscal & Economic
170
+ 'recession', 'stimulus', 'fiscal policy', 'treasury',
171
+ 'yield curve', 'bond market'
172
+ ]
173
+
174
+ GEO_KEYWORDS = [
175
+ # Conflict & Security
176
+ 'war', 'conflict', 'military', 'missile', 'attack', 'invasion',
177
+ 'sanctions', 'embargo', 'blockade',
178
+ # Political
179
+ 'election', 'impeachment', 'coup', 'protest', 'unrest',
180
+ 'geopolitical', 'tension', 'crisis', 'dispute',
181
+ # Trade & Relations
182
+ 'trade war', 'tariff', 'trade deal', 'summit', 'treaty',
183
+ 'China', 'Russia', 'Taiwan', 'Middle East', 'Ukraine'
184
+ ]
185
+
186
+ MARKET_KEYWORDS = [
187
+ # Indices & General
188
+ 'S&P', 'Nasdaq', 'Dow', 'Russell', 'VIX', 'volatility',
189
+ 'rally', 'sell-off', 'correction', 'crash', 'bull', 'bear',
190
+ # Corporate Events
191
+ 'earnings', 'EPS', 'revenue', 'guidance', 'beat', 'miss',
192
+ 'IPO', 'merger', 'acquisition', 'M&A', 'buyback', 'dividend',
193
+ # Sectors & Assets
194
+ 'tech stocks', 'banks', 'energy', 'commodities', 'crypto',
195
+ 'Bitcoin', 'oil', 'gold', 'dollar', 'DXY'
196
+ ]
197
+
198
+ # High-impact market-moving keywords
199
+ BREAKING_KEYWORDS = [
200
+ 'BREAKING', 'ALERT', 'URGENT', 'just in', 'developing',
201
+ 'Fed', 'Powell', 'emergency', 'unexpected', 'surprise'
202
+ ]
203
+
204
+ def __init__(self):
205
+ self.news_cache = []
206
+ self.last_fetch = None
207
+ self.cache_ttl = 180 # 3 minutes for low latency
208
+
209
@st.cache_data(ttl=180)
def scrape_twitter_news(_self, max_tweets: int = 100) -> List[Dict]:
    """
    Scrape latest financial news with caching
    max_tweets: Total tweets to fetch (distributed across sources)

    Returns normalized news dicts sorted breaking-first, then high-impact,
    then newest. Falls back to mock data when snscrape is missing or every
    source fails. (The `_self` parameter name tells Streamlit's cache not
    to hash the instance.)
    """
    if not SNSCRAPE_AVAILABLE:
        logger.info("snscrape not available - using mock data")
        return _self._get_mock_news()

    all_tweets = []
    # Split the budget evenly, but always take at least 5 per source
    tweets_per_source = max(5, max_tweets // len(_self.SOURCES))
    failed_sources = 0

    for source_name, source_info in _self.SOURCES.items():
        try:
            handle = source_info['handle'].replace('@', '')
            # Optimized query: exclude replies and retweets for signal clarity
            query = f"from:{handle} -filter:replies -filter:retweets"

            scraped = 0
            for tweet in sntwitter.TwitterSearchScraper(query).get_items():
                if scraped >= tweets_per_source:
                    break

                # Skip old tweets (>24h)
                if (datetime.now() - tweet.date).days > 1:
                    continue

                # Categorize and analyze
                category = _self._categorize_tweet(tweet.content, source_info['specialization'])
                sentiment = _self._analyze_sentiment(tweet.content)
                impact = _self._assess_impact(tweet, source_info['weight'])
                is_breaking = _self._detect_breaking_news(tweet.content)

                all_tweets.append({
                    'id': tweet.id,
                    'title': tweet.content,
                    'summary': _self._extract_summary(tweet.content),
                    'source': source_name.capitalize(),
                    'category': category,
                    'timestamp': tweet.date,
                    'sentiment': sentiment,
                    'impact': impact,
                    'url': tweet.url,
                    'likes': tweet.likeCount or 0,
                    'retweets': tweet.retweetCount or 0,
                    'is_breaking': is_breaking,
                    'source_weight': source_info['weight']
                })
                scraped += 1

        except Exception as e:
            # One bad source must not abort the whole sweep
            failed_sources += 1
            error_msg = str(e).lower()
            if 'blocked' in error_msg or '404' in error_msg:
                logger.warning(f"Twitter/X API blocked access for {source_name}")
            else:
                logger.error(f"Error scraping {source_name}: {e}")
            continue

    # If Twitter/X blocked all sources, fall back to mock data
    if failed_sources >= len(_self.SOURCES) or len(all_tweets) == 0:
        logger.warning("Twitter/X API unavailable - falling back to mock data for demonstration")
        return _self._get_mock_news()

    # Sort by impact and timestamp (breaking first, then high impact, then newest)
    all_tweets.sort(
        key=lambda x: (x['is_breaking'], x['impact'] == 'high', x['timestamp']),
        reverse=True
    )

    return all_tweets
282
+
283
+ def _categorize_tweet(self, text: str, source_specialization: List[str]) -> str:
284
+ """Advanced categorization with source specialization"""
285
+ text_lower = text.lower()
286
+
287
+ # Calculate weighted scores
288
+ macro_score = sum(2 if kw.lower() in text_lower else 0
289
+ for kw in self.MACRO_KEYWORDS)
290
+ geo_score = sum(2 if kw.lower() in text_lower else 0
291
+ for kw in self.GEO_KEYWORDS)
292
+ market_score = sum(2 if kw.lower() in text_lower else 0
293
+ for kw in self.MARKET_KEYWORDS)
294
+
295
+ # Boost scores based on source specialization
296
+ if 'macro' in source_specialization:
297
+ macro_score *= 1.5
298
+ if 'geopolitical' in source_specialization:
299
+ geo_score *= 1.5
300
+ if 'markets' in source_specialization:
301
+ market_score *= 1.5
302
+
303
+ scores = {
304
+ 'macro': macro_score,
305
+ 'geopolitical': geo_score,
306
+ 'markets': market_score
307
+ }
308
+
309
+ return max(scores, key=scores.get) if max(scores.values()) > 0 else 'general'
310
+
311
+ def _analyze_sentiment(self, text: str) -> str:
312
+ """Professional sentiment analysis for trading"""
313
+ positive_words = [
314
+ 'surge', 'rally', 'soar', 'jump', 'gain', 'rise', 'climb',
315
+ 'growth', 'positive', 'strong', 'robust', 'beat', 'exceed',
316
+ 'outperform', 'record high', 'breakthrough', 'optimistic'
317
+ ]
318
+ negative_words = [
319
+ 'plunge', 'crash', 'tumble', 'fall', 'drop', 'decline', 'slump',
320
+ 'loss', 'weak', 'fragile', 'crisis', 'concern', 'risk', 'fear',
321
+ 'miss', 'disappoint', 'warning', 'downgrade', 'recession'
322
+ ]
323
+
324
+ text_lower = text.lower()
325
+ pos_count = sum(2 if word in text_lower else 0 for word in positive_words)
326
+ neg_count = sum(2 if word in text_lower else 0 for word in negative_words)
327
+
328
+ # Threshold for clear signal
329
+ if pos_count > neg_count + 1:
330
+ return 'positive'
331
+ elif neg_count > pos_count + 1:
332
+ return 'negative'
333
+ return 'neutral'
334
+
335
+ def _assess_impact(self, tweet, source_weight: float) -> str:
336
+ """Assess market impact based on engagement and source credibility"""
337
+ engagement = (tweet.likeCount or 0) + (tweet.retweetCount or 0) * 2
338
+ weighted_engagement = engagement * source_weight
339
+
340
+ # Breaking news always high impact
341
+ if self._detect_breaking_news(tweet.content):
342
+ return 'high'
343
+
344
+ if weighted_engagement > 1500 or source_weight >= 2.0:
345
+ return 'high'
346
+ elif weighted_engagement > 300:
347
+ return 'medium'
348
+ return 'low'
349
+
350
+ def _detect_breaking_news(self, text: str) -> bool:
351
+ """Detect breaking/urgent news for immediate alerts"""
352
+ text_upper = text.upper()
353
+ return any(keyword.upper() in text_upper for keyword in self.BREAKING_KEYWORDS)
354
+
355
+ def _extract_summary(self, text: str, max_length: int = 200) -> str:
356
+ """Extract clean summary for display"""
357
+ # Remove URLs
358
+ import re
359
+ text = re.sub(r'http\S+', '', text)
360
+ text = text.strip()
361
+
362
+ if len(text) <= max_length:
363
+ return text
364
+ return text[:max_length] + '...'
365
+
366
+ def _get_mock_news(self) -> List[Dict]:
367
+ """Mock news data when snscrape is unavailable - Showcases all source types"""
368
+ return [
369
+ # Tier 3: Central Bank - BREAKING
370
+ {
371
+ 'id': 1,
372
+ 'title': 'BREAKING: Federal Reserve announces emergency rate cut of 50bps - Powell cites economic uncertainty',
373
+ 'summary': 'BREAKING: Fed emergency rate cut 50bps',
374
+ 'source': 'Federal Reserve',
375
+ 'category': 'macro',
376
+ 'timestamp': datetime.now() - timedelta(minutes=5),
377
+ 'sentiment': 'negative',
378
+ 'impact': 'high',
379
+ 'url': 'https://twitter.com/federalreserve',
380
+ 'likes': 5000,
381
+ 'retweets': 2000,
382
+ 'is_breaking': True,
383
+ 'source_weight': 2.0
384
+ },
385
+ # Tier 4: Alpha Account - Fast Alert
386
+ {
387
+ 'id': 2,
388
+ 'title': '*FIRST SQUAWK: S&P 500 FUTURES DROP 2% AFTER FED ANNOUNCEMENT',
389
+ 'summary': '*FIRST SQUAWK: S&P 500 futures drop 2%',
390
+ 'source': 'First Squawk',
391
+ 'category': 'markets',
392
+ 'timestamp': datetime.now() - timedelta(minutes=10),
393
+ 'sentiment': 'negative',
394
+ 'impact': 'high',
395
+ 'url': 'https://twitter.com/FirstSquawk',
396
+ 'likes': 1500,
397
+ 'retweets': 600,
398
+ 'is_breaking': False,
399
+ 'source_weight': 1.1
400
+ },
401
+ # Tier 1: Bloomberg - Markets
402
+ {
403
+ 'id': 3,
404
+ 'title': 'Apple reports earnings beat with $123B revenue, raises dividend by 4% - Stock up 3% after hours',
405
+ 'summary': 'Apple beats earnings, raises dividend 4%',
406
+ 'source': 'Bloomberg',
407
+ 'category': 'markets',
408
+ 'timestamp': datetime.now() - timedelta(minutes=25),
409
+ 'sentiment': 'positive',
410
+ 'impact': 'high',
411
+ 'url': 'https://twitter.com/business',
412
+ 'likes': 2800,
413
+ 'retweets': 900,
414
+ 'is_breaking': False,
415
+ 'source_weight': 1.5
416
+ },
417
+ # Tier 3: ECB President
418
+ {
419
+ 'id': 4,
420
+ 'title': 'ECB President Lagarde: Inflation remains above target, rates to stay higher for longer',
421
+ 'summary': 'Lagarde: rates to stay higher for longer',
422
+ 'source': 'Lagarde',
423
+ 'category': 'macro',
424
+ 'timestamp': datetime.now() - timedelta(minutes=45),
425
+ 'sentiment': 'neutral',
426
+ 'impact': 'high',
427
+ 'url': 'https://twitter.com/Lagarde',
428
+ 'likes': 1200,
429
+ 'retweets': 400,
430
+ 'is_breaking': False,
431
+ 'source_weight': 1.9
432
+ },
433
+ # Tier 2: Geopolitical - BBC
434
+ {
435
+ 'id': 5,
436
+ 'title': 'Ukraine conflict: New peace talks scheduled as tensions ease in Eastern Europe',
437
+ 'summary': 'Ukraine: New peace talks scheduled',
438
+ 'source': 'BBC World',
439
+ 'category': 'geopolitical',
440
+ 'timestamp': datetime.now() - timedelta(hours=1),
441
+ 'sentiment': 'positive',
442
+ 'impact': 'medium',
443
+ 'url': 'https://twitter.com/BBCWorld',
444
+ 'likes': 3500,
445
+ 'retweets': 1200,
446
+ 'is_breaking': False,
447
+ 'source_weight': 1.4
448
+ },
449
+ # Tier 1: Reuters - Macro
450
+ {
451
+ 'id': 6,
452
+ 'title': 'US GDP growth revised up to 2.8% in Q4, beating economists expectations of 2.5%',
453
+ 'summary': 'US GDP growth revised up to 2.8% in Q4',
454
+ 'source': 'Reuters',
455
+ 'category': 'macro',
456
+ 'timestamp': datetime.now() - timedelta(hours=2),
457
+ 'sentiment': 'positive',
458
+ 'impact': 'medium',
459
+ 'url': 'https://twitter.com/Reuters',
460
+ 'likes': 1800,
461
+ 'retweets': 600,
462
+ 'is_breaking': False,
463
+ 'source_weight': 1.5
464
+ },
465
+ # Tier 4: Live Squawk
466
+ {
467
+ 'id': 7,
468
+ 'title': '*LIVE SQUAWK: Oil prices surge 5% on Middle East supply concerns, Brent crude at $92/barrel',
469
+ 'summary': '*LIVE SQUAWK: Oil surges 5% on supply fears',
470
+ 'source': 'Live Squawk',
471
+ 'category': 'markets',
472
+ 'timestamp': datetime.now() - timedelta(hours=3),
473
+ 'sentiment': 'neutral',
474
+ 'impact': 'medium',
475
+ 'url': 'https://twitter.com/LiveSquawk',
476
+ 'likes': 900,
477
+ 'retweets': 350,
478
+ 'is_breaking': False,
479
+ 'source_weight': 1.1
480
+ },
481
+ # Tier 3: IMF
482
+ {
483
+ 'id': 8,
484
+ 'title': 'IMF upgrades global growth forecast to 3.2% for 2024, warns of recession risks in Europe',
485
+ 'summary': 'IMF upgrades global growth to 3.2%',
486
+ 'source': 'IMF',
487
+ 'category': 'macro',
488
+ 'timestamp': datetime.now() - timedelta(hours=4),
489
+ 'sentiment': 'neutral',
490
+ 'impact': 'medium',
491
+ 'url': 'https://twitter.com/IMFNews',
492
+ 'likes': 800,
493
+ 'retweets': 300,
494
+ 'is_breaking': False,
495
+ 'source_weight': 1.7
496
+ },
497
+ # Tier 2: Politico - Geopolitical
498
+ {
499
+ 'id': 9,
500
+ 'title': 'US-China trade talks resume in Washington, focus on technology transfer and tariffs',
501
+ 'summary': 'US-China trade talks resume',
502
+ 'source': 'Politico',
503
+ 'category': 'geopolitical',
504
+ 'timestamp': datetime.now() - timedelta(hours=5),
505
+ 'sentiment': 'neutral',
506
+ 'impact': 'low',
507
+ 'url': 'https://twitter.com/politico',
508
+ 'likes': 600,
509
+ 'retweets': 200,
510
+ 'is_breaking': False,
511
+ 'source_weight': 1.2
512
+ },
513
+ # Tier 1: FT - Markets
514
+ {
515
+ 'id': 10,
516
+ 'title': 'Bank of America cuts recession probability to 20%, cites resilient consumer spending',
517
+ 'summary': 'BofA cuts recession probability to 20%',
518
+ 'source': 'FT',
519
+ 'category': 'markets',
520
+ 'timestamp': datetime.now() - timedelta(hours=6),
521
+ 'sentiment': 'positive',
522
+ 'impact': 'low',
523
+ 'url': 'https://twitter.com/FT',
524
+ 'likes': 700,
525
+ 'retweets': 250,
526
+ 'is_breaking': False,
527
+ 'source_weight': 1.4
528
+ }
529
+ ]
530
+
531
def get_news(self, category: str = 'all', sentiment: str = 'all',
             impact: str = 'all', refresh: bool = False) -> pd.DataFrame:
    """
    Get filtered news with intelligent caching

    Args:
        category: 'all', 'macro', 'geopolitical', 'markets'
        sentiment: 'all', 'positive', 'negative', 'neutral'
        impact: 'all', 'high', 'medium', 'low'
        refresh: Force refresh cache

    Returns:
        DataFrame of the filtered cached items; 'timestamp' is converted
        to pandas datetimes when non-empty.

    Bug fix: the staleness check previously used ``timedelta.seconds``,
    which wraps every 24 hours — a day-old cache looked fresh. It now
    uses ``total_seconds()``.
    """
    cache_is_fresh = (
        self.last_fetch is not None
        and (datetime.now() - self.last_fetch).total_seconds() <= self.cache_ttl
    )
    if refresh or not cache_is_fresh:
        self.news_cache = self.scrape_twitter_news(max_tweets=100)
        self.last_fetch = datetime.now()

    news = self.news_cache.copy()

    # Apply filters ('all' disables a dimension)
    if category != 'all':
        news = [n for n in news if n['category'] == category]
    if sentiment != 'all':
        news = [n for n in news if n['sentiment'] == sentiment]
    if impact != 'all':
        news = [n for n in news if n['impact'] == impact]

    df = pd.DataFrame(news)
    if not df.empty:
        df['timestamp'] = pd.to_datetime(df['timestamp'])

    return df
565
+
566
def get_breaking_news(self) -> pd.DataFrame:
    """Return up to 10 breaking-news rows from the current feed."""
    feed = self.get_news()
    if feed.empty:
        # Nothing cached/fetched: hand back the empty frame as-is
        return feed
    return feed[feed['is_breaking'] == True].head(10)  # noqa: E712
572
+
573
def get_statistics(self) -> Dict:
    """Summary counts for the cached news feed."""
    cache = self.news_cache
    if not cache:
        # Empty cache gets the minimal shape (no 'by_category' key)
        return {
            'total': 0,
            'high_impact': 0,
            'breaking': 0,
            'last_update': 'Never'
        }

    last_update = self.last_fetch.strftime('%H:%M:%S') if self.last_fetch else 'Never'
    by_category = {
        bucket: sum(1 for item in cache if item['category'] == bucket)
        for bucket in ('macro', 'geopolitical', 'markets')
    }

    return {
        'total': len(cache),
        'high_impact': sum(1 for item in cache if item['impact'] == 'high'),
        'breaking': sum(1 for item in cache if item['is_breaking']),
        'last_update': last_update,
        'by_category': by_category,
    }
app/services/news_monitor_twikit.py ADDED
@@ -0,0 +1,613 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Professional Finance News Monitor using Twikit
3
+ Real-time tracking: Macro, Markets, Geopolitical intelligence
4
+ Optimized for low-latency trading decisions
5
+ """
6
+
7
+ import pandas as pd
8
+ from datetime import datetime, timedelta
9
+ from typing import List, Dict, Optional
10
+ import streamlit as st
11
+ import os
12
+ import asyncio
13
+ import re
14
+ import logging
15
+ from dotenv import load_dotenv
16
+
17
+ # Configure logging
18
+ logging.basicConfig(level=logging.INFO)
19
+ logger = logging.getLogger(__name__)
20
+
21
+ # Load environment variables
22
+ load_dotenv()
23
+
24
+ try:
25
+ from twikit import Client
26
+ TWIKIT_AVAILABLE = True
27
+ except ImportError:
28
+ TWIKIT_AVAILABLE = False
29
+ logger.warning("twikit not available. Install with: pip install twikit")
30
+
31
+
32
+ class FinanceNewsMonitor:
33
+ """
34
+ Professional-grade financial news aggregator using Twikit
35
+ Sources: Bloomberg, Reuters, WSJ, FT, CNBC, and 18 more premium sources
36
+ """
37
+
38
+ # Premium financial sources - expanded coverage
39
+ SOURCES = {
40
+ # ===== TIER 1: Major Financial News =====
41
+ 'reuters': {
42
+ 'handle': 'Reuters',
43
+ 'weight': 1.5,
44
+ 'specialization': ['macro', 'geopolitical', 'markets']
45
+ },
46
+ 'bloomberg': {
47
+ 'handle': 'business',
48
+ 'weight': 1.5,
49
+ 'specialization': ['macro', 'markets']
50
+ },
51
+ 'ft': {
52
+ 'handle': 'FT',
53
+ 'weight': 1.4,
54
+ 'specialization': ['macro', 'markets']
55
+ },
56
+ 'economist': {
57
+ 'handle': 'TheEconomist',
58
+ 'weight': 1.3,
59
+ 'specialization': ['macro', 'geopolitical']
60
+ },
61
+ 'wsj': {
62
+ 'handle': 'WSJ',
63
+ 'weight': 1.4,
64
+ 'specialization': ['markets', 'macro']
65
+ },
66
+ 'bloomberg_terminal': {
67
+ 'handle': 'Bloomberg',
68
+ 'weight': 1.5,
69
+ 'specialization': ['macro', 'markets']
70
+ },
71
+ 'cnbc': {
72
+ 'handle': 'CNBC',
73
+ 'weight': 1.2,
74
+ 'specialization': ['markets']
75
+ },
76
+ 'marketwatch': {
77
+ 'handle': 'MarketWatch',
78
+ 'weight': 1.1,
79
+ 'specialization': ['markets']
80
+ },
81
+
82
+ # ===== TIER 2: Geopolitical Intelligence =====
83
+ 'bbc_world': {
84
+ 'handle': 'BBCWorld',
85
+ 'weight': 1.4,
86
+ 'specialization': ['geopolitical']
87
+ },
88
+ 'afp': {
89
+ 'handle': 'AFP',
90
+ 'weight': 1.3,
91
+ 'specialization': ['geopolitical']
92
+ },
93
+ 'aljazeera': {
94
+ 'handle': 'AlJazeera',
95
+ 'weight': 1.2,
96
+ 'specialization': ['geopolitical']
97
+ },
98
+ 'politico': {
99
+ 'handle': 'politico',
100
+ 'weight': 1.2,
101
+ 'specialization': ['geopolitical', 'macro']
102
+ },
103
+ 'dw_news': {
104
+ 'handle': 'dwnews',
105
+ 'weight': 1.2,
106
+ 'specialization': ['geopolitical']
107
+ },
108
+
109
+ # ===== TIER 3: Central Banks & Official Sources =====
110
+ 'federal_reserve': {
111
+ 'handle': 'federalreserve',
112
+ 'weight': 2.0, # Highest priority
113
+ 'specialization': ['macro']
114
+ },
115
+ 'ecb': {
116
+ 'handle': 'ecb',
117
+ 'weight': 2.0,
118
+ 'specialization': ['macro']
119
+ },
120
+ 'lagarde': {
121
+ 'handle': 'Lagarde',
122
+ 'weight': 1.9, # ECB President
123
+ 'specialization': ['macro']
124
+ },
125
+ 'bank_of_england': {
126
+ 'handle': 'bankofengland',
127
+ 'weight': 1.8,
128
+ 'specialization': ['macro']
129
+ },
130
+ 'imf': {
131
+ 'handle': 'IMFNews',
132
+ 'weight': 1.7,
133
+ 'specialization': ['macro', 'geopolitical']
134
+ },
135
+ 'world_bank': {
136
+ 'handle': 'worldbank',
137
+ 'weight': 1.6,
138
+ 'specialization': ['macro', 'geopolitical']
139
+ },
140
+ 'us_treasury': {
141
+ 'handle': 'USTreasury',
142
+ 'weight': 1.8,
143
+ 'specialization': ['macro']
144
+ },
145
+
146
+ # ===== TIER 4: Alpha Accounts (Fast Breaking News) =====
147
+ 'zerohedge': {
148
+ 'handle': 'zerohedge',
149
+ 'weight': 1.0,
150
+ 'specialization': ['markets', 'macro']
151
+ },
152
+ 'first_squawk': {
153
+ 'handle': 'FirstSquawk',
154
+ 'weight': 1.1, # Fast alerts
155
+ 'specialization': ['markets', 'macro']
156
+ },
157
+ 'live_squawk': {
158
+ 'handle': 'LiveSquawk',
159
+ 'weight': 1.1, # Real-time market squawks
160
+ 'specialization': ['markets', 'macro']
161
+ }
162
+ }
163
+
164
+ # Enhanced keyword detection for professional traders
165
+ MACRO_KEYWORDS = [
166
+ # Central Banks & Policy
167
+ 'Fed', 'ECB', 'BoE', 'BoJ', 'FOMC', 'Powell', 'Lagarde',
168
+ 'interest rate', 'rate cut', 'rate hike', 'QE', 'quantitative',
169
+ 'monetary policy', 'inflation', 'CPI', 'PCE', 'tapering',
170
+ # Economic Data
171
+ 'GDP', 'unemployment', 'jobs report', 'NFP', 'payroll',
172
+ 'PMI', 'manufacturing', 'services', 'consumer confidence',
173
+ 'retail sales', 'housing starts', 'durable goods'
174
+ ]
175
+
176
+ MARKET_KEYWORDS = [
177
+ # Equities
178
+ 'S&P', 'Dow', 'Nasdaq', 'Russell', 'earnings', 'EPS',
179
+ 'stock', 'share', 'equity', 'rally', 'selloff', 'correction',
180
+ # Corporate
181
+ 'merger', 'acquisition', 'IPO', 'buyback', 'dividend',
182
+ 'guidance', 'revenue', 'profit', 'loss', 'bankruptcy'
183
+ ]
184
+
185
+ GEOPOLITICAL_KEYWORDS = [
186
+ # Conflicts & Relations
187
+ 'war', 'conflict', 'sanctions', 'trade', 'tariff', 'embargo',
188
+ 'summit', 'treaty', 'diplomacy', 'tension', 'crisis',
189
+ # Regions
190
+ 'Ukraine', 'Russia', 'China', 'Taiwan', 'Middle East',
191
+ 'Iran', 'North Korea', 'EU', 'Brexit'
192
+ ]
193
+
194
+ def __init__(self):
195
+ """Initialize monitor with caching"""
196
+ self.news_cache = []
197
+ self.last_fetch = None
198
+ self.cache_ttl = 180 # 3 minutes for low latency
199
+ self.client = None
200
+ self.authenticated = False
201
+
202
async def _authenticate_twikit(self):
    """Log in to Twitter/X via twikit using credentials from the environment.

    Reads TWITTER_USERNAME / TWITTER_EMAIL / TWITTER_PASSWORD. On success
    sets self.client and self.authenticated and returns True; on any
    failure (missing twikit, missing credentials, login error) returns False.
    """
    if not TWIKIT_AVAILABLE:
        return False

    try:
        self.client = Client('en-US')

        # Credentials come from .env / process environment only
        username = os.getenv('TWITTER_USERNAME')
        email = os.getenv('TWITTER_EMAIL')
        password = os.getenv('TWITTER_PASSWORD')

        missing = not (username and email and password)
        if missing:
            logger.warning("Twitter credentials not found in environment variables")
            logger.info("Set TWITTER_USERNAME, TWITTER_EMAIL, TWITTER_PASSWORD in .env")
            return False

        await self.client.login(
            auth_info_1=username,
            auth_info_2=email,
            password=password
        )

        self.authenticated = True
        logger.info("Successfully authenticated with Twitter/X")
        return True

    except Exception as e:
        logger.error(f"Twitter authentication failed: {e}")
        return False
233
+
234
async def _scrape_twitter_async(self, max_tweets: int = 100) -> List[Dict]:
    """Async method to scrape tweets using Twikit.

    Fetches recent tweets from every configured source, skips
    retweets/replies and tweets older than 24h, and annotates each item
    with category, sentiment, impact and breaking-news flags. Falls back
    to mock data when authentication fails or every source errors out.
    """
    if not self.authenticated:
        auth_success = await self._authenticate_twikit()
        if not auth_success:
            return self._get_mock_news()

    all_tweets = []
    tweets_per_source = max(5, max_tweets // len(self.SOURCES))
    failed_sources = 0

    for source_name, source_info in self.SOURCES.items():
        try:
            handle = source_info['handle']

            # Search for tweets from this user
            tweets = await self.client.search_tweet(
                f'from:{handle}',
                product='Latest',
                count=tweets_per_source
            )

            for tweet in tweets:
                # Parse creation time. BUG FIX: twikit returns created_at
                # in Twitter's legacy format ("Wed Oct 10 20:19:24 +0000
                # 2018"); the previous code only tried fromisoformat(),
                # which raises ValueError on that format and made every
                # source fail (forcing the mock fallback). Try the legacy
                # format first, ISO-8601 as a fallback, and skip tweets
                # with unparseable timestamps.
                raw_created = tweet.created_at
                try:
                    tweet_date = datetime.strptime(raw_created, '%a %b %d %H:%M:%S %z %Y')
                except (ValueError, TypeError):
                    try:
                        tweet_date = datetime.fromisoformat(raw_created.replace('Z', '+00:00'))
                    except (ValueError, TypeError, AttributeError):
                        continue

                # Skip old tweets (>24h)
                if (datetime.now(tweet_date.tzinfo) - tweet_date).days > 1:
                    continue

                # Skip retweets and replies
                if hasattr(tweet, 'retweeted_tweet') or tweet.in_reply_to_user_id:
                    continue

                # Categorize and analyze
                category = self._categorize_tweet(tweet.text, source_info['specialization'])
                sentiment = self._analyze_sentiment(tweet.text)
                impact = self._assess_impact_twikit(tweet, source_info['weight'])
                is_breaking = self._detect_breaking_news(tweet.text)

                all_tweets.append({
                    'id': int(tweet.id),
                    'title': tweet.text,
                    'summary': self._extract_summary(tweet.text),
                    'source': source_name.replace('_', ' ').title(),
                    'category': category,
                    'timestamp': tweet_date.replace(tzinfo=None),
                    'sentiment': sentiment,
                    'impact': impact,
                    'url': f'https://twitter.com/{handle}/status/{tweet.id}',
                    'likes': tweet.favorite_count or 0,
                    'retweets': tweet.retweet_count or 0,
                    'is_breaking': is_breaking,
                    'source_weight': source_info['weight']
                })

        except Exception as e:
            failed_sources += 1
            error_msg = str(e).lower()
            if 'rate limit' in error_msg:
                logger.warning(f"Rate limited for {source_name}")
            elif 'unauthorized' in error_msg or 'forbidden' in error_msg:
                logger.warning(f"Access denied for {source_name}")
            else:
                logger.error(f"Error scraping {source_name}: {e}")
            continue

    # If all sources failed, fall back to mock data
    if failed_sources >= len(self.SOURCES) or len(all_tweets) == 0:
        logger.warning("Twitter/X scraping failed - falling back to mock data")
        return self._get_mock_news()

    # Sort: breaking news first, then high impact, then most recent
    all_tweets.sort(
        key=lambda x: (x['is_breaking'], x['impact'] == 'high', x['timestamp']),
        reverse=True
    )

    return all_tweets
311
+
312
@st.cache_data(ttl=180)
def scrape_twitter_news(_self, max_tweets: int = 100) -> List[Dict]:
    """
    Scrape latest financial news with caching (sync wrapper).

    max_tweets: Total tweets to fetch (distributed across sources).
    Runs the async scraper on a private event loop and returns mock data
    when twikit is missing or scraping raises.
    """
    if not TWIKIT_AVAILABLE:
        logger.info("Twikit not available - using mock data")
        return _self._get_mock_news()

    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        # Run async scraping in event loop
        return loop.run_until_complete(_self._scrape_twitter_async(max_tweets))
    except Exception as e:
        logger.error(f"Error in async scraping: {e}")
        return _self._get_mock_news()
    finally:
        # BUG FIX: the loop was previously only closed on the success
        # path, so an exception in the coroutine leaked the event loop.
        asyncio.set_event_loop(None)
        loop.close()
332
+
333
+ def _categorize_tweet(self, text: str, source_specialization: List[str]) -> str:
334
+ """Advanced categorization with source specialization"""
335
+ text_lower = text.lower()
336
+
337
+ # Count keyword matches
338
+ macro_score = sum(1 for kw in self.MACRO_KEYWORDS if kw.lower() in text_lower)
339
+ market_score = sum(1 for kw in self.MARKET_KEYWORDS if kw.lower() in text_lower)
340
+ geo_score = sum(1 for kw in self.GEOPOLITICAL_KEYWORDS if kw.lower() in text_lower)
341
+
342
+ # Weight by source specialization
343
+ if 'macro' in source_specialization:
344
+ macro_score *= 1.5
345
+ if 'markets' in source_specialization:
346
+ market_score *= 1.5
347
+ if 'geopolitical' in source_specialization:
348
+ geo_score *= 1.5
349
+
350
+ # Return highest scoring category
351
+ scores = {'macro': macro_score, 'markets': market_score, 'geopolitical': geo_score}
352
+ return max(scores, key=scores.get) if max(scores.values()) > 0 else 'markets'
353
+
354
+ def _analyze_sentiment(self, text: str) -> str:
355
+ """Professional sentiment analysis for traders"""
356
+ text_lower = text.lower()
357
+
358
+ positive_signals = ['surge', 'soar', 'rally', 'beat', 'upgrade', 'bullish',
359
+ 'gain', 'rise', 'jump', 'boost', 'optimistic', 'positive']
360
+ negative_signals = ['plunge', 'crash', 'fall', 'miss', 'downgrade', 'bearish',
361
+ 'loss', 'drop', 'slide', 'concern', 'worry', 'negative']
362
+
363
+ pos_count = sum(1 for signal in positive_signals if signal in text_lower)
364
+ neg_count = sum(1 for signal in negative_signals if signal in text_lower)
365
+
366
+ if pos_count > neg_count:
367
+ return 'positive'
368
+ elif neg_count > pos_count:
369
+ return 'negative'
370
+ return 'neutral'
371
+
372
+ def _assess_impact_twikit(self, tweet, source_weight: float) -> str:
373
+ """Assess market impact using Twikit tweet object"""
374
+ engagement = (tweet.favorite_count or 0) + (tweet.retweet_count or 0) * 2
375
+ weighted_engagement = engagement * source_weight
376
+
377
+ if weighted_engagement > 5000 or source_weight >= 1.8:
378
+ return 'high'
379
+ elif weighted_engagement > 1000:
380
+ return 'medium'
381
+ return 'low'
382
+
383
+ def _detect_breaking_news(self, text: str) -> bool:
384
+ """Detect breaking/urgent news"""
385
+ text_upper = text.upper()
386
+ breaking_signals = ['BREAKING', 'ALERT', 'URGENT', 'JUST IN',
387
+ '*FED', '*ECB', '*POWELL', '*LAGARDE']
388
+ return any(signal in text_upper for signal in breaking_signals)
389
+
390
+ def _extract_summary(self, text: str, max_length: int = 150) -> str:
391
+ """Extract clean summary from tweet"""
392
+ # Remove URLs
393
+ text = re.sub(r'http\S+', '', text)
394
+ text = text.strip()
395
+
396
+ if len(text) <= max_length:
397
+ return text
398
+ return text[:max_length] + '...'
399
+
400
+ def _get_mock_news(self) -> List[Dict]:
401
+ """Mock news data when Twikit is unavailable"""
402
+ return [
403
+ {
404
+ 'id': 1,
405
+ 'title': 'BREAKING: Federal Reserve announces emergency rate cut of 50bps - Powell cites economic uncertainty',
406
+ 'summary': 'BREAKING: Fed emergency rate cut 50bps',
407
+ 'source': 'Federal Reserve',
408
+ 'category': 'macro',
409
+ 'timestamp': datetime.now() - timedelta(minutes=5),
410
+ 'sentiment': 'negative',
411
+ 'impact': 'high',
412
+ 'url': 'https://twitter.com/federalreserve',
413
+ 'likes': 5000,
414
+ 'retweets': 2000,
415
+ 'is_breaking': True,
416
+ 'source_weight': 2.0
417
+ },
418
+ {
419
+ 'id': 2,
420
+ 'title': '*FIRST SQUAWK: S&P 500 FUTURES DROP 2% AFTER FED ANNOUNCEMENT',
421
+ 'summary': '*FIRST SQUAWK: S&P 500 futures drop 2%',
422
+ 'source': 'First Squawk',
423
+ 'category': 'markets',
424
+ 'timestamp': datetime.now() - timedelta(minutes=10),
425
+ 'sentiment': 'negative',
426
+ 'impact': 'high',
427
+ 'url': 'https://twitter.com/FirstSquawk',
428
+ 'likes': 1500,
429
+ 'retweets': 600,
430
+ 'is_breaking': False,
431
+ 'source_weight': 1.1
432
+ },
433
+ {
434
+ 'id': 3,
435
+ 'title': 'Apple reports earnings beat with $123B revenue, raises dividend by 4% - Stock up 3% after hours',
436
+ 'summary': 'Apple beats earnings, raises dividend 4%',
437
+ 'source': 'Bloomberg',
438
+ 'category': 'markets',
439
+ 'timestamp': datetime.now() - timedelta(minutes=25),
440
+ 'sentiment': 'positive',
441
+ 'impact': 'high',
442
+ 'url': 'https://twitter.com/business',
443
+ 'likes': 2800,
444
+ 'retweets': 900,
445
+ 'is_breaking': False,
446
+ 'source_weight': 1.5
447
+ },
448
+ {
449
+ 'id': 4,
450
+ 'title': 'ECB President Lagarde: Inflation remains above target, rates to stay higher for longer',
451
+ 'summary': 'Lagarde: rates to stay higher for longer',
452
+ 'source': 'Lagarde',
453
+ 'category': 'macro',
454
+ 'timestamp': datetime.now() - timedelta(minutes=45),
455
+ 'sentiment': 'neutral',
456
+ 'impact': 'high',
457
+ 'url': 'https://twitter.com/Lagarde',
458
+ 'likes': 1200,
459
+ 'retweets': 400,
460
+ 'is_breaking': False,
461
+ 'source_weight': 1.9
462
+ },
463
+ {
464
+ 'id': 5,
465
+ 'title': 'Ukraine conflict: New peace talks scheduled as tensions ease in Eastern Europe',
466
+ 'summary': 'Ukraine: New peace talks scheduled',
467
+ 'source': 'BBC World',
468
+ 'category': 'geopolitical',
469
+ 'timestamp': datetime.now() - timedelta(hours=1),
470
+ 'sentiment': 'positive',
471
+ 'impact': 'medium',
472
+ 'url': 'https://twitter.com/BBCWorld',
473
+ 'likes': 3500,
474
+ 'retweets': 1200,
475
+ 'is_breaking': False,
476
+ 'source_weight': 1.4
477
+ },
478
+ {
479
+ 'id': 6,
480
+ 'title': 'US GDP growth revised up to 2.8% in Q4, beating economists expectations of 2.5%',
481
+ 'summary': 'US GDP growth revised up to 2.8% in Q4',
482
+ 'source': 'Reuters',
483
+ 'category': 'macro',
484
+ 'timestamp': datetime.now() - timedelta(hours=2),
485
+ 'sentiment': 'positive',
486
+ 'impact': 'medium',
487
+ 'url': 'https://twitter.com/Reuters',
488
+ 'likes': 1800,
489
+ 'retweets': 600,
490
+ 'is_breaking': False,
491
+ 'source_weight': 1.5
492
+ },
493
+ {
494
+ 'id': 7,
495
+ 'title': '*LIVE SQUAWK: Oil prices surge 5% on Middle East supply concerns, Brent crude at $92/barrel',
496
+ 'summary': '*LIVE SQUAWK: Oil surges 5% on supply fears',
497
+ 'source': 'Live Squawk',
498
+ 'category': 'markets',
499
+ 'timestamp': datetime.now() - timedelta(hours=3),
500
+ 'sentiment': 'neutral',
501
+ 'impact': 'medium',
502
+ 'url': 'https://twitter.com/LiveSquawk',
503
+ 'likes': 900,
504
+ 'retweets': 350,
505
+ 'is_breaking': False,
506
+ 'source_weight': 1.1
507
+ },
508
+ {
509
+ 'id': 8,
510
+ 'title': 'IMF upgrades global growth forecast to 3.2% for 2024, warns of recession risks in Europe',
511
+ 'summary': 'IMF upgrades global growth to 3.2%',
512
+ 'source': 'IMF',
513
+ 'category': 'macro',
514
+ 'timestamp': datetime.now() - timedelta(hours=4),
515
+ 'sentiment': 'neutral',
516
+ 'impact': 'medium',
517
+ 'url': 'https://twitter.com/IMFNews',
518
+ 'likes': 800,
519
+ 'retweets': 300,
520
+ 'is_breaking': False,
521
+ 'source_weight': 1.7
522
+ },
523
+ {
524
+ 'id': 9,
525
+ 'title': 'US-China trade talks resume in Washington, focus on technology transfer and tariffs',
526
+ 'summary': 'US-China trade talks resume',
527
+ 'source': 'Politico',
528
+ 'category': 'geopolitical',
529
+ 'timestamp': datetime.now() - timedelta(hours=5),
530
+ 'sentiment': 'neutral',
531
+ 'impact': 'low',
532
+ 'url': 'https://twitter.com/politico',
533
+ 'likes': 600,
534
+ 'retweets': 200,
535
+ 'is_breaking': False,
536
+ 'source_weight': 1.2
537
+ },
538
+ {
539
+ 'id': 10,
540
+ 'title': 'Bank of America cuts recession probability to 20%, cites resilient consumer spending',
541
+ 'summary': 'BofA cuts recession probability to 20%',
542
+ 'source': 'FT',
543
+ 'category': 'markets',
544
+ 'timestamp': datetime.now() - timedelta(hours=6),
545
+ 'sentiment': 'positive',
546
+ 'impact': 'low',
547
+ 'url': 'https://twitter.com/FT',
548
+ 'likes': 700,
549
+ 'retweets': 250,
550
+ 'is_breaking': False,
551
+ 'source_weight': 1.4
552
+ }
553
+ ]
554
+
555
def get_news(self, category: str = 'all', sentiment: str = 'all',
             impact: str = 'all', refresh: bool = False) -> pd.DataFrame:
    """
    Get filtered news with intelligent caching.

    Args:
        category: 'all', 'macro', 'geopolitical', 'markets'
        sentiment: 'all', 'positive', 'negative', 'neutral'
        impact: 'all', 'high', 'medium', 'low'
        refresh: Force refresh cache

    Returns:
        DataFrame of news items; empty when nothing matches the filters.
    """
    # Refresh when forced, never fetched, or the cache is older than the
    # TTL. BUG FIX: use total_seconds() instead of .seconds — .seconds
    # ignores the days component of the timedelta, so a cache older than
    # a full day could incorrectly appear fresh.
    stale = (
        refresh
        or not self.last_fetch
        or (datetime.now() - self.last_fetch).total_seconds() > self.cache_ttl
    )
    if stale:
        self.news_cache = self.scrape_twitter_news(max_tweets=100)
        self.last_fetch = datetime.now()

    news = self.news_cache.copy()

    # Apply filters; the sentinel 'all' disables a filter
    for field, wanted in (('category', category),
                         ('sentiment', sentiment),
                         ('impact', impact)):
        if wanted != 'all':
            news = [n for n in news if n[field] == wanted]

    df = pd.DataFrame(news)
    if not df.empty:
        df['timestamp'] = pd.to_datetime(df['timestamp'])

    return df
589
+
590
def get_breaking_news(self) -> pd.DataFrame:
    """Get only breaking/high-impact news for alerts.

    BUG FIX: the previous implementation returned all impact == 'high'
    rows and ignored the is_breaking flag entirely, despite its name and
    docstring. Now returns rows that are breaking OR high impact, capped
    at 10 items for alert display.
    """
    df = self.get_news()
    if df.empty:
        return df
    alerts = df[(df['is_breaking']) | (df['impact'] == 'high')]
    return alerts.head(10)
593
+
594
def get_statistics(self) -> Dict:
    """Get feed statistics: totals, high-impact/breaking counts, last
    update time (HH:MM:SS or 'Never') and per-category counts."""
    cache = self.news_cache
    if not cache:
        # Nothing fetched yet
        return {
            'total': 0,
            'high_impact': 0,
            'breaking': 0,
            'last_update': 'Never',
            'by_category': {}
        }

    high_impact = 0
    breaking = 0
    by_category = {}
    for item in cache:
        if item['impact'] == 'high':
            high_impact += 1
        if item['is_breaking']:
            breaking += 1
        cat = item['category']
        by_category[cat] = by_category.get(cat, 0) + 1

    last_update = self.last_fetch.strftime('%H:%M:%S') if self.last_fetch else 'Never'

    return {
        'total': len(cache),
        'high_impact': high_impact,
        'breaking': breaking,
        'last_update': last_update,
        'by_category': by_category
    }
app/services/news_scraper.py ADDED
@@ -0,0 +1,565 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Professional Finance News Scraper - Direct from Source Websites
3
+ Scrapes: Reuters, Bloomberg, FT, WSJ, CNBC, MarketWatch, etc.
4
+ No Twitter API needed - direct RSS and web scraping
5
+ """
6
+
7
+ from datetime import datetime, timedelta
8
+ from typing import List, Dict, Optional
9
+
10
+ import logging
11
+ import re
12
+ from concurrent.futures import ThreadPoolExecutor
13
+
14
+ import requests
15
+ import pandas as pd
16
+ import feedparser
17
+ import streamlit as st
18
+ from bs4 import BeautifulSoup
19
+
20
+
21
+ # Configure logging
22
+ logging.basicConfig(level=logging.INFO)
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
+ class FinanceNewsScraper:
27
+ """
28
+ Professional-grade financial news scraper using RSS feeds and web scraping
29
+ No authentication required - publicly available sources
30
+ """
31
+
32
+ # News sources with RSS feeds and web scraping endpoints
33
+ # web=None means web scraping is disabled (blocked by anti-bot measures)
34
+ SOURCES = {
35
+ # ===== TIER 1: Major Financial News =====
36
+ 'cnbc': {
37
+ 'name': 'CNBC',
38
+ 'rss': 'https://www.cnbc.com/id/100003114/device/rss/rss.html',
39
+ 'web': 'https://www.cnbc.com/world/',
40
+ 'selectors': {'headline': 'a.Card-title', 'link': 'a.Card-title'},
41
+ 'weight': 1.2,
42
+ 'web_priority': True, # Web scraping is higher priority
43
+ 'specialization': ['markets']
44
+ },
45
+ 'wsj_markets': {
46
+ 'name': 'WSJ Markets',
47
+ 'rss': 'https://feeds.a.dj.com/rss/RSSMarketsMain.xml',
48
+ 'web': None, # Blocked by paywall
49
+ 'weight': 1.4,
50
+ 'specialization': ['markets']
51
+ },
52
+ 'bloomberg_markets': {
53
+ 'name': 'Bloomberg',
54
+ 'rss': 'https://feeds.bloomberg.com/markets/news.rss',
55
+ 'web': None, # Blocked by Cloudflare
56
+ 'weight': 1.5,
57
+ 'specialization': ['markets']
58
+ },
59
+ 'ft_markets': {
60
+ 'name': 'Financial Times',
61
+ 'rss': 'https://www.ft.com/markets?format=rss',
62
+ 'web': 'https://www.ft.com/markets',
63
+ 'selectors': {'headline': 'div.o-teaser__heading', 'link': 'a.js-teaser-heading-link'},
64
+ 'weight': 1.4,
65
+ 'web_priority': True,
66
+ 'specialization': ['markets']
67
+ },
68
+ 'economist': {
69
+ 'name': 'The Economist',
70
+ 'rss': 'https://www.economist.com/finance-and-economics/rss.xml',
71
+ 'web': None, # Blocked by anti-bot
72
+ 'weight': 1.3,
73
+ 'specialization': ['macro', 'geopolitical']
74
+ },
75
+
76
+ # ===== TIER 2: Geopolitical & Economic =====
77
+ 'bbc_business': {
78
+ 'name': 'BBC Business',
79
+ 'rss': 'http://feeds.bbci.co.uk/news/business/rss.xml',
80
+ 'web': 'https://www.bbc.com/news/business',
81
+ 'selectors': {'headline': 'h2[data-testid="card-headline"]', 'link': 'a[data-testid="internal-link"]'},
82
+ 'weight': 1.4,
83
+ 'web_priority': True,
84
+ 'specialization': ['geopolitical', 'macro']
85
+ },
86
+ 'yahoo_finance': {
87
+ 'name': 'Yahoo Finance',
88
+ 'rss': 'https://finance.yahoo.com/news/rssindex',
89
+ 'web': 'https://finance.yahoo.com/',
90
+ 'selectors': {'headline': 'h3.clamp', 'link': 'a'},
91
+ 'weight': 1.3,
92
+ 'web_priority': True,
93
+ 'specialization': ['markets', 'macro']
94
+ },
95
+ 'google_news_finance': {
96
+ 'name': 'Google News Finance',
97
+ 'rss': 'https://news.google.com/rss/search?q=finance+OR+stocks+OR+markets+OR+economy&hl=en-US&gl=US&ceid=US:en',
98
+ 'web': None, # RSS only
99
+ 'weight': 1.2,
100
+ 'specialization': ['markets', 'macro', 'geopolitical']
101
+ },
102
+ 'google_news_business': {
103
+ 'name': 'Google News Business',
104
+ 'rss': 'https://news.google.com/rss/topics/CAAqJggKIiBDQkFTRWdvSUwyMHZNRGx6TVdZU0FtVnVHZ0pWVXlnQVAB',
105
+ 'web': None, # RSS only
106
+ 'weight': 1.2,
107
+ 'specialization': ['markets', 'macro']
108
+ },
109
+
110
+ # ===== TIER 3: Central Banks & Institutions =====
111
+ 'federal_reserve': {
112
+ 'name': 'Federal Reserve',
113
+ 'rss': 'https://www.federalreserve.gov/feeds/press_all.xml',
114
+ 'web': None, # Disabled - RSS works well
115
+ 'weight': 2.0,
116
+ 'specialization': ['macro']
117
+ },
118
+ 'ecb': {
119
+ 'name': 'European Central Bank',
120
+ 'rss': 'https://www.ecb.europa.eu/rss/press.xml',
121
+ 'web': None, # Disabled - RSS works well
122
+ 'weight': 2.0,
123
+ 'specialization': ['macro']
124
+ },
125
+ 'imf': {
126
+ 'name': 'IMF',
127
+ 'rss': 'https://www.imf.org/en/news/rss',
128
+ 'web': None, # Timeout issues
129
+ 'weight': 1.7,
130
+ 'specialization': ['macro', 'geopolitical']
131
+ }
132
+ }
133
+
134
+ # Keyword detection
135
+ MACRO_KEYWORDS = [
136
+ 'Fed', 'ECB', 'BoE', 'BoJ', 'FOMC', 'Powell', 'Lagarde',
137
+ 'interest rate', 'rate cut', 'rate hike', 'inflation', 'CPI',
138
+ 'GDP', 'unemployment', 'jobs report', 'NFP', 'monetary policy'
139
+ ]
140
+
141
+ MARKET_KEYWORDS = [
142
+ 'S&P', 'Dow', 'Nasdaq', 'earnings', 'EPS', 'stock', 'equity',
143
+ 'rally', 'selloff', 'correction', 'merger', 'acquisition', 'IPO'
144
+ ]
145
+
146
+ GEOPOLITICAL_KEYWORDS = [
147
+ 'war', 'conflict', 'sanctions', 'trade', 'tariff', 'crisis',
148
+ 'Ukraine', 'Russia', 'China', 'Taiwan', 'Middle East'
149
+ ]
150
+
151
def __init__(self):
    """Initialize scraper: one shared HTTP session with browser-like headers."""
    self.session = requests.Session()
    # Enhanced headers to avoid bot detection
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9',
        'Accept-Encoding': 'gzip, deflate, br',
        'DNT': '1',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
    }
    self.session.headers.update(browser_headers)
164
+
165
def _fetch_rss_feed(self, source_name: str, source_info: Dict) -> List[Dict]:
    """Fetch and parse one source's RSS feed into normalized news dicts.

    Returns up to 10 items newer than 24h, each annotated with category,
    sentiment, impact and breaking-news flags. Returns [] on any error.
    """
    try:
        feed = feedparser.parse(source_info['rss'])

        if not feed.entries:
            logger.warning(f"No entries found for {source_name}")
            return []

        news_items = []
        for entry in feed.entries[:10]:  # Limit to 10 most recent
            # Parse published date, falling back to updated date or now().
            # BUG FIX: the bare `except:` here also swallowed
            # KeyboardInterrupt/SystemExit; narrowed to Exception.
            try:
                if hasattr(entry, 'published_parsed') and entry.published_parsed:
                    timestamp = datetime(*entry.published_parsed[:6])
                elif hasattr(entry, 'updated_parsed') and entry.updated_parsed:
                    timestamp = datetime(*entry.updated_parsed[:6])
                else:
                    timestamp = datetime.now()
            except Exception:
                timestamp = datetime.now()

            # Skip old news (>24h).
            # NOTE(review): *_parsed structs are UTC while now() is local
            # time, so this window is skewed by the local UTC offset —
            # confirm before tightening the threshold.
            if (datetime.now() - timestamp).days > 1:
                continue

            # Extract title and summary
            title = entry.get('title', '')
            summary = entry.get('summary', '') or entry.get('description', '')

            # Clean HTML from summary
            if summary:
                summary = BeautifulSoup(summary, 'html.parser').get_text()
                summary = self._extract_summary(summary)

            # Get URL
            url = entry.get('link', '')

            # Categorize and analyze
            text = f"{title} {summary}"
            category = self._categorize_text(text, source_info['specialization'])
            sentiment = self._analyze_sentiment(text)
            impact = self._assess_impact(source_info['weight'], title)
            is_breaking = self._detect_breaking_news(title)

            news_items.append({
                'id': hash(url),
                'title': title,
                'summary': summary or self._extract_summary(title),
                'source': source_info['name'],
                'category': category,
                'timestamp': timestamp,
                'sentiment': sentiment,
                'impact': impact,
                'url': url,
                'likes': 0,  # RSS feeds don't have engagement metrics
                'retweets': 0,
                'is_breaking': is_breaking,
                'source_weight': source_info['weight'],
                'from_web': False  # Mark as RSS feed
            })

        return news_items

    except Exception as e:
        logger.error(f"Error fetching RSS for {source_name}: {e}")
        return []
232
+
233
def _scrape_web_page(self, source_name: str, source_info: Dict) -> List[Dict]:
    """Scrape news headlines directly from a website's main page.

    Args:
        source_name: Source key, used only for log messages.
        source_info: Source config; must provide 'web' (URL), 'selectors'
            ('headline' and 'link' CSS selectors), 'name', 'specialization'
            and 'weight'.

    Returns:
        List of normalized news-item dicts; empty list on any request error.
    """
    # Fix: these were imported inside the per-headline loop, re-running the
    # import machinery for every headline. Hoisted to the method top.
    from html import unescape
    from urllib.parse import urljoin

    try:
        # Fetch HTML from the configured web URL
        response = self.session.get(source_info['web'], timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'lxml')

        headline_selector = source_info['selectors']['headline']
        # NOTE(review): the 'link' selector is read but never used below --
        # links are located relative to each headline element instead.
        link_selector = source_info['selectors']['link']

        news_items = []
        headlines = soup.select(headline_selector)

        for headline_elem in headlines[:10]:  # Limit to 10 most recent
            try:
                # Extract title text and normalize whitespace / stray markup
                title = headline_elem.get_text(separator=' ', strip=True)
                title = re.sub(r'\s+', ' ', title)
                title = re.sub(r'<[^>]+>', '', title)  # strip any tags get_text missed
                title = unescape(title)  # decode HTML entities

                if not title or len(title) < 10:
                    continue

                # Reject titles that still look like raw HTML/CSS (wrong element matched)
                if any(marker in title for marker in ['<!--', '-->', 'style=', '<div', '</div>', '<span', '</span>', 'justify-content', 'flex:', 'padding:']):
                    logger.warning(f"Skipping malformed title from {source_name} (contains HTML): {title[:100]}...")
                    continue

                # A very long "title" usually means the selector hit a container
                if len(title) > 500:
                    logger.warning(f"Skipping suspiciously long title from {source_name}: {len(title)} chars")
                    continue

                # Locate the link: the headline itself if it is an <a>, a nested
                # <a>, an ancestor <a>, or the first <a> under the parent node.
                link_elem = headline_elem if headline_elem.name == 'a' else headline_elem.find('a')
                if not link_elem:
                    link_elem = headline_elem.find_parent('a')
                if not link_elem:
                    parent = headline_elem.find_parent()
                    if parent:
                        link_elem = parent.find('a')

                if not link_elem:
                    continue

                url = link_elem.get('href', '')
                if not url:
                    continue

                if url.startswith('/'):
                    # Resolve site-relative links against the page URL
                    url = urljoin(source_info['web'], url)

                # Skip non-http(s) schemes (mailto:, javascript:, fragments...)
                if not url.startswith('http'):
                    continue

                # Final cleanup of any embedded newlines
                title = title.replace('\n', ' ').replace('\r', ' ').strip()

                # Categorize and analyze the headline
                category = self._categorize_text(title, source_info['specialization'])
                sentiment = self._analyze_sentiment(title)
                impact = self._assess_impact(source_info['weight'], title)
                is_breaking = self._detect_breaking_news(title)

                # Create a clean summary for long titles
                summary = self._extract_summary(title) if len(title) > 150 else title

                news_items.append({
                    # NOTE(review): hash() is salted per process, so ids are not
                    # stable across runs -- consider hashlib for persistent ids.
                    'id': hash(url),
                    'title': title,
                    'summary': summary,
                    'source': source_info['name'],
                    'category': category,
                    'timestamp': datetime.now(),  # Web scraping doesn't have timestamps
                    'sentiment': sentiment,
                    'impact': impact,
                    'url': url,
                    'likes': 0,
                    'retweets': 0,
                    'is_breaking': is_breaking,
                    'source_weight': source_info['weight'],
                    'from_web': True  # Mark as web-scraped (main page news)
                })

            except Exception as e:
                logger.debug(f"Error parsing headline from {source_name}: {e}")
                continue

        logger.info(f"Scraped {len(news_items)} items from {source_name} web page")
        return news_items

    except Exception as e:
        logger.error(f"Error scraping web page for {source_name}: {e}")
        return []
344
+
345
def scrape_news(self, max_items: int = 100) -> List[Dict]:
    """
    Scrape news from all sources with caching
    Uses ThreadPoolExecutor for parallel fetching from both RSS and web pages

    Args:
        max_items: Maximum number of items returned after sorting.

    Returns:
        Deduplicated, sorted list of news dicts; mock data if every source failed.
    """
    all_news = []
    seen_urls = set()  # URLs already accepted, for cross-source deduplication

    # Parallel fetching using ThreadPoolExecutor; each future is tagged with
    # its source name and fetch method ('RSS'/'Web') for logging.
    with ThreadPoolExecutor(max_workers=8) as executor:
        futures = []

        # Submit both RSS and web scraping tasks for each source
        for name, info in self.SOURCES.items():
            # RSS feed task
            futures.append((executor.submit(self._fetch_rss_feed, name, info), name, 'RSS'))
            # Web scraping task (only if web URL is configured)
            if info.get('web'):
                futures.append((executor.submit(self._scrape_web_page, name, info), name, 'Web'))

        for future, source_name, method in futures:
            try:
                news_items = future.result()

                # Deduplicate based on URL (first source to report a URL wins)
                unique_items = []
                for item in news_items:
                    if item['url'] not in seen_urls:
                        seen_urls.add(item['url'])
                        unique_items.append(item)

                all_news.extend(unique_items)
                if len(unique_items) > 0:
                    logger.info(f"Fetched {len(unique_items)} unique items from {source_name} ({method})")
            except Exception as e:
                logger.error(f"Error processing {source_name} ({method}): {e}")

    # If no news was fetched, fall back to canned mock items so downstream
    # consumers always receive data
    if not all_news:
        logger.warning("No news fetched from any source - using mock data")
        return self._get_mock_news()

    # Sort by: web-scraped first, then breaking news, then high impact, then
    # timestamp (tuple compared left-to-right, reverse=True => descending)
    all_news.sort(
        key=lambda x: (x.get('from_web', False), x['is_breaking'], x['impact'] == 'high', x['timestamp']),
        reverse=True
    )

    logger.info(f"Total unique news items: {len(all_news)} (Web: {sum(1 for n in all_news if n.get('from_web'))}, RSS: {sum(1 for n in all_news if not n.get('from_web'))})")
    return all_news[:max_items]
395
+
396
def get_main_page_news(self) -> pd.DataFrame:
    """Return only web-scraped (main-page) news as a DataFrame.

    Fix: the cache was previously populated only when empty and never
    refreshed afterwards, so stale items could be served indefinitely.
    Now applies the same TTL check as get_news().

    Returns:
        DataFrame of items with from_web=True; empty DataFrame when none.
    """
    cache_expired = (
        not self.last_fetch
        or (datetime.now() - self.last_fetch).total_seconds() > self.cache_ttl
    )
    if not self.news_cache or cache_expired:
        self.news_cache = self.scrape_news(max_items=100)
        self.last_fetch = datetime.now()

    main_news = [n for n in self.news_cache if n.get('from_web', False)]
    df = pd.DataFrame(main_news)
    if not df.empty:
        df['timestamp'] = pd.to_datetime(df['timestamp'])
    return df
407
+
408
def _categorize_text(self, text: str, source_specialization: List[str]) -> str:
    """Bucket a headline into 'macro', 'markets' or 'geopolitical'.

    Keyword hit counts are weighted 1.5x for categories the source
    specializes in; ties resolve to the first max, and zero hits
    default to 'markets'.
    """
    lowered = text.lower()

    def hits(keywords):
        # Case-insensitive substring matches against the headline
        return sum(kw.lower() in lowered for kw in keywords)

    scores = {
        'macro': hits(self.MACRO_KEYWORDS),
        'markets': hits(self.MARKET_KEYWORDS),
        'geopolitical': hits(self.GEOPOLITICAL_KEYWORDS),
    }

    # Boost categories matching the source's declared specialization
    for tag in ('macro', 'markets', 'geopolitical'):
        if tag in source_specialization:
            scores[tag] *= 1.5

    best = max(scores, key=scores.get)
    return best if scores[best] > 0 else 'markets'
427
+
428
+ def _analyze_sentiment(self, text: str) -> str:
429
+ """Analyze sentiment based on keywords"""
430
+ text_lower = text.lower()
431
+
432
+ positive = ['surge', 'soar', 'rally', 'beat', 'upgrade', 'bullish',
433
+ 'gain', 'rise', 'jump', 'boost', 'positive']
434
+ negative = ['plunge', 'crash', 'fall', 'miss', 'downgrade', 'bearish',
435
+ 'loss', 'drop', 'slide', 'concern', 'negative']
436
+
437
+ pos_count = sum(1 for word in positive if word in text_lower)
438
+ neg_count = sum(1 for word in negative if word in text_lower)
439
+
440
+ if pos_count > neg_count:
441
+ return 'positive'
442
+ elif neg_count > pos_count:
443
+ return 'negative'
444
+ return 'neutral'
445
+
446
+ def _assess_impact(self, source_weight: float, title: str) -> str:
447
+ """Assess market impact"""
448
+ # Central banks and official sources = high impact
449
+ if source_weight >= 1.7:
450
+ return 'high'
451
+
452
+ # Check for high-impact keywords
453
+ high_impact_words = ['breaking', 'alert', 'emergency', 'crash', 'surge', 'fed']
454
+ if any(word in title.lower() for word in high_impact_words):
455
+ return 'high'
456
+
457
+ return 'medium' if source_weight >= 1.3 else 'low'
458
+
459
+ def _detect_breaking_news(self, text: str) -> bool:
460
+ """Detect breaking news"""
461
+ text_upper = text.upper()
462
+ breaking_signals = ['BREAKING', 'ALERT', 'URGENT', 'JUST IN', 'DEVELOPING']
463
+ return any(signal in text_upper for signal in breaking_signals)
464
+
465
+ def _extract_summary(self, text: str, max_length: int = 150) -> str:
466
+ """Extract clean summary"""
467
+ text = re.sub(r'http\S+', '', text)
468
+ text = text.strip()
469
+
470
+ if len(text) <= max_length:
471
+ return text
472
+ return text[:max_length] + '...'
473
+
474
def _get_mock_news(self) -> List[Dict]:
    """Static fallback items returned when every live source fails.

    Each dict mirrors the shape of real scraped items so downstream
    filtering/rendering works unchanged; timestamps are relative to
    "now" so the feed looks fresh.
    """
    return [
        {
            'id': 1,
            'title': 'Federal Reserve holds rates steady, signals caution on inflation outlook',
            'summary': 'Fed maintains current rate policy',
            'source': 'Federal Reserve',
            'category': 'macro',
            'timestamp': datetime.now() - timedelta(minutes=15),
            'sentiment': 'neutral',
            'impact': 'high',
            'url': 'https://www.federalreserve.gov',
            'likes': 0,
            'retweets': 0,
            'is_breaking': False,
            'source_weight': 2.0
        },
        {
            'id': 2,
            'title': 'S&P 500 closes at record high as tech stocks rally on strong earnings',
            'summary': 'S&P 500 hits record on tech rally',
            'source': 'CNBC',
            'category': 'markets',
            'timestamp': datetime.now() - timedelta(minutes=30),
            'sentiment': 'positive',
            'impact': 'high',
            'url': 'https://www.cnbc.com',
            'likes': 0,
            'retweets': 0,
            'is_breaking': False,
            'source_weight': 1.2
        },
        {
            'id': 3,
            'title': 'ECB President Lagarde warns of persistent inflation pressures in eurozone',
            'summary': 'Lagarde warns on eurozone inflation',
            'source': 'European Central Bank',
            'category': 'macro',
            'timestamp': datetime.now() - timedelta(hours=1),
            'sentiment': 'negative',
            'impact': 'high',
            'url': 'https://www.ecb.europa.eu',
            'likes': 0,
            'retweets': 0,
            'is_breaking': False,
            'source_weight': 2.0
        }
    ]
523
+
524
def get_news(self, category: str = 'all', sentiment: str = 'all',
             impact: str = 'all', refresh: bool = False) -> pd.DataFrame:
    """Get filtered news with caching.

    Fix: the cache-age check used `timedelta.seconds`, which only holds
    the seconds *component* and wraps every 24 hours (a 25h-old cache
    reported 3600s and was treated as fresh). `total_seconds()` gives
    the true elapsed time.

    Args:
        category/sentiment/impact: 'all' or a specific value to filter by.
        refresh: Force a re-fetch regardless of cache age.

    Returns:
        DataFrame of matching news items (empty DataFrame when none).
    """
    stale = (
        refresh
        or not self.last_fetch
        or (datetime.now() - self.last_fetch).total_seconds() > self.cache_ttl
    )
    if stale:
        self.news_cache = self.scrape_news(max_items=100)
        self.last_fetch = datetime.now()

    news = self.news_cache.copy()

    # Apply filters
    if category != 'all':
        news = [n for n in news if n['category'] == category]
    if sentiment != 'all':
        news = [n for n in news if n['sentiment'] == sentiment]
    if impact != 'all':
        news = [n for n in news if n['impact'] == impact]

    df = pd.DataFrame(news)
    if not df.empty:
        df['timestamp'] = pd.to_datetime(df['timestamp'])

    return df
548
+
549
def get_breaking_news(self) -> pd.DataFrame:
    """Convenience wrapper: return only high-impact news items."""
    high_impact_only = self.get_news(impact='high')
    return high_impact_only
552
+
553
def get_statistics(self) -> Dict:
    """Return placeholder feed statistics.

    Statistics are now managed by NewsCacheManager; this stub exists
    only for backward compatibility and always reports empty counts.
    """
    empty_stats = {
        'total': 0,
        'high_impact': 0,
        'breaking': 0,
        'last_update': 'Managed by cache',
        'by_category': {},
    }
    return empty_stats
app/services/prediction_markets.py ADDED
@@ -0,0 +1,631 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Prediction Markets Scraper - Polymarket, Metaculus & CME FedWatch
3
+ Aggregates market predictions for financial, political, and geopolitical events
4
+ No authentication required - all free/public APIs
5
+ """
6
+
7
+ from datetime import datetime, timedelta
8
+ from typing import List, Dict, Optional
9
+ import logging
10
+ import re
11
+ from concurrent.futures import ThreadPoolExecutor
12
+ import json as json_module
13
+
14
+ import requests
15
+ import pandas as pd
16
+ from bs4 import BeautifulSoup
17
+
18
+ # Configure logging
19
+ logging.basicConfig(level=logging.INFO)
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
class PredictionMarketsScraper:
    """
    Scrapes prediction market data from multiple sources
    Focus: Economics, geopolitics, markets
    """

    # Per-source configuration: display name, base/API URL, a trust weight
    # (used when ranking aggregated predictions) and an enable flag that
    # scrape_predictions() checks before submitting a fetch task.
    SOURCES = {
        'polymarket': {
            'name': 'Polymarket',
            'base_url': 'https://clob.polymarket.com',
            'weight': 1.8,
            'enabled': True
        },
        'kalshi': {
            'name': 'Kalshi',
            'base_url': 'https://api.elections.kalshi.com/trade-api/v2',
            'weight': 1.7,
            'enabled': True
        },
        'metaculus': {
            'name': 'Metaculus',
            'base_url': 'https://www.metaculus.com/api',
            'weight': 1.6,
            'enabled': True
        },
        'cme_fedwatch': {
            'name': 'CME FedWatch',
            'url': 'https://www.cmegroup.com/markets/interest-rates/cme-fedwatch-tool.html',
            'weight': 2.0,
            'enabled': True
        }
    }

    # Keyword lists consumed by _categorize_prediction() to bucket questions
    # into macro / markets / geopolitical categories.
    MACRO_KEYWORDS = ['Fed', 'ECB', 'inflation', 'CPI', 'GDP', 'rate', 'economy']
    MARKETS_KEYWORDS = ['stock', 'market', 'S&P', 'Dow', 'price', 'Bitcoin', 'crypto']
    GEOPOLITICAL_KEYWORDS = ['election', 'war', 'Trump', 'Biden', 'China', 'Russia', 'Ukraine']

    def __init__(self):
        """Initialize scraper with a shared requests session.

        A single Session reuses connections across the parallel fetches;
        browser-like headers reduce the chance of being blocked.
        """
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36',
            'Accept': 'application/json',
            'Accept-Language': 'en-US,en;q=0.9',
        })
70
+
71
def scrape_predictions(self, max_items: int = 50) -> List[Dict]:
    """
    Scrape predictions from all enabled sources
    Returns unified list of prediction markets

    Args:
        max_items: Maximum number of predictions returned after sorting.

    Returns:
        Deduplicated, ranked list of prediction dicts; mock data when
        every source fails.
    """
    all_predictions = []
    seen_titles = set()  # normalized titles already accepted (dedup key)

    # Fetch all enabled sources concurrently; each future is paired with
    # its source name for logging.
    with ThreadPoolExecutor(max_workers=4) as executor:
        futures = []

        if self.SOURCES['polymarket']['enabled']:
            futures.append((executor.submit(self._fetch_polymarket), 'polymarket'))

        if self.SOURCES['kalshi']['enabled']:
            futures.append((executor.submit(self._fetch_kalshi), 'kalshi'))

        if self.SOURCES['metaculus']['enabled']:
            futures.append((executor.submit(self._fetch_metaculus), 'metaculus'))

        if self.SOURCES['cme_fedwatch']['enabled']:
            futures.append((executor.submit(self._fetch_cme_fedwatch), 'cme_fedwatch'))

        for future, source_name in futures:
            try:
                # 35s cap per source so one slow API can't stall the batch
                predictions = future.result(timeout=35)

                # Deduplicate by case-insensitive title match
                for pred in predictions:
                    title_norm = pred['title'].lower().strip()
                    if title_norm not in seen_titles:
                        seen_titles.add(title_norm)
                        all_predictions.append(pred)

                logger.info(f"Fetched {len(predictions)} predictions from {source_name}")

            except Exception as e:
                logger.error(f"Error fetching {source_name}: {e}")

    # If no predictions fetched, fall back to canned mock data
    if not all_predictions:
        logger.warning("No predictions fetched - using mock data")
        return self._get_mock_predictions()

    # Rank: high-impact first, then by traded volume (descending)
    all_predictions.sort(
        key=lambda x: (x['impact'] == 'high', x.get('volume', 0)),
        reverse=True
    )

    return all_predictions[:max_items]
123
+
124
def _fetch_polymarket(self) -> List[Dict]:
    """Fetch open markets from the Polymarket Gamma API and normalize them.

    Fix: two bare `except:` clauses (outcome-price parse and end-date
    parse) also swallowed KeyboardInterrupt/SystemExit; they are now
    narrowed to the concrete parse errors.

    Returns:
        List of prediction dicts; empty list on any request failure.
    """
    try:
        # Use Gamma API which is more stable than the CLOB base_url in SOURCES
        url = "https://gamma-api.polymarket.com/markets"
        params = {'limit': 50, 'closed': False}

        response = self.session.get(url, params=params, timeout=15)
        response.raise_for_status()

        markets = response.json()
        predictions = []

        for market in markets[:30]:  # Limit to 30 most recent
            try:
                title = market.get('question', '')
                if not title or len(title) < 10:
                    continue

                # 'outcomePrices' arrives as a JSON-encoded string, e.g. '["0.6", "0.4"]'
                outcome_prices_str = market.get('outcomePrices', '["0.5", "0.5"]')
                try:
                    outcome_prices = json_module.loads(outcome_prices_str) if isinstance(outcome_prices_str, str) else outcome_prices_str
                except (ValueError, TypeError):
                    outcome_prices = [0.5, 0.5]

                # Convert unit prices to percentages
                yes_prob = float(outcome_prices[0]) * 100 if len(outcome_prices) > 0 else 50.0
                no_prob = float(outcome_prices[1]) * 100 if len(outcome_prices) > 1 else (100 - yes_prob)

                # Both sides priced near zero => inactive market, skip
                if yes_prob < 0.01 and no_prob < 0.01:
                    continue

                volume = float(market.get('volume', 0))
                category = self._categorize_prediction(title)
                impact = self._assess_impact(volume, category)
                sentiment = 'positive' if yes_prob > 60 else ('negative' if yes_prob < 40 else 'neutral')

                # End date: ISO string with optional trailing 'Z'
                end_date_str = market.get('endDate', '')
                try:
                    end_date = datetime.fromisoformat(end_date_str.replace('Z', '+00:00'))
                except (ValueError, TypeError, AttributeError):
                    end_date = datetime.now() + timedelta(days=30)

                market_id = market.get('id', market.get('conditionId', title))

                predictions.append({
                    # NOTE(review): hash() is salted per process; ids are not
                    # stable across runs.
                    'id': hash(str(market_id)),
                    'title': title,
                    'summary': f"Market probability: {yes_prob:.1f}% YES, {no_prob:.1f}% NO",
                    'source': 'Polymarket',
                    'category': category,
                    'timestamp': datetime.now(),
                    'url': f"https://polymarket.com/event/{market.get('slug', '')}",
                    'yes_probability': round(yes_prob, 1),
                    'no_probability': round(no_prob, 1),
                    'volume': volume,
                    'end_date': end_date,
                    'impact': impact,
                    'sentiment': sentiment,
                    'is_breaking': False,
                    'source_weight': self.SOURCES['polymarket']['weight'],
                    'likes': int(volume / 1000),  # Approximate engagement from volume
                    'retweets': 0
                })

            except Exception as e:
                logger.debug(f"Error parsing Polymarket market: {e}")
                continue

        return predictions

    except Exception as e:
        logger.error(f"Error fetching Polymarket: {e}")
        return []
211
+
212
def _fetch_metaculus(self) -> List[Dict]:
    """Fetch open binary questions from the Metaculus API and normalize them.

    Fix: bare `except:` clauses narrowed to `except Exception` so that
    KeyboardInterrupt/SystemExit are no longer swallowed.

    NOTE(review): this issues one detail request per question (N+1, up to
    30 sequential calls), and when no community forecast is available the
    probability is a *randomized* placeholder around 50%, so output is not
    deterministic. Both behaviors are intentionally preserved here.

    Returns:
        List of prediction dicts; empty list on request failure.
    """
    try:
        import random

        # Metaculus API v2 question listing, most-voted first
        url = "https://www.metaculus.com/api2/questions/"
        params = {
            'status': 'open',
            'type': 'forecast',
            'order_by': '-votes',
            'limit': 30
        }

        response = self.session.get(url, params=params, timeout=15)
        response.raise_for_status()

        data = response.json()
        questions = data.get('results', [])
        predictions = []

        for q in questions:
            try:
                title = q.get('title', '')
                if not title or len(title) < 10:
                    continue

                # Skip questions nobody has forecast yet
                num_forecasters = q.get('nr_forecasters', 0)
                if num_forecasters == 0:
                    continue

                # Per-question detail request: checks the question type and
                # pulls the latest community aggregate when present.
                q_id = q.get('id')
                try:
                    detail_url = f"https://www.metaculus.com/api2/questions/{q_id}/"
                    detail_resp = self.session.get(detail_url, timeout=5)
                    detail = detail_resp.json()
                    question_data = detail.get('question', {})
                    q_type = question_data.get('type')

                    # Only binary questions map onto YES/NO probabilities
                    if q_type != 'binary':
                        continue

                    aggregations = question_data.get('aggregations', {})
                    unweighted = aggregations.get('unweighted', {})
                    latest_pred = unweighted.get('latest')

                    if latest_pred is not None and latest_pred > 0:
                        yes_prob = float(latest_pred) * 100
                    else:
                        # Placeholder: random value around 50%, tighter band
                        # when more forecasters participated
                        base = 50.0
                        variance = 15.0 if num_forecasters > 10 else 25.0
                        yes_prob = base + random.uniform(-variance, variance)
                except Exception:
                    # Detail fetch/parse failed -- fall back to rough estimate
                    yes_prob = 45.0 + random.uniform(0, 10)

                no_prob = 100 - yes_prob

                category = self._categorize_prediction(title)

                # More forecasters => higher assumed impact
                impact = 'high' if num_forecasters > 100 else ('medium' if num_forecasters > 20 else 'low')

                sentiment = 'positive' if yes_prob > 60 else ('negative' if yes_prob < 40 else 'neutral')

                close_time_str = q.get('scheduled_close_time', '')
                try:
                    close_time = datetime.fromisoformat(close_time_str.replace('Z', '+00:00'))
                except Exception:
                    close_time = datetime.now() + timedelta(days=30)

                predictions.append({
                    'id': q.get('id', hash(title)),
                    'title': title,
                    'summary': f"Community forecast: {yes_prob:.1f}% likelihood ({num_forecasters} forecasters)",
                    'source': 'Metaculus',
                    'category': category,
                    'timestamp': datetime.now(),
                    'url': f"https://www.metaculus.com/questions/{q_id}/",
                    'yes_probability': round(yes_prob, 1),
                    'no_probability': round(no_prob, 1),
                    'volume': 0,  # Metaculus doesn't have trading volume
                    'end_date': close_time,
                    'impact': impact,
                    'sentiment': sentiment,
                    'is_breaking': False,
                    'source_weight': self.SOURCES['metaculus']['weight'],
                    'likes': num_forecasters,
                    'retweets': 0
                })

            except Exception as e:
                logger.debug(f"Error parsing Metaculus question: {e}")
                continue

        return predictions

    except Exception as e:
        logger.error(f"Error fetching Metaculus: {e}")
        return []
321
+
322
def _fetch_kalshi(self) -> List[Dict]:
    """Fetch predictions from Kalshi public API (financial events only).

    Pages through /events (up to 3 pages of 200, cursor-based), keeps
    only events that pass _is_kalshi_financial_event(), and flattens
    each event's nested binary markets into prediction dicts.

    Returns:
        List of prediction dicts; empty list on request failure.
    """
    try:
        base_url = self.SOURCES['kalshi']['base_url']
        url = f"{base_url}/events"
        params = {
            'limit': 200,
            'with_nested_markets': True,
            'status': 'open'
        }

        predictions = []
        cursor = None  # pagination cursor returned by the API
        pages = 0

        # Cap at 3 pages to bound latency and result size
        while pages < 3:
            if cursor:
                params['cursor'] = cursor

            response = self.session.get(url, params=params, timeout=15)
            response.raise_for_status()
            data = response.json()

            events = data.get('events', [])
            for event in events:
                # Drop non-financial events early
                if not self._is_kalshi_financial_event(event):
                    continue

                event_title = event.get('title', '')
                category = self._categorize_prediction(event_title)
                markets = event.get('markets', []) or []

                for market in markets:
                    try:
                        # Only binary markets have a single YES probability
                        if market.get('market_type') and market.get('market_type') != 'binary':
                            continue

                        # Fall back to the event title when the market has none
                        title = market.get('title') or event_title
                        if not title or len(title) < 8:
                            continue

                        yes_prob = self._kalshi_yes_probability(market)
                        if yes_prob is None:
                            continue

                        no_prob = 100 - yes_prob
                        volume = float(market.get('volume', 0) or 0)
                        impact = self._assess_impact(volume, category)
                        sentiment = 'positive' if yes_prob > 60 else ('negative' if yes_prob < 40 else 'neutral')

                        close_time_str = market.get('close_time') or market.get('expiration_time')
                        end_date = self._parse_iso_datetime(close_time_str)

                        market_ticker = market.get('ticker', '')

                        predictions.append({
                            'id': hash(market_ticker or title),
                            'title': title,
                            'summary': f"Kalshi market: {yes_prob:.1f}% YES, {no_prob:.1f}% NO",
                            'source': 'Kalshi',
                            'category': category,
                            'timestamp': datetime.now(),
                            # NOTE(review): this builds a link onto the trade-API
                            # host, not the public kalshi.com site -- confirm the
                            # intended user-facing URL.
                            'url': f"{base_url}/markets/{market_ticker}" if market_ticker else base_url,
                            'yes_probability': round(yes_prob, 1),
                            'no_probability': round(no_prob, 1),
                            'volume': volume,
                            'end_date': end_date,
                            'impact': impact,
                            'sentiment': sentiment,
                            'is_breaking': False,
                            'source_weight': self.SOURCES['kalshi']['weight'],
                            'likes': int(volume / 1000),
                            'retweets': 0
                        })

                    except Exception as e:
                        logger.debug(f"Error parsing Kalshi market: {e}")
                        continue

            # Advance the cursor; an empty cursor means no more pages
            cursor = data.get('cursor')
            pages += 1
            if not cursor:
                break

        return predictions

    except Exception as e:
        logger.error(f"Error fetching Kalshi: {e}")
        return []
411
+
412
def _fetch_cme_fedwatch(self) -> List[Dict]:
    """
    Fetch Fed rate probabilities from CME FedWatch Tool
    Note: This is web scraping and may be fragile

    IMPORTANT: real FedWatch data is rendered client-side, so this method
    currently always falls through to hard-coded *estimated* FOMC
    probabilities -- the script scan below never produces data.

    Returns:
        List of estimated Fed-decision prediction dicts; empty on error.
    """
    try:
        url = self.SOURCES['cme_fedwatch']['url']
        response = self.session.get(url, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')

        # CME FedWatch has a data table with meeting dates and probabilities
        # This is a simplified version - actual implementation may need adjustment
        # based on current page structure

        predictions = []

        # Probe script tags for embedded probability JSON; this is a stub --
        # it only logs and breaks, it never parses anything.
        scripts = soup.find_all('script')
        for script in scripts:
            if script.string and 'probability' in script.string.lower():
                logger.warning("CME FedWatch scraping not fully implemented - using mock Fed data")
                break

        # Fallback: Create estimated Fed rate predictions
        # Note: Real CME FedWatch data requires parsing complex JavaScript-rendered charts
        logger.info("CME FedWatch using estimated probabilities - real data requires JavaScript execution")

        # Hard-coded (month label, days ahead, cut probability, hold probability)
        # for the next FOMC meetings.
        fomc_meetings = [
            ('March', 45, 35, 65),  # days_ahead, cut_prob, hold_prob
            ('May', 90, 55, 45),
        ]

        for meeting_month, days_ahead, cut_prob, hold_prob in fomc_meetings:
            next_fomc = datetime.now() + timedelta(days=days_ahead)
            fomc_date_str = next_fomc.strftime('%Y%m%d')
            predictions.append({
                'id': hash(f'fed_rate_{fomc_date_str}'),
                'title': f'Fed Rate Decision - {meeting_month} {next_fomc.year} FOMC',
                'summary': 'Estimated probability based on Fed fund futures (unofficial)',
                'source': 'CME FedWatch (Estimated)',
                'category': 'macro',
                'timestamp': datetime.now(),
                'url': url,
                'yes_probability': float(cut_prob),  # Probability of rate cut
                'no_probability': float(hold_prob),  # Probability of hold/hike
                'volume': 0,
                'end_date': next_fomc,
                'impact': 'high',
                'sentiment': 'neutral',
                'is_breaking': False,
                'source_weight': self.SOURCES['cme_fedwatch']['weight'],
                'likes': 0,
                'retweets': 0
            })

        return predictions

    except Exception as e:
        logger.error(f"Error fetching CME FedWatch: {e}")
        return []
477
+
478
def _categorize_prediction(self, text: str) -> str:
    """Bucket a prediction question into macro/markets/geopolitical.

    Counts case-insensitive keyword hits per category; ties resolve to
    the first maximum, and zero hits default to 'markets'.
    """
    lowered = text.lower()
    scores = {
        'macro': sum(kw.lower() in lowered for kw in self.MACRO_KEYWORDS),
        'markets': sum(kw.lower() in lowered for kw in self.MARKETS_KEYWORDS),
        'geopolitical': sum(kw.lower() in lowered for kw in self.GEOPOLITICAL_KEYWORDS),
    }
    best = max(scores, key=scores.get)
    return best if scores[best] > 0 else 'markets'
488
+
489
+ def _is_kalshi_financial_event(self, event: Dict) -> bool:
490
+ """Filter Kalshi events to financial/macro/markets categories"""
491
+ category = (event.get('category') or '').lower()
492
+ title = (event.get('title') or '').lower()
493
+ series_ticker = (event.get('series_ticker') or '').lower()
494
+
495
+ financial_keywords = [
496
+ 'econ', 'economic', 'economy', 'finance', 'financial', 'market',
497
+ 'inflation', 'cpi', 'ppi', 'gdp', 'jobs', 'employment', 'unemployment',
498
+ 'rate', 'interest', 'fed', 'fomc', 'treasury', 'bond', 'recession',
499
+ 'stock', 's&p', 'nasdaq', 'dow', 'crypto', 'bitcoin', 'oil', 'fx',
500
+ 'usd', 'dollar'
501
+ ]
502
+
503
+ if any(kw in category for kw in financial_keywords):
504
+ return True
505
+
506
+ if any(kw in title for kw in financial_keywords):
507
+ return True
508
+
509
+ if any(kw in series_ticker for kw in financial_keywords):
510
+ return True
511
+
512
+ return self._categorize_prediction(event.get('title', '')) in {'macro', 'markets'}
513
+
514
+ def _kalshi_yes_probability(self, market: Dict) -> Optional[float]:
515
+ """Return YES probability (0-100) from Kalshi market pricing."""
516
+ def to_float(value):
517
+ if value is None or value == '':
518
+ return None
519
+ try:
520
+ return float(value)
521
+ except Exception:
522
+ return None
523
+
524
+ yes_bid_d = to_float(market.get('yes_bid_dollars'))
525
+ yes_ask_d = to_float(market.get('yes_ask_dollars'))
526
+ last_d = to_float(market.get('last_price_dollars'))
527
+
528
+ price = None
529
+ if yes_bid_d is not None and yes_ask_d is not None:
530
+ price = (yes_bid_d + yes_ask_d) / 2
531
+ elif last_d is not None:
532
+ price = last_d
533
+ else:
534
+ yes_bid = to_float(market.get('yes_bid'))
535
+ yes_ask = to_float(market.get('yes_ask'))
536
+ last = to_float(market.get('last_price'))
537
+ if yes_bid is not None and yes_ask is not None:
538
+ price = (yes_bid + yes_ask) / 2 / 100
539
+ elif last is not None:
540
+ price = last / 100
541
+
542
+ if price is None:
543
+ return None
544
+
545
+ price = max(min(price, 1.0), 0.0)
546
+ return price * 100
547
+
548
+ def _parse_iso_datetime(self, value: Optional[str]) -> datetime:
549
+ """Parse ISO timestamps from Kalshi API with fallback."""
550
+ if not value:
551
+ return datetime.now() + timedelta(days=30)
552
+ try:
553
+ return datetime.fromisoformat(value.replace('Z', '+00:00'))
554
+ except Exception:
555
+ return datetime.now() + timedelta(days=30)
556
+
557
+ def _assess_impact(self, volume: float, category: str) -> str:
558
+ """Assess market impact based on volume and category"""
559
+ # Macro predictions are inherently high impact
560
+ if category == 'macro':
561
+ return 'high'
562
+
563
+ # Volume-based assessment
564
+ if volume > 1000000: # $1M+ volume
565
+ return 'high'
566
+ elif volume > 100000: # $100K+ volume
567
+ return 'medium'
568
+ else:
569
+ return 'low'
570
+
571
def _get_mock_predictions(self) -> List[Dict]:
    """Static fallback predictions used when every live source fails.

    Each dict mirrors the shape produced by the real fetchers so the
    downstream ranking/filtering code works unchanged.
    """
    return [
        {
            'id': 1,
            'title': 'Will the Fed cut interest rates by March 2025?',
            'summary': 'Market probability based on fed funds futures and prediction markets',
            'source': 'CME FedWatch',
            'category': 'macro',
            'timestamp': datetime.now(),
            'url': 'https://www.cmegroup.com/markets/interest-rates/cme-fedwatch-tool.html',
            'yes_probability': 72.5,
            'no_probability': 27.5,
            'volume': 0,
            'end_date': datetime.now() + timedelta(days=45),
            'impact': 'high',
            'sentiment': 'positive',
            'is_breaking': False,
            'source_weight': 2.0,
            'likes': 0,
            'retweets': 0
        },
        {
            'id': 2,
            'title': 'Will Bitcoin reach $100,000 in 2025?',
            'summary': 'Prediction market consensus on Bitcoin price target',
            'source': 'Polymarket',
            'category': 'markets',
            'timestamp': datetime.now(),
            'url': 'https://polymarket.com',
            'yes_probability': 45.0,
            'no_probability': 55.0,
            'volume': 2500000,
            'end_date': datetime.now() + timedelta(days=365),
            'impact': 'medium',
            'sentiment': 'neutral',
            'is_breaking': False,
            'source_weight': 1.8,
            'likes': 2500,
            'retweets': 0
        },
        {
            'id': 3,
            'title': 'Will there be a US recession in 2025?',
            'summary': 'Expert consensus forecast on economic downturn',
            'source': 'Metaculus',
            'category': 'macro',
            'timestamp': datetime.now(),
            'url': 'https://www.metaculus.com',
            'yes_probability': 35.0,
            'no_probability': 65.0,
            'volume': 0,
            'end_date': datetime.now() + timedelta(days=365),
            'impact': 'high',
            'sentiment': 'negative',
            'is_breaking': False,
            'source_weight': 1.6,
            'likes': 450,
            'retweets': 0
        }
    ]
app/services/reddit_news.py ADDED
@@ -0,0 +1,312 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Reddit Financial News Scraper
3
+ Scrapes financial, trading, quant, and geopolitical news from Reddit
4
+ No authentication required - uses public RSS feeds
5
+ """
6
+
7
+ import feedparser
8
+ import logging
9
+ from datetime import datetime, timedelta
10
+ from typing import List, Dict
11
+ import re
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
class RedditFinanceMonitor:
    """
    Reddit financial news aggregator using RSS feeds.

    No authentication required - public RSS feeds only. Posts are pulled from
    a curated set of finance/economics/geopolitics subreddits, deduplicated by
    title, categorized by keyword, scored for sentiment/impact, and ranked by
    engagement weighted with a per-subreddit quality weight.
    """

    # Premium financial subreddits.
    # (dict key, URL path segment as capitalized on reddit, quality weight,
    #  specialization tags, default category when no keyword matches)
    _SUBREDDIT_TABLE = [
        # Financial & Markets
        ('wallstreetbets', 'wallstreetbets', 1.6, ['markets'], 'markets'),
        ('stocks', 'stocks', 1.7, ['markets'], 'markets'),
        ('investing', 'investing', 1.8, ['markets', 'macro'], 'markets'),
        ('stockmarket', 'StockMarket', 1.6, ['markets'], 'markets'),
        ('options', 'options', 1.5, ['markets'], 'markets'),
        ('daytrading', 'Daytrading', 1.5, ['markets'], 'markets'),
        ('securityanalysis', 'SecurityAnalysis', 1.7, ['markets'], 'markets'),
        # Economics & Macro
        ('economics', 'Economics', 1.8, ['macro'], 'macro'),
        ('economy', 'economy', 1.6, ['macro'], 'macro'),
        # Quantitative Finance
        ('algotrading', 'algotrading', 1.7, ['markets'], 'markets'),
        ('quantfinance', 'quant', 1.7, ['markets'], 'markets'),
        # Geopolitics
        ('geopolitics', 'geopolitics', 1.8, ['geopolitical'], 'geopolitical'),
        ('worldnews', 'worldnews', 1.7, ['geopolitical'], 'geopolitical'),
        ('neutralpolitics', 'NeutralPolitics', 1.6, ['geopolitical'], 'geopolitical'),
    ]

    # Public mapping kept for backward compatibility:
    # key -> {'url', 'weight', 'specialization', 'category'}
    SUBREDDITS = {
        key: {
            'url': f'https://www.reddit.com/r/{path}/top/.rss?t=day',
            'weight': weight,
            'specialization': spec,
            'category': category,
        }
        for key, path, weight, spec, category in _SUBREDDIT_TABLE
    }

    # Keyword detection for additional categorization.
    # NOTE: matching is plain substring matching (see _categorize_post), so
    # very short keywords can over-match inside longer words.
    MACRO_KEYWORDS = [
        'Fed', 'ECB', 'BoE', 'BoJ', 'FOMC', 'Powell', 'Lagarde',
        'interest rate', 'inflation', 'CPI', 'PPI', 'GDP',
        'unemployment', 'jobs report', 'NFP', 'central bank',
        'recession', 'QE', 'quantitative easing', 'monetary policy'
    ]

    MARKETS_KEYWORDS = [
        'stock', 'equity', 'bond', 'commodity', 'oil', 'gold',
        'earnings', 'revenue', 'profit', 'IPO', 'merger',
        'acquisition', 'trading', 'options', 'futures', 'forex'
    ]

    GEOPOLITICAL_KEYWORDS = [
        'war', 'conflict', 'sanction', 'trade', 'tariff',
        'election', 'China', 'Russia', 'Ukraine', 'Taiwan',
        'Middle East', 'Iran', 'Israel', 'NATO', 'UN'
    ]

    def __init__(self):
        """Initialize Reddit monitor (stateless; kept for API compatibility)."""
        pass

    def _categorize_post(self, title: str, subreddit_info: Dict) -> str:
        """Categorize a post as 'macro', 'geopolitical' or 'markets'.

        Keyword hits in the title override the subreddit's default category;
        macro keywords take precedence over geopolitical, which take
        precedence over markets.
        """
        title_lower = title.lower()

        # Fall back to the subreddit's own category when nothing matches.
        default_category = subreddit_info.get('category', 'markets')

        if any(keyword.lower() in title_lower for keyword in self.MACRO_KEYWORDS):
            return 'macro'
        if any(keyword.lower() in title_lower for keyword in self.GEOPOLITICAL_KEYWORDS):
            return 'geopolitical'
        if any(keyword.lower() in title_lower for keyword in self.MARKETS_KEYWORDS):
            return 'markets'

        return default_category

    def _detect_sentiment(self, title: str) -> str:
        """Simple keyword-count sentiment: 'positive', 'negative' or 'neutral'.

        Substring matching is intentional but coarse (e.g. 'up' also matches
        'upgrade'); ties resolve to 'neutral'.
        """
        title_lower = title.lower()

        positive_words = ['bullish', 'bull', 'surge', 'gain', 'up', 'rally', 'boom', 'profit', 'growth']
        negative_words = ['bearish', 'bear', 'crash', 'loss', 'down', 'fall', 'decline', 'recession', 'crisis']

        positive_count = sum(1 for word in positive_words if word in title_lower)
        negative_count = sum(1 for word in negative_words if word in title_lower)

        if positive_count > negative_count:
            return 'positive'
        if negative_count > positive_count:
            return 'negative'
        return 'neutral'

    def _calculate_impact(self, score: int, num_comments: int, subreddit_weight: float) -> str:
        """Classify impact ('high'/'medium'/'low') from engagement.

        Engagement is a 70/30 blend of upvote score and comment count,
        scaled by the subreddit's quality weight; thresholds are 500/100.
        """
        engagement_score = (score * 0.7) + (num_comments * 0.3)
        weighted_score = engagement_score * subreddit_weight

        if weighted_score > 500:
            return 'high'
        if weighted_score > 100:
            return 'medium'
        return 'low'

    @staticmethod
    def _parse_pub_date(entry) -> datetime:
        """Return an entry's publish time as a naive LOCAL datetime.

        Fixes two defects in the original inline code:
        - feedparser exposes ``published_parsed`` as a struct_time in UTC, but
          it was turned into a naive datetime and compared against local
          ``datetime.now()``, skewing the recency filter by the local UTC
          offset. Here the UTC struct is converted to local time first.
        - ``published_parsed`` can exist but be ``None`` (hasattr() passed);
          ``datetime(*None[:6])`` then raised and the blanket except silently
          dropped the post. A falsy value now falls back to "now".
        """
        import calendar  # stdlib; local import keeps module header unchanged
        parsed = getattr(entry, 'published_parsed', None)
        if parsed:
            # timegm() treats the struct as UTC -> epoch; fromtimestamp()
            # converts epoch -> naive local time, consistent with the cutoff.
            return datetime.fromtimestamp(calendar.timegm(parsed))
        return datetime.now()

    @staticmethod
    def _extract_engagement(entry) -> tuple:
        """Best-effort extraction of (score, num_comments) from the RSS blurb.

        Reddit's RSS embeds 'N points' / 'N comments' inside the HTML content;
        missing values default to 0.
        """
        score = 0
        num_comments = 0
        if hasattr(entry, 'content'):
            content_text = entry.content[0].value if entry.content else ''
            score_match = re.search(r'(\d+)\s+points?', content_text)
            if score_match:
                score = int(score_match.group(1))
            comment_match = re.search(r'(\d+)\s+comments?', content_text)
            if comment_match:
                num_comments = int(comment_match.group(1))
        return score, num_comments

    def scrape_reddit_news(self, max_posts: int = 100, hours: int = 12) -> List[Dict]:
        """
        Scrape Reddit posts from financial subreddits.

        Args:
            max_posts: Maximum number of posts to return
            hours: Only include posts from the last N hours (default: 12)

        Returns:
            List of news items with metadata, sorted by weighted engagement
            (upvote score times subreddit weight), best first.
        """
        all_posts = []
        seen_titles = set()
        cutoff_time = datetime.now() - timedelta(hours=hours)

        logger.info(f"Scraping Reddit posts from last {hours} hours...")

        for subreddit_name, subreddit_info in self.SUBREDDITS.items():
            try:
                logger.info(f"Fetching r/{subreddit_name}...")

                feed = feedparser.parse(subreddit_info['url'])
                fetched = 0

                for entry in feed.entries[:20]:  # Top 20 per subreddit
                    try:
                        pub_date = self._parse_pub_date(entry)

                        # Recency filter (last `hours` hours).
                        if pub_date < cutoff_time:
                            continue

                        title = entry.title.strip()
                        link = entry.link

                        # Deduplicate on the first 100 chars of the title.
                        title_hash = hash(title[:100])
                        if title_hash in seen_titles:
                            continue
                        seen_titles.add(title_hash)

                        score, num_comments = self._extract_engagement(entry)

                        category = self._categorize_post(title, subreddit_info)
                        sentiment = self._detect_sentiment(title)
                        impact = self._calculate_impact(score, num_comments, subreddit_info['weight'])

                        # Breaking = very high score within the last 3 hours.
                        is_breaking = (
                            (datetime.now() - pub_date).total_seconds() < 10800 and
                            score > 1000
                        )

                        all_posts.append({
                            'title': title,
                            'summary': title,  # Reddit posts have no separate summary
                            'url': link,
                            'source': f"r/{subreddit_name}",
                            'timestamp': pub_date,
                            'category': category,
                            'sentiment': sentiment,
                            'impact': impact,
                            'is_breaking': is_breaking,
                            'engagement': {
                                'score': score,
                                'comments': num_comments
                            },
                            'platform': 'reddit'
                        })
                        fetched += 1

                    except Exception as e:
                        logger.error(f"Error processing entry from r/{subreddit_name}: {e}")
                        continue

                logger.info(f"Fetched {fetched} posts from r/{subreddit_name}")

            except Exception as e:
                logger.error(f"Error fetching r/{subreddit_name}: {e}")
                continue

        # Rank by upvote score weighted by the source subreddit's quality.
        all_posts.sort(key=lambda x: x['engagement']['score'] * self.SUBREDDITS.get(
            x['source'].replace('r/', ''), {}
        ).get('weight', 1.0), reverse=True)

        logger.info(f"Total Reddit posts scraped: {len(all_posts)}")

        return all_posts[:max_posts]

    def get_statistics(self) -> Dict:
        """
        Get statistics about scraped Reddit posts.

        Note: Statistics are now managed by NewsCacheManager.
        This method returns empty stats for backward compatibility.
        """
        return {
            'total': 0,
            'high_impact': 0,
            'breaking': 0,
            'by_category': {
                'macro': 0,
                'markets': 0,
                'geopolitical': 0
            }
        }
app/services/sectoral_news.py ADDED
@@ -0,0 +1,426 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Sectoral News Scraper - 7 Major Market Sectors
3
+ Filters and aggregates news by sector: Finance, Tech, Energy, Healthcare, Consumer, Industrials, Real Estate
4
+ Leverages existing RSS infrastructure with sector-specific classification
5
+ """
6
+
7
+ from datetime import datetime, timedelta
8
+ from typing import List, Dict, Optional
9
+ import logging
10
+ import re
11
+ from concurrent.futures import ThreadPoolExecutor
12
+
13
+ import requests
14
+ import pandas as pd
15
+ import feedparser
16
+ from bs4 import BeautifulSoup
17
+
18
# Configure logging
# NOTE(review): calling basicConfig() at import time configures the
# process-wide root logger; confirm this is intended for a service module
# rather than leaving configuration to the application entry point.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
21
+
22
+
23
class SectoralNewsScraper:
    """
    Aggregates news by market sector
    Uses RSS feeds + keyword classification

    Sector feeds are fetched in parallel, entries are filtered by recency,
    classified into category/sentiment/impact, deduplicated by URL, and
    sorted tech-first, finance-second, then newest-first. Falls back to
    static mock data when nothing could be fetched.
    """

    # 7 Sector configuration with keywords and RSS feeds.
    # Each sector: display name, relevance keywords (substring-matched,
    # case-insensitive), sector-specific RSS feeds, and a ranking weight.
    SECTORS = {
        'finance': {
            'name': 'Finance',
            'keywords': [
                'bank', 'JPMorgan', 'Goldman Sachs', 'Morgan Stanley', 'Wells Fargo',
                'Citigroup', 'Bank of America', 'fintech', 'lending', 'credit',
                'financial sector', 'banking', 'insurance', 'asset management'
            ],
            'rss_sources': [
                'https://www.cnbc.com/id/10000664/device/rss/rss.html',  # CNBC Banking
                'https://feeds.bloomberg.com/markets/news.rss'
            ],
            'weight': 1.5
        },
        'tech': {
            'name': 'Technology',
            'keywords': [
                'Apple', 'Microsoft', 'Google', 'Alphabet', 'Amazon', 'Meta', 'Facebook',
                'NVIDIA', 'AMD', 'Intel', 'semiconductor', 'chip', 'software', 'cloud',
                'AI', 'artificial intelligence', 'tech sector', 'Silicon Valley', 'Tesla'
            ],
            'rss_sources': [
                'https://www.cnbc.com/id/19854910/device/rss/rss.html',  # CNBC Technology
                'https://techcrunch.com/feed/'
            ],
            'weight': 1.5
        },
        'energy': {
            'name': 'Energy',
            'keywords': [
                'oil', 'gas', 'crude', 'petroleum', 'OPEC', 'Exxon', 'ExxonMobil', 'Chevron',
                'ConocoPhillips', 'renewable', 'solar', 'wind', 'energy sector', 'pipeline',
                'natural gas', 'LNG', 'fracking', 'drilling'
            ],
            'rss_sources': [
                'https://www.cnbc.com/id/19832390/device/rss/rss.html',  # CNBC Energy
            ],
            'weight': 1.6
        },
        'healthcare': {
            'name': 'Healthcare',
            'keywords': [
                'pharma', 'pharmaceutical', 'biotech', 'FDA', 'drug', 'vaccine', 'clinical trial',
                'Pfizer', 'Johnson & Johnson', 'Merck', 'AbbVie', 'Bristol Myers',
                'healthcare', 'hospital', 'medical device', 'therapeutics'
            ],
            'rss_sources': [
                'https://www.cnbc.com/id/10000108/device/rss/rss.html',  # CNBC Health
            ],
            'weight': 1.5
        },
        'consumer': {
            'name': 'Consumer & Retail',
            'keywords': [
                'retail', 'Amazon', 'Walmart', 'Target', 'Costco', 'Home Depot',
                'e-commerce', 'consumer', 'shopping', 'Black Friday', 'sales',
                'Nike', 'Starbucks', 'McDonald\'s', 'consumer goods', 'discretionary'
            ],
            'rss_sources': [
                'https://www.cnbc.com/id/10001009/device/rss/rss.html',  # CNBC Retail
            ],
            'weight': 1.3
        },
        'industrials': {
            'name': 'Industrials',
            'keywords': [
                'Boeing', 'Airbus', 'Caterpillar', 'Deere', '3M', 'GE', 'General Electric',
                'Honeywell', 'Lockheed Martin', 'manufacturing', 'industrial',
                'aerospace', 'defense', 'machinery', 'equipment', 'logistics', 'freight'
            ],
            'rss_sources': [
                'https://www.reuters.com/rss/businessNews',  # Reuters Business
            ],
            'weight': 1.4
        },
        'real_estate': {
            'name': 'Real Estate',
            'keywords': [
                'housing', 'mortgage', 'REIT', 'real estate', 'property', 'home sales',
                'construction', 'residential', 'commercial real estate', 'housing market',
                'home prices', 'rent', 'rental', 'builder', 'homebuilder'
            ],
            'rss_sources': [],  # Will rely on keyword filtering from general news
            'weight': 1.3
        }
    }

    def __init__(self):
        """Initialize scraper with a browser-like requests session."""
        # NOTE(review): the session is not used by the feedparser-based
        # fetching below; kept in case other call sites rely on it.
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.9',
        })

    def scrape_sectoral_news(self, max_items: int = 50, hours: int = 24) -> List[Dict]:
        """
        Scrape and classify news by sector.

        Args:
            max_items: cap on the number of items returned.
            hours: recency window for feed entries.

        Returns:
            Aggregated list sorted by sector priority (tech, then finance,
            then the rest) and timestamp (newest first).
        """
        all_news = []
        seen_urls = set()

        # Parallel fetch from all sector RSS feeds (one worker per sector).
        with ThreadPoolExecutor(max_workers=7) as executor:
            futures = []

            for sector_id, sector_info in self.SECTORS.items():
                futures.append((
                    executor.submit(self._fetch_sector_news, sector_id, sector_info, hours),
                    sector_id
                ))

            for future, sector_id in futures:
                try:
                    sector_news = future.result(timeout=35)

                    # Deduplicate by URL across sectors.
                    for item in sector_news:
                        if item['url'] not in seen_urls:
                            seen_urls.add(item['url'])
                            all_news.append(item)

                    logger.info(f"Fetched {len(sector_news)} items for {sector_id}")

                except Exception as e:
                    logger.error(f"Error fetching {sector_id} news: {e}")

        # If no news fetched, use mock data so the UI has something to show.
        if not all_news:
            logger.warning("No sectoral news fetched - using mock data")
            return self._get_mock_sectoral_news()

        # Sort: tech first, then finance, then newest first.
        all_news.sort(
            key=lambda x: (x['sector'] != 'tech', x['sector'] != 'finance', -x['timestamp'].timestamp()),
        )

        return all_news[:max_items]

    def _fetch_sector_news(self, sector_id: str, sector_info: Dict, hours: int) -> List[Dict]:
        """Fetch news for a single sector from its configured RSS feeds."""
        sector_news = []

        for rss_url in sector_info['rss_sources']:
            try:
                feed_news = self._fetch_rss_feed(rss_url, sector_id, sector_info, hours)
                sector_news.extend(feed_news)
            except Exception as e:
                logger.debug(f"Error fetching RSS {rss_url}: {e}")

        # If no RSS news, could also filter general news sources by keywords
        # (would require access to FinanceNewsScraper - skipping for now).

        return sector_news

    @staticmethod
    def _entry_timestamp(entry) -> datetime:
        """Publish/update time of an RSS entry as a naive LOCAL datetime.

        Fix: feedparser's ``published_parsed``/``updated_parsed`` structs are
        in UTC, but the original code built a naive datetime directly from
        them and compared it against local ``datetime.now()``, skewing the
        recency filter by the local UTC offset. The UTC struct is converted
        to local time here; entries with no usable date default to "now".
        """
        import calendar  # stdlib; local import keeps module header unchanged
        for attr in ('published_parsed', 'updated_parsed'):
            parsed = getattr(entry, attr, None)
            if parsed:
                return datetime.fromtimestamp(calendar.timegm(parsed))
        return datetime.now()

    def _fetch_rss_feed(self, rss_url: str, sector_id: str, sector_info: Dict, hours: int) -> List[Dict]:
        """Fetch and parse one RSS feed, returning normalized news dicts."""
        try:
            feed = feedparser.parse(rss_url)

            if not feed.entries:
                return []

            news_items = []
            cutoff_time = datetime.now() - timedelta(hours=hours)

            for entry in feed.entries[:15]:  # Limit to 15 per feed
                try:
                    timestamp = self._entry_timestamp(entry)

                    # Skip old news.
                    if timestamp < cutoff_time:
                        continue

                    title = entry.get('title', '')
                    summary = entry.get('summary', '') or entry.get('description', '')

                    # Clean HTML from summary and truncate for display.
                    if summary:
                        summary = BeautifulSoup(summary, 'html.parser').get_text()
                        summary = summary[:200] + '...' if len(summary) > 200 else summary

                    url = entry.get('link', '')

                    # Count sector-keyword hits in title + summary.
                    text = f"{title} {summary}".lower()
                    keyword_matches = sum(1 for kw in sector_info['keywords'] if kw.lower() in text)

                    # Relevance gate. NOTE(review): no sector currently has
                    # more than 2 rss_sources, so this condition never fires;
                    # kept as-is to preserve behavior — confirm the intended
                    # threshold with the author.
                    if keyword_matches == 0 and len(sector_info['rss_sources']) > 3:
                        continue

                    category = self._categorize_news(text)
                    sentiment = self._analyze_sentiment(text)
                    impact = self._assess_impact(sector_info['weight'], keyword_matches)

                    news_items.append({
                        # NOTE(review): built-in hash() is salted per process,
                        # so ids are not stable across runs.
                        'id': hash(url),
                        'title': title,
                        'summary': summary or title[:200],
                        'source': sector_info['name'],
                        'sector': sector_id,
                        'category': category,
                        'timestamp': timestamp,
                        'sentiment': sentiment,
                        'impact': impact,
                        'url': url,
                        'likes': 0,
                        'retweets': 0,
                        'is_breaking': False,
                        'source_weight': sector_info['weight'],
                        'from_web': False
                    })

                except Exception as e:
                    logger.debug(f"Error parsing RSS entry: {e}")
                    continue

            return news_items

        except Exception as e:
            logger.error(f"Error fetching RSS feed {rss_url}: {e}")
            return []

    def _categorize_news(self, text: str) -> str:
        """Categorize news (macro, markets, geopolitical) by keyword counts.

        ``text`` is expected to be lowercase; ties and zero scores fall back
        to 'markets'.
        """
        macro_keywords = ['Fed', 'ECB', 'inflation', 'rate', 'GDP', 'economy', 'recession']
        markets_keywords = ['stock', 'earnings', 'revenue', 'profit', 'IPO', 'merger', 'acquisition']
        geo_keywords = ['China', 'tariff', 'trade war', 'sanctions', 'regulation']

        macro_score = sum(1 for kw in macro_keywords if kw.lower() in text)
        markets_score = sum(1 for kw in markets_keywords if kw.lower() in text)
        geo_score = sum(1 for kw in geo_keywords if kw.lower() in text)

        scores = {'macro': macro_score, 'markets': markets_score, 'geopolitical': geo_score}
        return max(scores, key=scores.get) if max(scores.values()) > 0 else 'markets'

    def _analyze_sentiment(self, text: str) -> str:
        """Keyword-count sentiment over lowercase ``text``; ties are neutral."""
        positive = ['surge', 'soar', 'rally', 'beat', 'upgrade', 'gain', 'rise', 'bullish', 'positive']
        negative = ['plunge', 'crash', 'fall', 'miss', 'downgrade', 'loss', 'drop', 'bearish', 'negative']

        pos_count = sum(1 for word in positive if word in text)
        neg_count = sum(1 for word in negative if word in text)

        if pos_count > neg_count:
            return 'positive'
        elif neg_count > pos_count:
            return 'negative'
        return 'neutral'

    def _assess_impact(self, sector_weight: float, keyword_matches: int) -> str:
        """Impact ('high'/'medium'/'low') from sector weight and keyword hits."""
        if sector_weight >= 1.5 and keyword_matches >= 3:
            return 'high'
        elif keyword_matches >= 2:
            return 'medium'
        else:
            return 'low'

    def _get_mock_sectoral_news(self) -> List[Dict]:
        """Mock sectoral news for development (one item per sector)."""
        now = datetime.now()
        # (id, title, summary, source, sector, category, hours_ago,
        #  sentiment, impact, url, source weight)
        rows = [
            (1, 'Apple announces new iPhone with advanced AI capabilities',
             'Apple unveils next-generation iPhone featuring on-device AI processing',
             'Technology', 'tech', 'markets', 0.5, 'positive', 'high',
             'https://techcrunch.com', 1.5),
            (2, 'JPMorgan reports strong Q4 earnings beat analyst expectations',
             'Major investment bank posts record profits amid trading surge',
             'Finance', 'finance', 'markets', 1, 'positive', 'high',
             'https://cnbc.com', 1.5),
            (3, 'OPEC+ extends oil production cuts through Q2',
             'Major oil producers agree to maintain supply restrictions',
             'Energy', 'energy', 'geopolitical', 2, 'neutral', 'high',
             'https://reuters.com', 1.6),
            (4, 'Pfizer receives FDA approval for new cancer treatment',
             'Breakthrough therapy approved for late-stage lung cancer',
             'Healthcare', 'healthcare', 'markets', 3, 'positive', 'medium',
             'https://cnbc.com', 1.5),
            (5, 'Amazon expands same-day delivery to 50 new cities',
             'E-commerce giant accelerates logistics network expansion',
             'Consumer & Retail', 'consumer', 'markets', 4, 'positive', 'medium',
             'https://techcrunch.com', 1.3),
            (6, 'Boeing wins $10B contract for new military aircraft',
             'Defense contractor secures major government order',
             'Industrials', 'industrials', 'markets', 5, 'positive', 'medium',
             'https://reuters.com', 1.4),
            (7, 'US housing starts surge 15% in December',
             'Construction activity rebounds amid lower mortgage rates',
             'Real Estate', 'real_estate', 'macro', 6, 'positive', 'medium',
             'https://cnbc.com', 1.3),
        ]
        return [
            {
                'id': nid,
                'title': title,
                'summary': summary,
                'source': source,
                'sector': sector,
                'category': category,
                'timestamp': now - timedelta(hours=hours_ago),
                'sentiment': sentiment,
                'impact': impact,
                'url': url,
                'likes': 0,
                'retweets': 0,
                'is_breaking': False,
                'source_weight': weight,
                'from_web': False
            }
            for (nid, title, summary, source, sector, category, hours_ago,
                 sentiment, impact, url, weight) in rows
        ]
app/services/twitter_news_playwright.py ADDED
@@ -0,0 +1,489 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Professional Finance News Monitor using Playwright
3
+ Real-time Twitter/X scraping without authentication
4
+ Optimized for low-latency trading decisions
5
+ """
6
+
7
+ import pandas as pd
8
+ from datetime import datetime, timedelta
9
+ from typing import List, Dict, Optional
10
+ import streamlit as st
11
+ import re
12
+ import logging
13
+ from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeoutError
14
+
15
# Configure logging
# NOTE(review): basicConfig() at import time sets up the process-wide root
# logger; confirm this is intended for a service module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
18
+
19
# Optional dependency: Playwright is only needed for live scraping. The flag
# lets the rest of the module degrade gracefully when it is not installed.
try:
    from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
    PLAYWRIGHT_AVAILABLE = True
except ImportError:
    PLAYWRIGHT_AVAILABLE = False
    logger.warning("playwright not available. Install with: pip install playwright && playwright install chromium")
25
+
26
+
27
+ class TwitterFinanceMonitor:
28
+ """
29
+ Professional-grade financial news aggregator using Playwright
30
+ No authentication required - public Twitter/X profiles only
31
+ """
32
+
33
    # Premium financial Twitter accounts.
    # Each source: the public @handle, its x.com profile URL, a credibility
    # 'weight' used for ranking (higher = more trusted), and the news
    # categories the account specializes in.
    SOURCES = {
        # ===== TIER 1: Breaking News Aggregators =====
        'walter_bloomberg': {
            'handle': 'WalterBloomberg',
            'url': 'https://x.com/WalterBloomberg',
            'weight': 1.9,
            'specialization': ['macro', 'markets', 'geopolitical']
        },
        'fxhedge': {
            'handle': 'Fxhedgers',
            'url': 'https://x.com/Fxhedgers',
            'weight': 1.7,
            'specialization': ['macro', 'markets']
        },
        'deitaone': {
            'handle': 'DeItaone',
            'url': 'https://x.com/DeItaone',
            'weight': 1.8,
            'specialization': ['markets', 'macro']
        },
        'firstsquawk': {
            'handle': 'FirstSquawk',
            'url': 'https://x.com/FirstSquawk',
            'weight': 1.7,
            'specialization': ['markets', 'macro']
        },
        'livesquawk': {
            'handle': 'LiveSquawk',
            'url': 'https://x.com/LiveSquawk',
            'weight': 1.7,
            'specialization': ['markets', 'macro']
        },

        # ===== TIER 2: Major News Agencies =====
        'reuters': {
            'handle': 'Reuters',
            'url': 'https://x.com/Reuters',
            'weight': 1.9,
            'specialization': ['geopolitical', 'macro', 'markets']
        },
        'bloomberg': {
            'handle': 'business',
            'url': 'https://x.com/business',
            'weight': 1.9,
            'specialization': ['markets', 'macro']
        },
        'ft': {
            'handle': 'FT',
            'url': 'https://x.com/FT',
            'weight': 1.8,
            'specialization': ['markets', 'macro', 'geopolitical']
        },
        'wsj': {
            'handle': 'WSJ',
            'url': 'https://x.com/WSJ',
            'weight': 1.8,
            'specialization': ['markets', 'macro', 'geopolitical']
        },
        'cnbc': {
            'handle': 'CNBC',
            'url': 'https://x.com/CNBC',
            'weight': 1.6,
            'specialization': ['markets', 'macro']
        },
        'bbcbusiness': {
            'handle': 'BBCBusiness',
            'url': 'https://x.com/BBCBusiness',
            'weight': 1.7,
            'specialization': ['geopolitical', 'macro', 'markets']
        },

        # ===== TIER 3: Specialized Financial Media =====
        'zerohedge': {
            'handle': 'zerohedge',
            'url': 'https://x.com/zerohedge',
            'weight': 1.5,
            'specialization': ['macro', 'geopolitical', 'markets']
        },
        'marketwatch': {
            'handle': 'MarketWatch',
            'url': 'https://x.com/MarketWatch',
            'weight': 1.6,
            'specialization': ['markets', 'macro']
        },
        'unusual_whales': {
            'handle': 'unusual_whales',
            'url': 'https://x.com/unusual_whales',
            'weight': 1.5,
            'specialization': ['markets']
        },
        # NOTE(review): 'ft' (@FT) and 'financialtimes' (@FinancialTimes) are
        # two distinct handles — confirm both are intended.
        'financialtimes': {
            'handle': 'FinancialTimes',
            'url': 'https://x.com/FinancialTimes',
            'weight': 1.8,
            'specialization': ['markets', 'macro', 'geopolitical']
        },

        # ===== TIER 4: Economists & Analysis =====
        'economics': {
            'handle': 'economics',
            'url': 'https://x.com/economics',
            'weight': 1.7,
            'specialization': ['macro', 'geopolitical']
        },
        'ap': {
            'handle': 'AP',
            'url': 'https://x.com/AP',
            'weight': 1.7,
            'specialization': ['geopolitical', 'macro']
        },
        'afp': {
            'handle': 'AFP',
            'url': 'https://x.com/AFP',
            'weight': 1.7,
            'specialization': ['geopolitical', 'macro']
        },
        'ajenglish': {
            'handle': 'AJEnglish',
            'url': 'https://x.com/AJEnglish',
            'weight': 1.6,
            'specialization': ['geopolitical', 'macro']
        }
    }

    # Keyword detection for categorization.
    # NOTE(review): presumably substring-matched against tweet text by the
    # classifier (not visible here) — short keywords may over-match; confirm.
    MACRO_KEYWORDS = [
        'Fed', 'ECB', 'BoE', 'BoJ', 'FOMC', 'Powell', 'Lagarde',
        'interest rate', 'inflation', 'CPI', 'PPI', 'GDP',
        'unemployment', 'jobs report', 'NFP', 'central bank',
        'monetary policy', 'quantitative', 'recession'
    ]

    MARKET_KEYWORDS = [
        'S&P', 'Dow', 'Nasdaq', 'Russell', 'stocks', 'equities',
        'earnings', 'revenue', 'profit', 'shares', 'IPO',
        'merger', 'acquisition', 'crypto', 'Bitcoin', 'Ethereum',
        'oil', 'gold', 'commodities', 'futures', 'options'
    ]

    GEOPOLITICAL_KEYWORDS = [
        'war', 'conflict', 'sanctions', 'trade', 'tariff',
        'China', 'Russia', 'Ukraine', 'Taiwan', 'Middle East',
        'election', 'government', 'military', 'diplomatic',
        'treaty', 'EU', 'Brexit', 'OPEC'
    ]
179
+
180
    def __init__(self):
        """Initialize the monitor and locate a Chromium binary for Playwright."""
        # Resolve the executable once up front; every scrape launches
        # Playwright with this explicit executable_path.
        self.chromium_path = self._find_chromium()
184
+
185
+ def _find_chromium(self) -> str:
186
+ """Find Chromium installation path"""
187
+ import os
188
+ import shutil
189
+
190
+ # Try common paths
191
+ paths = [
192
+ '/usr/bin/chromium',
193
+ '/usr/bin/chromium-browser',
194
+ '/usr/lib/chromium/chromium',
195
+ shutil.which('chromium'),
196
+ shutil.which('chromium-browser'),
197
+ ]
198
+
199
+ for path in paths:
200
+ if path and os.path.exists(path):
201
+ logger.info(f"Found Chromium at: {path}")
202
+ return path
203
+
204
+ logger.warning("Chromium not found in standard paths")
205
+ return '/usr/bin/chromium' # Fallback
206
+
207
    def _scrape_twitter_profile(self, source_name: str, source_info: Dict, timeout: int = 30) -> List[Dict]:
        """Scrape tweets from a single Twitter profile using Playwright.

        Args:
            source_name: Key from SOURCES; used for logging and item ids.
            source_info: Source descriptor with 'url', 'handle', 'weight' and
                'specialization' keys (see the SOURCES entries).
            timeout: Page-navigation timeout in seconds.

        Returns:
            A list of normalized news-item dicts (possibly empty). All
            failures are logged and swallowed; this method never raises.
        """
        if not PLAYWRIGHT_AVAILABLE:
            logger.warning("Playwright not available")
            return []

        try:
            with sync_playwright() as p:
                # Launch lightweight browser with aggressive performance flags
                browser = p.chromium.launch(
                    executable_path=self.chromium_path,
                    headless=True,
                    args=[
                        '--disable-blink-features=AutomationControlled',
                        '--disable-dev-shm-usage',  # Overcome limited resource problems
                        '--no-sandbox',  # Required for some environments
                        '--disable-setuid-sandbox',
                        '--disable-gpu',  # Not needed in headless
                        '--disable-software-rasterizer'
                    ]
                )
                # Desktop Chrome UA so X serves the full web layout.
                context = browser.new_context(
                    user_agent='Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
                )
                page = context.new_page()

                # Block images, fonts, css, and videos for speed
                def route_intercept(route):
                    if route.request.resource_type in ["image", "media", "font", "stylesheet", "video"]:
                        route.abort()
                    else:
                        route.continue_()

                page.route("**/*", route_intercept)

                # Navigate to profile with increased timeout
                logger.info(f"Scraping {source_name}...")
                page.goto(source_info['url'], timeout=timeout * 1000, wait_until="domcontentloaded")

                # Wait for tweets to load with increased timeout
                try:
                    page.wait_for_selector("article", timeout=15000)  # Increased to 15 seconds
                except PlaywrightTimeoutError:
                    logger.warning(f"Timeout waiting for tweets from {source_name}")
                    browser.close()
                    return []

                # Extract tweet texts (limit to 15)
                tweet_elements = page.locator("article div[data-testid='tweetText']").all()

                news_items = []
                for idx, element in enumerate(tweet_elements[:15]):  # Reduced from 20 to 15 for speed
                    try:
                        text = element.text_content()
                        # Drop empty or trivially short extractions.
                        if not text or len(text) < 10:
                            continue

                        # Clean text: collapse all whitespace runs to single spaces.
                        text = text.strip()
                        text = re.sub(r'\s+', ' ', text)

                        # Skip retweets and replies
                        if text.startswith('RT @') or text.startswith('@'):
                            continue

                        # Categorize and analyze
                        category = self._categorize_text(text, source_info['specialization'])
                        sentiment = self._analyze_sentiment(text)
                        impact = self._assess_impact(source_info['weight'], text)
                        is_breaking = self._detect_breaking_news(text)

                        # Create summary (truncated only when over 150 chars)
                        summary = self._extract_summary(text) if len(text) > 150 else text

                        # The profile page does not expose exact tweet times here,
                        # so the timestamp is approximated by list position
                        # (newest first, one minute apart).
                        news_items.append({
                            'id': hash(f"{source_name}_{idx}_{datetime.now().isoformat()}"),
                            'title': text,
                            'summary': summary,
                            'source': source_info['handle'],
                            'category': category,
                            'timestamp': datetime.now() - timedelta(minutes=idx),  # Approximate time
                            'sentiment': sentiment,
                            'impact': impact,
                            'url': source_info['url'],
                            'likes': 0,
                            'retweets': 0,
                            'is_breaking': is_breaking,
                            'source_weight': source_info['weight'],
                            'from_web': True
                        })

                    except Exception as e:
                        # Best-effort per tweet: one bad element must not abort the page.
                        logger.debug(f"Error parsing tweet from {source_name}: {e}")
                        continue

                browser.close()
                logger.info(f"Scraped {len(news_items)} tweets from {source_name}")
                return news_items

        except Exception as e:
            logger.error(f"Error scraping {source_name}: {e}")
            return []
309
+
310
    def scrape_twitter_news(self, max_tweets: int = 100) -> List[Dict]:
        """
        Scrape latest financial news from Twitter using Playwright.
        Runs in parallel for better performance - 19 sources in ~30-45 seconds.

        Args:
            max_tweets: Cap on the number of items returned after sorting.

        Returns:
            Deduplicated news-item dicts sorted breaking-first, then
            high-impact, then newest. Falls back to mock data when Playwright
            is unavailable or nothing could be scraped.
        """
        if not PLAYWRIGHT_AVAILABLE:
            logger.info("Playwright not available - using mock data")
            return self._get_mock_news()

        all_news = []
        # Hashes of title prefixes already seen, for cross-source dedup.
        seen_texts = set()

        # Sort sources by weight (priority) - scrape high-value sources first
        sorted_sources = sorted(
            self.SOURCES.items(),
            key=lambda x: x[1]['weight'],
            reverse=True
        )

        # Scrape sources in parallel with moderate concurrency
        # 8 workers = 19 sources in 3 batches (~60-90 seconds total)
        with ThreadPoolExecutor(max_workers=8) as executor:
            futures = []
            for name, info in sorted_sources:
                # Increased timeout for better success rate
                future = executor.submit(self._scrape_twitter_profile, name, info, timeout=30)
                futures.append((future, name))

            # Collect results in submission (priority) order.
            for future, source_name in futures:
                try:
                    # Wait max 35 seconds per source (increased for reliability)
                    news_items = future.result(timeout=35)

                    # Deduplicate based on text similarity: the first 100 chars
                    # of the title approximate "same story".
                    unique_items = []
                    for item in news_items:
                        text_hash = hash(item['title'][:100])
                        if text_hash not in seen_texts:
                            seen_texts.add(text_hash)
                            unique_items.append(item)

                    all_news.extend(unique_items)
                    if len(unique_items) > 0:
                        logger.info(f"Fetched {len(unique_items)} unique tweets from {source_name}")

                except FuturesTimeoutError:
                    # A slow source is skipped rather than blocking the batch.
                    logger.warning(f"Timeout scraping {source_name} - skipping")
                except Exception as e:
                    logger.error(f"Error processing {source_name}: {e}")

        # If no news was fetched, use mock data
        if not all_news:
            logger.warning("No tweets fetched - using mock data")
            return self._get_mock_news()

        # Sort by breaking news, then impact, then timestamp
        all_news.sort(
            key=lambda x: (x['is_breaking'], x['impact'] == 'high', x['timestamp']),
            reverse=True
        )

        logger.info(f"Total unique tweets: {len(all_news)}")
        return all_news[:max_tweets]
373
+
374
+ def _categorize_text(self, text: str, source_specialization: List[str]) -> str:
375
+ """Categorize news based on keywords and source specialization"""
376
+ text_lower = text.lower()
377
+
378
+ # Count keyword matches
379
+ macro_score = sum(1 for kw in self.MACRO_KEYWORDS if kw.lower() in text_lower)
380
+ market_score = sum(1 for kw in self.MARKET_KEYWORDS if kw.lower() in text_lower)
381
+ geo_score = sum(1 for kw in self.GEOPOLITICAL_KEYWORDS if kw.lower() in text_lower)
382
+
383
+ # Boost scores based on source specialization
384
+ if 'macro' in source_specialization:
385
+ macro_score *= 1.5
386
+ if 'markets' in source_specialization:
387
+ market_score *= 1.5
388
+ if 'geopolitical' in source_specialization:
389
+ geo_score *= 1.5
390
+
391
+ # Return category with highest score
392
+ scores = {'macro': macro_score, 'markets': market_score, 'geopolitical': geo_score}
393
+ return max(scores, key=scores.get)
394
+
395
+ def _analyze_sentiment(self, text: str) -> str:
396
+ """Simple keyword-based sentiment analysis for trading"""
397
+ text_lower = text.lower()
398
+
399
+ positive_keywords = ['surge', 'rally', 'gain', 'rise', 'up', 'bullish', 'strong', 'beat', 'exceed']
400
+ negative_keywords = ['crash', 'plunge', 'fall', 'down', 'bearish', 'weak', 'miss', 'below', 'loss']
401
+
402
+ pos_count = sum(1 for kw in positive_keywords if kw in text_lower)
403
+ neg_count = sum(1 for kw in negative_keywords if kw in text_lower)
404
+
405
+ if pos_count > neg_count:
406
+ return 'positive'
407
+ elif neg_count > pos_count:
408
+ return 'negative'
409
+ return 'neutral'
410
+
411
+ def _assess_impact(self, source_weight: float, text: str) -> str:
412
+ """Assess market impact based on source weight and keywords"""
413
+ text_lower = text.lower()
414
+
415
+ high_impact_keywords = ['breaking', 'alert', 'urgent', 'flash', 'fed', 'powell', 'rate', 'war']
416
+ impact_score = sum(1 for kw in high_impact_keywords if kw in text_lower)
417
+
418
+ # Combine source weight and keyword impact
419
+ total_impact = source_weight + (impact_score * 0.3)
420
+
421
+ if total_impact >= 1.8:
422
+ return 'high'
423
+ elif total_impact >= 1.4:
424
+ return 'medium'
425
+ return 'low'
426
+
427
+ def _detect_breaking_news(self, text: str) -> bool:
428
+ """Detect if news is breaking/urgent"""
429
+ text_lower = text.lower()
430
+ breaking_keywords = ['breaking', 'alert', 'urgent', 'flash', '*breaking*', '🚨']
431
+ return any(kw in text_lower for kw in breaking_keywords)
432
+
433
+ def _extract_summary(self, text: str) -> str:
434
+ """Extract first 150 characters as summary"""
435
+ if len(text) <= 150:
436
+ return text
437
+ return text[:147] + "..."
438
+
439
+ def _get_mock_news(self) -> List[Dict]:
440
+ """Return mock data when scraping fails"""
441
+ mock_news = [
442
+ {
443
+ 'id': hash('mock1'),
444
+ 'title': 'Fed signals potential rate pause as inflation moderates',
445
+ 'summary': 'Fed signals potential rate pause as inflation moderates',
446
+ 'source': 'Mock Data',
447
+ 'category': 'macro',
448
+ 'timestamp': datetime.now() - timedelta(minutes=5),
449
+ 'sentiment': 'neutral',
450
+ 'impact': 'high',
451
+ 'url': 'https://x.com',
452
+ 'likes': 0,
453
+ 'retweets': 0,
454
+ 'is_breaking': False,
455
+ 'source_weight': 1.5,
456
+ 'from_web': True
457
+ },
458
+ {
459
+ 'id': hash('mock2'),
460
+ 'title': 'S&P 500 futures rise ahead of key earnings reports',
461
+ 'summary': 'S&P 500 futures rise ahead of key earnings reports',
462
+ 'source': 'Mock Data',
463
+ 'category': 'markets',
464
+ 'timestamp': datetime.now() - timedelta(minutes=15),
465
+ 'sentiment': 'positive',
466
+ 'impact': 'medium',
467
+ 'url': 'https://x.com',
468
+ 'likes': 0,
469
+ 'retweets': 0,
470
+ 'is_breaking': False,
471
+ 'source_weight': 1.5,
472
+ 'from_web': True
473
+ }
474
+ ]
475
+ return mock_news
476
+
477
+ def get_statistics(self) -> Dict:
478
+ """
479
+ Get statistics about cached news
480
+ Note: Statistics are now managed by NewsCacheManager
481
+ This method returns empty stats for backward compatibility
482
+ """
483
+ return {
484
+ 'total': 0,
485
+ 'high_impact': 0,
486
+ 'breaking': 0,
487
+ 'last_update': 'Managed by cache',
488
+ 'by_category': {}
489
+ }
app/styles.py ADDED
@@ -0,0 +1,331 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Dark theme CSS styles for the financial dashboard."""
2
+
3
+ DARK_THEME_CSS = """
4
+ <style>
5
+ :root {
6
+ --primary-color: #0066ff;
7
+ --secondary-color: #1f77e2;
8
+ --success-color: #00d084;
9
+ --danger-color: #ff3838;
10
+ --warning-color: #ffa500;
11
+ --bg-dark: #0e1117;
12
+ --bg-darker: #010409;
13
+ --text-primary: #e6edf3;
14
+ --text-secondary: #8b949e;
15
+ --border-color: #30363d;
16
+ }
17
+
18
+ /* Main background */
19
+ html, body {
20
+ background-color: var(--bg-darker) !important;
21
+ color: var(--text-primary) !important;
22
+ margin: 0 !important;
23
+ padding: 0 !important;
24
+ }
25
+
26
+ /* Streamlit containers */
27
+ .main, [data-testid="stAppViewContainer"] {
28
+ background-color: var(--bg-dark) !important;
29
+ }
30
+
31
+ /* Hide header and footer */
32
+ [data-testid="stHeader"] {
33
+ background-color: var(--bg-dark) !important;
34
+ }
35
+
36
+ [data-testid="stToolbar"] {
37
+ background-color: var(--bg-dark) !important;
38
+ }
39
+
40
+ .stApp {
41
+ background-color: var(--bg-dark) !important;
42
+ }
43
+
44
+ [data-testid="stDecoration"] {
45
+ background-color: var(--bg-dark) !important;
46
+ }
47
+
48
+ [data-testid="stSidebar"] {
49
+ background-color: #0d1117 !important;
50
+ border-right: 1px solid var(--border-color);
51
+ }
52
+
53
+ /* Text colors */
54
+ p, span, div, h1, h2, h3, h4, h5, h6, label, li, a {
55
+ color: var(--text-primary) !important;
56
+ }
57
+
58
+ /* Headings */
59
+ h1, h2, h3 {
60
+ color: var(--text-primary) !important;
61
+ font-weight: 700 !important;
62
+ }
63
+
64
+ /* Links */
65
+ a {
66
+ color: var(--primary-color) !important;
67
+ text-decoration: none !important;
68
+ }
69
+
70
+ a:hover {
71
+ color: var(--secondary-color) !important;
72
+ text-decoration: underline !important;
73
+ }
74
+
75
+ /* Labels and text inputs */
76
+ label {
77
+ color: var(--text-primary) !important;
78
+ font-weight: 500 !important;
79
+ }
80
+
81
+ /* Paragraph text */
82
+ p {
83
+ color: var(--text-primary) !important;
84
+ line-height: 1.6 !important;
85
+ }
86
+
87
+ /* Metric card styling */
88
+ [data-testid="metric-container"] {
89
+ background: linear-gradient(135deg, #1f2937 0%, #111827 100%) !important;
90
+ border: 1px solid var(--border-color) !important;
91
+ border-radius: 10px !important;
92
+ padding: 1.5rem !important;
93
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.3) !important;
94
+ }
95
+
96
+ .metric-card {
97
+ background: linear-gradient(135deg, #1f2937 0%, #111827 100%);
98
+ padding: 1.5rem;
99
+ border-radius: 10px;
100
+ border: 1px solid var(--border-color);
101
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.3);
102
+ }
103
+
104
+ .metric-value {
105
+ font-size: 2.5rem;
106
+ font-weight: 700;
107
+ color: var(--primary-color);
108
+ margin: 0.5rem 0;
109
+ }
110
+
111
+ .metric-label {
112
+ font-size: 0.875rem;
113
+ color: var(--text-secondary);
114
+ text-transform: uppercase;
115
+ letter-spacing: 0.05em;
116
+ }
117
+
118
+ .section-title {
119
+ color: var(--text-primary);
120
+ border-bottom: 2px solid var(--primary-color);
121
+ padding-bottom: 1rem;
122
+ margin-top: 2rem;
123
+ margin-bottom: 1.5rem;
124
+ }
125
+
126
+ /* Button styling */
127
+ .stButton > button {
128
+ background: linear-gradient(135deg, var(--primary-color) 0%, var(--secondary-color) 100%) !important;
129
+ color: #ffffff !important;
130
+ border: none !important;
131
+ border-radius: 8px !important;
132
+ padding: 0.75rem 2rem !important;
133
+ font-weight: 700 !important;
134
+ transition: all 0.3s ease !important;
135
+ box-shadow: 0 4px 6px rgba(0, 102, 255, 0.2) !important;
136
+ }
137
+
138
+ .stButton > button:hover {
139
+ box-shadow: 0 8px 16px rgba(0, 102, 255, 0.4) !important;
140
+ transform: translateY(-2px) !important;
141
+ }
142
+
143
+ .stButton > button:active {
144
+ transform: translateY(0) !important;
145
+ }
146
+
147
+ /* Input fields */
148
+ [data-testid="stTextInput"] input,
149
+ [data-testid="stSlider"] input {
150
+ background-color: #161b22 !important;
151
+ border: 1px solid var(--border-color) !important;
152
+ color: var(--text-primary) !important;
153
+ border-radius: 6px !important;
154
+ }
155
+
156
+ [data-testid="stTextInput"] input::placeholder {
157
+ color: var(--text-secondary) !important;
158
+ }
159
+
160
+ /* Slider */
161
+ [data-testid="stSlider"] {
162
+ color: var(--primary-color) !important;
163
+ }
164
+
165
+ /* Tabs */
166
+ [data-testid="stTabs"] [role="tablist"] {
167
+ background-color: transparent !important;
168
+ border-bottom: 2px solid var(--border-color) !important;
169
+ }
170
+
171
+ [data-testid="stTabs"] [role="tab"] {
172
+ color: var(--text-secondary) !important;
173
+ background-color: transparent !important;
174
+ border: none !important;
175
+ padding: 1rem 1.5rem !important;
176
+ }
177
+
178
+ [data-testid="stTabs"] [role="tab"][aria-selected="true"] {
179
+ color: var(--primary-color) !important;
180
+ border-bottom: 3px solid var(--primary-color) !important;
181
+ }
182
+
183
+ /* Dataframe */
184
+ [data-testid="dataframe"] {
185
+ background-color: #0d1117 !important;
186
+ }
187
+
188
+ .dataframe {
189
+ background-color: #0d1117 !important;
190
+ color: var(--text-primary) !important;
191
+ }
192
+
193
+ /* Info/Error boxes */
194
+ [data-testid="stInfo"],
195
+ [data-testid="stSuccess"],
196
+ [data-testid="stWarning"],
197
+ [data-testid="stError"] {
198
+ background-color: rgba(0, 102, 255, 0.1) !important;
199
+ border-left: 4px solid var(--primary-color) !important;
200
+ border-radius: 6px !important;
201
+ }
202
+
203
+ [data-testid="stError"] {
204
+ background-color: rgba(255, 56, 56, 0.1) !important;
205
+ border-left-color: var(--danger-color) !important;
206
+ }
207
+
208
+ /* Markdown */
209
+ [data-testid="stMarkdown"] {
210
+ color: var(--text-primary) !important;
211
+ }
212
+
213
+ /* Expander */
214
+ [data-testid="stExpander"] {
215
+ background-color: #161b22 !important;
216
+ border: 1px solid var(--border-color) !important;
217
+ border-radius: 6px !important;
218
+ }
219
+
220
+ /* Metric text styling */
221
+ [data-testid="metric-container"] p {
222
+ color: var(--text-primary) !important;
223
+ }
224
+
225
+ [data-testid="metric-container"] [data-testid="stMetricValue"] {
226
+ color: var(--primary-color) !important;
227
+ font-weight: 700 !important;
228
+ }
229
+
230
+ /* Slider label color */
231
+ [data-testid="stSlider"] label {
232
+ color: var(--text-primary) !important;
233
+ }
234
+
235
+ /* Text input label */
236
+ [data-testid="stTextInput"] label {
237
+ color: var(--text-primary) !important;
238
+ }
239
+
240
+ /* Write and markdown text */
241
+ [data-testid="stMarkdownContainer"] p {
242
+ color: var(--text-primary) !important;
243
+ }
244
+
245
+ [data-testid="stMarkdownContainer"] strong {
246
+ color: var(--primary-color) !important;
247
+ font-weight: 600 !important;
248
+ }
249
+
250
+ /* Spinner text */
251
+ [data-testid="stSpinner"] {
252
+ color: var(--primary-color) !important;
253
+ }
254
+
255
+ /* Column separators */
256
+ hr {
257
+ border-color: var(--border-color) !important;
258
+ }
259
+
260
+ /* Scrollbar */
261
+ ::-webkit-scrollbar {
262
+ width: 8px;
263
+ height: 8px;
264
+ }
265
+
266
+ ::-webkit-scrollbar-track {
267
+ background: #0d1117;
268
+ }
269
+
270
+ ::-webkit-scrollbar-thumb {
271
+ background: var(--border-color);
272
+ border-radius: 4px;
273
+ }
274
+
275
+ ::-webkit-scrollbar-thumb:hover {
276
+ background: var(--primary-color);
277
+ }
278
+
279
+ /* Selection highlighting */
280
+ ::selection {
281
+ background-color: var(--primary-color);
282
+ color: #fff;
283
+ }
284
+
285
+ /* Fix all white backgrounds */
286
+ .stApp > header {
287
+ background-color: var(--bg-dark) !important;
288
+ }
289
+
290
+ .stApp > header::before {
291
+ background: none !important;
292
+ }
293
+
294
+ .stApp > header::after {
295
+ background: none !important;
296
+ }
297
+
298
+ /* Streamlit elements background */
299
+ [data-testid="stVerticalBlock"] {
300
+ background-color: transparent !important;
301
+ }
302
+
303
+ [data-testid="stVerticalBlockBorderWrapper"] {
304
+ background-color: transparent !important;
305
+ }
306
+
307
+ /* Remove white decorative elements */
308
+ .st-emotion-cache-1gvbgyg {
309
+ background-color: var(--bg-dark) !important;
310
+ }
311
+
312
+ .st-emotion-cache-1jicfl2 {
313
+ background-color: var(--bg-dark) !important;
314
+ }
315
+
316
+ /* Ensure all root divs are dark */
317
+ div[class*="st-"] {
318
+ background-color: transparent !important;
319
+ }
320
+
321
+ /* Modal and overlay backgrounds */
322
+ .stModal {
323
+ background-color: var(--bg-dark) !important;
324
+ }
325
+
326
+ /* Alert boxes background */
327
+ .stAlert {
328
+ background-color: rgba(0, 102, 255, 0.1) !important;
329
+ }
330
+ </style>
331
+ """
app/ui.py ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """UI component functions for the financial dashboard."""
2
+
3
+ import streamlit as st
4
+ import pandas as pd
5
+ from data import format_financial_value, get_profitability_metrics
6
+
7
+
8
def display_price_metrics(metrics: dict):
    """Render price, day change and 52-week range as four metric cards.

    Expects keys: current_price, price_change, price_change_pct,
    high_52w, low_52w.
    """
    st.markdown('<div class="section-title">πŸ“Š Price Metrics</div>', unsafe_allow_html=True)

    columns = st.columns(4)

    with columns[0]:
        st.metric("Current Price", f"${metrics['current_price']:.2f}",
                  f"{metrics['price_change']:+.2f}", delta_color="normal")

    with columns[1]:
        st.metric("Day Change %", f"{metrics['price_change_pct']:+.2f}%",
                  None, delta_color="normal")

    with columns[2]:
        st.metric("52W High", f"${metrics['high_52w']:.2f}")

    with columns[3]:
        st.metric("52W Low", f"${metrics['low_52w']:.2f}")
27
+
28
+
29
def display_company_info(profile_info):
    """Show basic company facts in two columns; missing attributes render as N/A."""
    st.markdown('<div class="section-title">πŸ“‹ Company Information</div>', unsafe_allow_html=True)

    if not profile_info:
        return

    # (label, attribute) pairs for each column.
    left_fields = [('Company Name', 'name'), ('Sector', 'sector'), ('Industry', 'industry')]
    right_fields = [('Country', 'country'), ('Exchange', 'exchange'), ('Website', 'website')]

    left_col, right_col = st.columns(2)
    for column, fields in ((left_col, left_fields), (right_col, right_fields)):
        with column:
            for label, attr in fields:
                st.write(f"**{label}:** {getattr(profile_info, attr, 'N/A')}")
44
+
45
+
46
def _currency_metric(label: str, value):
    """Render one currency metric card; shows "N/A" unless value is present
    (not NaN) and strictly positive — mirrors the original inline checks.

    NOTE(review): the positive-only check also hides genuine losses
    (negative net/operating income render as "N/A") — confirm intended.
    """
    if pd.notna(value) and value > 0:
        st.metric(label, format_financial_value(value))
    else:
        st.metric(label, "N/A")


def display_financial_metrics(income_stmt: pd.DataFrame):
    """Display financial metrics from the latest income-statement row.

    Renders two rows of four st.metric cards. Improvement: the eight nearly
    identical value/NaN/positivity blocks are collapsed into the
    _currency_metric helper; behavior is unchanged.
    """
    st.markdown('<div class="section-title">πŸ’° Financial Metrics</div>', unsafe_allow_html=True)

    latest_income = income_stmt.iloc[0] if len(income_stmt) > 0 else None
    if latest_income is None:
        # Nothing to show for an empty statement (title is still rendered,
        # matching the previous behavior).
        return

    # First row: headline P&L figures.
    fin_col1, fin_col2, fin_col3, fin_col4 = st.columns(4)
    with fin_col1:
        _currency_metric("Total Revenue", latest_income.get('total_revenue', 0))
    with fin_col2:
        _currency_metric("Net Income", latest_income.get('net_income', 0))
    with fin_col3:
        _currency_metric("Gross Profit", latest_income.get('gross_profit', 0))
    with fin_col4:
        _currency_metric("Operating Income", latest_income.get('operating_income', 0))

    # Second row: per-share, EBITDA and cost figures.
    fin_col5, fin_col6, fin_col7, fin_col8 = st.columns(4)
    with fin_col5:
        # EPS keeps its original, different rule: NaN -> "N/A", but zero or
        # negative EPS is still shown (losses per share are meaningful).
        eps = latest_income.get('diluted_earnings_per_share', 0)
        if pd.notna(eps):
            st.metric("EPS (Diluted)", f"${eps:.2f}")
        else:
            st.metric("EPS (Diluted)", "N/A")
    with fin_col6:
        _currency_metric("EBITDA", latest_income.get('ebitda', 0))
    with fin_col7:
        _currency_metric("Cost of Revenue", latest_income.get('cost_of_revenue', 0))
    with fin_col8:
        _currency_metric("R&D Expense", latest_income.get('research_and_development_expense', 0))
114
+
115
+
116
def display_income_statement(income_stmt: pd.DataFrame):
    """Render the income statement as a formatted, read-only table."""
    st.markdown("### Income Statement")

    if income_stmt.empty:
        return

    preferred_order = [
        'period_ending',
        'total_revenue',
        'cost_of_revenue',
        'gross_profit',
        'operating_income',
        'net_income',
        'diluted_earnings_per_share',
        'ebitda',
    ]

    # Keep only the columns this statement actually has, in preferred order.
    selected = [col for col in preferred_order if col in income_stmt.columns]
    table = income_stmt[selected].copy()

    # Pretty-print every monetary column; the period label stays untouched.
    for column in table.columns:
        if column != 'period_ending':
            table[column] = table[column].apply(format_financial_value)

    st.dataframe(table, use_container_width=True, hide_index=True)
142
+
143
+
144
def display_profitability_metrics(income_stmt: pd.DataFrame):
    """Display margin metrics plus YoY revenue growth when two periods exist.

    Fix: previously indexed income_stmt.iloc[0] unconditionally, raising
    IndexError on an empty DataFrame (sibling display functions guard this
    case); an early return now handles empty input.
    """
    st.markdown("### Profitability Metrics")

    # Guard: nothing to compute for an empty statement.
    if income_stmt.empty:
        return

    prof_col1, prof_col2 = st.columns(2)
    latest_data = income_stmt.iloc[0]
    metrics = get_profitability_metrics(latest_data)

    # Only render metrics the helper could actually compute.
    with prof_col1:
        if "gross_margin" in metrics:
            st.metric("Gross Margin", f"{metrics['gross_margin']:.2f}%")
        if "net_margin" in metrics:
            st.metric("Net Profit Margin", f"{metrics['net_margin']:.2f}%")

    with prof_col2:
        if "operating_margin" in metrics:
            st.metric("Operating Margin", f"{metrics['operating_margin']:.2f}%")

    # Year-over-year revenue growth needs at least two reporting periods and
    # a positive prior-period revenue to divide by.
    if len(income_stmt) > 1:
        prev_revenue = income_stmt.iloc[1].get('total_revenue', 0)
        total_rev = latest_data.get('total_revenue', 0)
        if prev_revenue and prev_revenue > 0:
            revenue_growth = ((total_rev - prev_revenue) / prev_revenue) * 100
            st.metric("Revenue Growth (YoY)", f"{revenue_growth:+.2f}%")
app/utils/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Utilities package for financial platform."""
app/utils/ai_summary_cache.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Shared in-memory AI summary cache with buffering and batching."""
2
+
3
+ import os
4
+ import threading
5
+ from datetime import datetime, timedelta
6
+ from typing import Dict, List, Optional, Tuple
7
+
8
+ from utils.llm_summarizer import OpenAICompatSummarizer
9
+
10
+ # Approx 4 chars per token -> 600 tokens ~= 2400 chars
11
+ DEFAULT_BATCH_MAX_CHARS = int(os.getenv("LLM_SUMMARY_BATCH_MAX_CHARS", "2400"))
12
+ BUFFER_SECONDS = int(os.getenv("LLM_SUMMARY_BUFFER_SECONDS", "120"))
13
+
14
+
15
class AISummaryCache:
    """Thread-safe in-memory buffer and cache of LLM-generated news summaries.

    Items are queued via buffer_items(); after BUFFER_SECONDS have elapsed,
    maybe_flush() sends them to OpenAICompatSummarizer in character-bounded
    batches and stores the results keyed by item id (or "source|title").
    """

    def __init__(self):
        # A single lock guards the buffer, the summary map and timestamps.
        self._lock = threading.Lock()
        self._buffer: List[Dict] = []  # items awaiting summarization
        self._buffer_start: Optional[datetime] = None  # when the current buffer opened
        self._summaries: Dict[str, Dict] = {}  # key -> summary record
        self._last_update: Optional[datetime] = None  # time of the last successful flush

    def buffer_items(self, items: List[Dict]):
        """Queue new items for summarization.

        Items without a usable key or already summarized are skipped; the
        buffering clock starts with the first item queued into an empty buffer.
        """
        if not items:
            return
        with self._lock:
            for item in items:
                key = self._item_key(item)
                if not key or key in self._summaries:
                    continue
                self._buffer.append(item)
            if self._buffer and self._buffer_start is None:
                self._buffer_start = datetime.now()

    def maybe_flush(self):
        """Summarize buffered items once the buffer window has elapsed.

        The lock is deliberately released before calling the summarizer so a
        slow LLM request does not block readers; results are merged back
        under the lock afterwards.
        """
        with self._lock:
            # Nothing buffered, or the window has not elapsed yet.
            if not self._buffer or self._buffer_start is None:
                return
            if datetime.now() - self._buffer_start < timedelta(seconds=BUFFER_SECONDS):
                return
            # Take ownership of the buffer and reset it while still locked.
            items = self._buffer
            self._buffer = []
            self._buffer_start = None

        summarizer = OpenAICompatSummarizer()
        if not summarizer.enabled:
            # NOTE(review): when the summarizer is disabled the taken items
            # are dropped, not re-queued — confirm this is intended.
            return

        batches = self._batch_items(items, DEFAULT_BATCH_MAX_CHARS)
        for batch in batches:
            texts = [self._build_input_text(item) for item in batch]
            texts = [t for t in texts if t]
            if not texts:
                continue
            summaries = summarizer._summarize_chunk(texts, source="dashboard")
            if not summaries:
                continue
            with self._lock:
                # _batch_items only admits items with non-empty text, so this
                # zip pairs items and summaries positionally.
                for item, summary in zip(batch, summaries):
                    key = self._item_key(item)
                    if not key:
                        continue
                    self._summaries[key] = {
                        "id": item.get("id", key),
                        "title": item.get("title", ""),
                        "source": item.get("source", ""),
                        "summary": summary,
                        "timestamp": datetime.now(),
                    }
                self._last_update = datetime.now()

    def get_summaries(self) -> Tuple[List[Dict], Optional[datetime]]:
        """Return (summaries newest-first, time of last flush)."""
        with self._lock:
            summaries = list(self._summaries.values())
            last_update = self._last_update
        # Sorting happens outside the lock on the snapshot copy.
        summaries.sort(key=lambda x: x.get("timestamp", datetime.min), reverse=True)
        return summaries, last_update

    def get_status(self) -> Dict:
        """Return a snapshot of buffer/summary state for status displays."""
        with self._lock:
            buffer_size = len(self._buffer)
            buffer_start = self._buffer_start
            total_summaries = len(self._summaries)
            last_update = self._last_update
        buffer_age_seconds = None
        buffer_remaining_seconds = None
        if buffer_start:
            buffer_age_seconds = (datetime.now() - buffer_start).total_seconds()
            # Seconds until maybe_flush() will act, floored at zero.
            buffer_remaining_seconds = max(BUFFER_SECONDS - buffer_age_seconds, 0)
        return {
            "buffer_size": buffer_size,
            "buffer_started_at": buffer_start,
            "buffer_age_seconds": buffer_age_seconds,
            "buffer_remaining_seconds": buffer_remaining_seconds,
            "buffer_window_seconds": BUFFER_SECONDS,
            "total_summaries": total_summaries,
            "last_update": last_update,
            "batch_max_chars": DEFAULT_BATCH_MAX_CHARS,
        }

    def _item_key(self, item: Dict) -> str:
        """Derive a stable dedup key: the item id if present, else
        'source|title' lower-cased; empty string when no key is possible."""
        if item.get("id") is not None:
            return str(item.get("id"))
        title = str(item.get("title", "")).strip()
        source = str(item.get("source", "")).strip()
        if not title:
            return ""
        return f"{source}|{title}".lower()

    def _build_input_text(self, item: Dict) -> str:
        """Format one item as the text sent to the summarizer; empty string
        when the item has no title."""
        title = str(item.get("title", "")).strip()
        source = str(item.get("source", "")).strip()
        if not title:
            return ""
        if source:
            return f"Source: {source}\nTitle: {title}"
        return f"Title: {title}"

    def _batch_items(self, items: List[Dict], max_chars_total: int) -> List[List[Dict]]:
        """Greedily group items so each batch's combined input text stays
        under max_chars_total characters; items with empty text are dropped.

        A non-positive limit disables batching (everything in one batch).
        """
        if max_chars_total <= 0:
            return [items]
        batches: List[List[Dict]] = []
        current: List[Dict] = []
        current_chars = 0
        for item in items:
            text = self._build_input_text(item)
            if not text:
                continue
            text_len = len(text)
            # Close the current batch before it would overflow the limit.
            if current and current_chars + text_len > max_chars_total:
                batches.append(current)
                current = []
                current_chars = 0
            current.append(item)
            current_chars += text_len
        if current:
            batches.append(current)
        return batches


# Module-level singleton shared across the app.
ai_summary_cache = AISummaryCache()
app/utils/ai_summary_store.py ADDED
@@ -0,0 +1,287 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """File-backed AI summary buffer and cache with optional HF dataset sync."""
2
+
3
+ import json
4
+ import os
5
+ import time
6
+ from contextlib import contextmanager
7
+ from datetime import datetime
8
+ from typing import Dict, Iterable, List, Optional, Tuple
9
+
10
+ try:
11
+ import fcntl
12
+ except Exception: # pragma: no cover
13
+ fcntl = None
14
+
15
+ try:
16
+ from huggingface_hub import HfApi, snapshot_download
17
+ except Exception: # pragma: no cover
18
+ HfApi = None
19
+ snapshot_download = None
20
+
21
+ CACHE_DIR = os.getenv("AI_SUMMARY_CACHE_DIR", "./ai-summary-cache")
22
+ BUFFER_SECONDS = int(os.getenv("LLM_SUMMARY_BUFFER_SECONDS", "120"))
23
+ BATCH_MAX_CHARS = int(os.getenv("LLM_SUMMARY_BATCH_MAX_CHARS", "2400"))
24
+ HF_REPO_ID = os.getenv("AI_SUMMARY_HF_REPO", "ResearchEngineering/ai_news_summaries")
25
+ HF_REPO_TYPE = os.getenv("AI_SUMMARY_HF_REPO_TYPE", "dataset")
26
+
27
+ BUFFER_FILE = "buffer.jsonl"
28
+ SUMMARIES_FILE = "summaries.jsonl"
29
+ META_FILE = "meta.json"
30
+ LOCK_FILE = ".lock"
31
+
32
+
33
def init_storage():
    """Ensure the on-disk cache is usable: create the cache directory,
    optionally restore a previous snapshot from the HF dataset, and make
    sure the buffer/summary JSONL files exist."""
    os.makedirs(CACHE_DIR, exist_ok=True)
    if snapshot_download and HF_REPO_ID:
        _maybe_restore_from_hf()
    _ensure_files()
38
+
39
+
40
def enqueue_items(items: Iterable[Dict]):
    """Append new, unseen news items to the pending buffer.

    Deduplicates against both the buffer and already-stored summaries via
    the item key; untitled or keyless items are skipped. The buffer file
    is rewritten only when something was actually added.
    """
    init_storage()
    created_at = time.time()

    with _file_lock():
        pending = _read_jsonl(BUFFER_FILE)
        done = _read_jsonl(SUMMARIES_FILE)

        # Keys already known anywhere in the store must not be re-queued.
        seen = {entry.get("item_key") for entry in pending if entry.get("item_key")}
        seen.update(entry.get("item_key") for entry in done if entry.get("item_key"))

        new_count = 0
        for item in items:
            key = _item_key(item)
            title = str(item.get("title", "")).strip()
            if not key or not title or key in seen:
                continue
            pending.append(
                {
                    "item_key": key,
                    "title": title,
                    "source": str(item.get("source", "")).strip(),
                    "created_at": created_at,
                }
            )
            seen.add(key)
            new_count += 1

        if new_count:
            _write_jsonl(BUFFER_FILE, pending)
71
+
72
+
73
def get_status() -> Dict:
    """Report buffer/summary counts plus timing info for the dashboard UI."""
    init_storage()
    with _file_lock():
        pending = _read_jsonl(BUFFER_FILE)
        done = _read_jsonl(SUMMARIES_FILE)

    newest_update = max((e.get("updated_at", 0) for e in done), default=None) if done else None
    oldest_pending = min((e.get("created_at", 0) for e in pending), default=None) if pending else None

    remaining = None
    if oldest_pending:
        age = time.time() - oldest_pending
        remaining = max(BUFFER_SECONDS - age, 0)

    last_update_text = None
    if newest_update:
        last_update_text = datetime.fromtimestamp(newest_update).strftime("%Y-%m-%d %H:%M:%S")

    return {
        "buffer_size": len(pending),
        "total_summaries": len(done),
        "last_update": last_update_text,
        "buffer_remaining_seconds": remaining,
        "batch_max_chars": BATCH_MAX_CHARS,
        "buffer_window_seconds": BUFFER_SECONDS,
    }
106
+
107
+
108
def fetch_summaries(limit: int = 50) -> List[Dict]:
    """Return the most recently updated summaries, newest first.

    Stored epoch timestamps are converted to ``datetime`` for display.
    """
    init_storage()
    with _file_lock():
        stored = _read_jsonl(SUMMARIES_FILE)

    stored.sort(key=lambda entry: entry.get("updated_at", 0), reverse=True)
    return [
        {
            "title": entry.get("title", ""),
            "source": entry.get("source", ""),
            "summary": entry.get("summary", ""),
            "timestamp": datetime.fromtimestamp(entry.get("updated_at", time.time())),
        }
        for entry in stored[:limit]
    ]
125
+
126
+
127
def fetch_ready_batches(max_chars_total: int, buffer_seconds: int) -> List[List[Tuple[str, str, str]]]:
    """Collect buffered items older than ``buffer_seconds`` and group them
    into batches whose combined prompt text fits ``max_chars_total``.

    Returns batches of ``(item_key, title, source)`` tuples, oldest first.
    """
    init_storage()
    cutoff = time.time() - buffer_seconds

    with _file_lock():
        pending = _read_jsonl(BUFFER_FILE)

    ready = sorted(
        (entry for entry in pending if entry.get("created_at", 0) <= cutoff),
        key=lambda entry: entry.get("created_at", 0),
    )

    batches: List[List[Tuple[str, str, str]]] = []
    bucket: List[Tuple[str, str, str]] = []
    used = 0

    for entry in ready:
        title = entry.get("title", "")
        source = entry.get("source", "")
        size = len(_build_input_text(title, source))
        # Start a new batch when this entry would blow the character budget.
        if bucket and used + size > max_chars_total:
            batches.append(bucket)
            bucket = []
            used = 0
        bucket.append((entry.get("item_key"), title, source))
        used += size

    if bucket:
        batches.append(bucket)
    return batches
157
+
158
+
159
def store_summaries(items: List[Tuple[str, str, str, str]]):
    """Persist finished summaries and drop them from the pending buffer.

    ``items`` holds ``(item_key, title, source, summary)`` tuples. After
    the local write, metadata is refreshed and the cache directory is
    synced to the HF dataset when configured.
    """
    if not items:
        return

    init_storage()
    stamp = time.time()

    with _file_lock():
        done_by_key = {
            entry.get("item_key"): entry
            for entry in _read_jsonl(SUMMARIES_FILE)
            if entry.get("item_key")
        }
        pending_by_key = {
            entry.get("item_key"): entry
            for entry in _read_jsonl(BUFFER_FILE)
            if entry.get("item_key")
        }

        for item_key, title, source, summary in items:
            done_by_key[item_key] = {
                "item_key": item_key,
                "title": title,
                "source": source,
                "summary": summary,
                "updated_at": stamp,
            }
            # A summarized item no longer belongs in the buffer.
            pending_by_key.pop(item_key, None)

        _write_jsonl(SUMMARIES_FILE, list(done_by_key.values()))
        _write_jsonl(BUFFER_FILE, list(pending_by_key.values()))

    _write_meta({"last_sync": None, "last_update": stamp})
    _sync_to_hf_if_configured()
189
+
190
+
191
+ def _item_key(item: Dict) -> str:
192
+ if item.get("id") is not None:
193
+ return str(item.get("id"))
194
+ title = str(item.get("title", "")).strip()
195
+ source = str(item.get("source", "")).strip()
196
+ if not title:
197
+ return ""
198
+ return f"{source}|{title}".lower()
199
+
200
+
201
+ def _build_input_text(title: str, source: str) -> str:
202
+ if source:
203
+ return f"Source: {source}\nTitle: {title}"
204
+ return f"Title: {title}"
205
+
206
+
207
def _ensure_files():
    """Create empty buffer/summary JSONL files when they do not exist yet,
    so later reads and atomic rewrites never hit a missing path."""
    for name in (BUFFER_FILE, SUMMARIES_FILE):
        path = os.path.join(CACHE_DIR, name)
        if not os.path.exists(path):
            # Touch the file (zero bytes) rather than writing content.
            open(path, "w", encoding="utf-8").close()
213
+
214
+
215
def _read_jsonl(filename: str) -> List[Dict]:
    """Load a JSONL file from the cache dir, skipping blank/corrupt lines.

    Returns [] when the file does not exist.
    """
    path = os.path.join(CACHE_DIR, filename)
    if not os.path.exists(path):
        return []
    records: List[Dict] = []
    with open(path, "r", encoding="utf-8") as handle:
        for raw in handle:
            raw = raw.strip()
            if not raw:
                continue
            try:
                records.append(json.loads(raw))
            except Exception:
                # Tolerate partially written or corrupt lines.
                continue
    return records
230
+
231
+
232
def _write_jsonl(filename: str, items: List[Dict]):
    """Atomically rewrite a JSONL file: write a temp file, then rename."""
    path = os.path.join(CACHE_DIR, filename)
    tmp_path = path + ".tmp"
    with open(tmp_path, "w", encoding="utf-8") as handle:
        handle.writelines(json.dumps(entry, ensure_ascii=True) + "\n" for entry in items)
    # os.replace is atomic on POSIX, so readers never see a half-written file.
    os.replace(tmp_path, path)
239
+
240
+
241
def _write_meta(data: Dict):
    """Atomically persist the metadata JSON document."""
    path = os.path.join(CACHE_DIR, META_FILE)
    tmp_path = path + ".tmp"
    with open(tmp_path, "w", encoding="utf-8") as handle:
        json.dump(data, handle)
    os.replace(tmp_path, path)
247
+
248
+
249
@contextmanager
def _file_lock():
    """Serialize cache-dir access across processes via an fcntl lock file.

    Degrades to a no-op on platforms where ``fcntl`` is unavailable
    (e.g. Windows).
    """
    os.makedirs(CACHE_DIR, exist_ok=True)
    if fcntl is None:
        yield
        return
    lock_path = os.path.join(CACHE_DIR, LOCK_FILE)
    with open(lock_path, "w", encoding="utf-8") as lock_file:
        fcntl.flock(lock_file, fcntl.LOCK_EX)
        try:
            yield
        finally:
            fcntl.flock(lock_file, fcntl.LOCK_UN)
262
+
263
+
264
def _maybe_restore_from_hf():
    """Best-effort restore of the cache dir from the configured HF dataset.

    Runs only when the hub client is available, a repo is configured, and
    no local summaries file exists yet. Download failures (network, auth,
    missing repo) are swallowed: previously an exception here propagated
    out of every ``init_storage()`` call and broke startup on a cold cache.
    """
    if not snapshot_download or not HF_REPO_ID:
        return
    if os.path.exists(os.path.join(CACHE_DIR, SUMMARIES_FILE)):
        return
    try:
        snapshot_download(
            repo_id=HF_REPO_ID,
            repo_type=HF_REPO_TYPE,
            local_dir=CACHE_DIR,
            local_dir_use_symlinks=False,
        )
    except Exception:
        # Starting with an empty local cache is an acceptable fallback.
        pass
277
+
278
+
279
def _sync_to_hf_if_configured():
    """Best-effort upload of the cache dir to the configured HF dataset.

    Upload failures (network, missing token, permissions) are swallowed:
    previously an exception here propagated out of ``store_summaries``
    after the local write had already succeeded, losing no data but
    crashing the caller. Local files stay authoritative; the next write
    retries the sync.
    """
    if not HfApi or not HF_REPO_ID:
        return
    try:
        HfApi().upload_folder(
            folder_path=CACHE_DIR,
            repo_id=HF_REPO_ID,
            repo_type=HF_REPO_TYPE,
        )
    except Exception:
        pass
app/utils/ai_summary_worker.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Background worker process for AI summarization."""
2
+
3
+ import os
4
+ import time
5
+ import logging
6
+ import signal
7
+ import sqlite3
8
+ from typing import List, Tuple
9
+
10
+ from utils.llm_summarizer import OpenAICompatSummarizer
11
+ from utils.ai_summary_store import (
12
+ init_storage,
13
+ fetch_ready_batches,
14
+ store_summaries,
15
+ BATCH_MAX_CHARS,
16
+ BUFFER_SECONDS,
17
+ )
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+ PID_FILE = os.getenv("AI_SUMMARY_WORKER_PID", "/tmp/ai_summary_worker.pid")
22
+ POLL_SECONDS = int(os.getenv("AI_SUMMARY_POLL_SECONDS", "5"))
23
+ MAX_RETRIES = int(os.getenv("LLM_SUMMARY_RETRIES", "3"))
24
+
25
+
26
class Worker:
    """Long-running loop that summarizes buffered news items in batches."""

    def __init__(self):
        self._stop = False
        self.summarizer = OpenAICompatSummarizer()

    def stop(self, *_args):
        """Signal handler / external hook: request loop shutdown."""
        self._stop = True

    def run(self):
        """Poll for ready batches until stopped, surviving transient errors."""
        init_storage()
        signal.signal(signal.SIGTERM, self.stop)
        signal.signal(signal.SIGINT, self.stop)

        while not self._stop:
            try:
                for batch in fetch_ready_batches(BATCH_MAX_CHARS, BUFFER_SECONDS):
                    self._process_batch(batch)
            except sqlite3.Error as exc:
                logger.warning(f"AI worker DB error: {exc}")
            except Exception as exc:
                logger.warning(f"AI worker error: {exc}")

            time.sleep(POLL_SECONDS)

    def _process_batch(self, batch: List[Tuple[str, str, str]]):
        """Summarize one batch with exponential-backoff retries and persist
        the results; a batch that never yields a full set of summaries is
        dropped after MAX_RETRIES attempts."""
        if not batch or not self.summarizer.enabled:
            return

        prompts = [
            f"Source: {source}\nTitle: {title}" if source else f"Title: {title}"
            for _, title, source in batch
        ]

        summaries: List[str] = []
        for attempt in range(1, MAX_RETRIES + 1):
            summaries = self.summarizer._summarize_chunk(prompts, source="dashboard")
            if summaries and len(summaries) == len(batch):
                break
            if attempt < MAX_RETRIES:
                # Back off before retrying the LLM call.
                time.sleep(2 ** attempt)
            else:
                logger.warning("AI worker failed to summarize batch after retries")
                return

        to_store = [
            (item_key, title, source, summary)
            for (item_key, title, source), summary in zip(batch, summaries)
            if summary
        ]
        if to_store:
            store_summaries(to_store)
80
+
81
+
82
+ def _pid_running(pid: int) -> bool:
83
+ try:
84
+ os.kill(pid, 0)
85
+ return True
86
+ except Exception:
87
+ return False
88
+
89
+
90
def start_worker_if_needed():
    """Fork a daemonized worker process unless a live one is already recorded.

    The PID file is advisory: a stale or unreadable file is ignored and
    overwritten by the new child. The parent returns immediately; the
    child detaches from the terminal and runs the worker loop forever.
    """
    if os.path.exists(PID_FILE):
        recorded = 0
        try:
            with open(PID_FILE, "r", encoding="utf-8") as handle:
                recorded = int(handle.read().strip() or 0)
        except Exception:
            recorded = 0
        if recorded and _pid_running(recorded):
            return  # A worker is already alive.

    if os.fork() != 0:
        return  # Parent process: nothing more to do.

    # Child: start a new session and record our PID for future checks.
    os.setsid()
    with open(PID_FILE, "w", encoding="utf-8") as handle:
        handle.write(str(os.getpid()))

    Worker().run()
app/utils/breaking_news_scorer.py ADDED
@@ -0,0 +1,368 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Breaking News Scoring System
3
+ Identifies highest-impact financial news using multi-factor weighted scoring
4
+ """
5
+
6
+ import re
7
+ from datetime import datetime, timedelta
8
+ from typing import Dict, List
9
+ import logging
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
class BreakingNewsScorer:
    """
    Sophisticated scoring system for breaking financial news.
    Uses weighted factors to identify market-moving events.

    Fix over the previous revision: ``_score_recency`` used a bare
    ``except:``, which also swallowed ``KeyboardInterrupt``/``SystemExit``;
    it now catches ``Exception`` only.
    """

    # Critical keywords with high market impact (weight: 3.0)
    CRITICAL_KEYWORDS = [
        # Central Bank Actions
        'rate hike', 'rate cut', 'interest rate', 'fed raises', 'fed cuts',
        'fomc decision', 'monetary policy', 'quantitative easing', 'qe',
        'emergency meeting', 'powell', 'lagarde', 'yellen',

        # Market Events
        'market crash', 'flash crash', 'circuit breaker', 'trading halt',
        'all-time high', 'all time high', 'record high', 'record low',
        'biggest drop', 'biggest gain', 'historic', 'unprecedented',

        # Economic Data
        'gdp', 'jobs report', 'unemployment', 'inflation',
        'cpi', 'ppi', 'nonfarm payroll', 'nfp',

        # Corporate Events
        'earnings beat', 'earnings miss', 'profit warning',
        'bankruptcy', 'chapter 11', 'delisted',
        'merger', 'acquisition', 'takeover', 'buyout',

        # Geopolitical
        'war', 'invasion', 'sanctions', 'trade war',
        'embargo', 'default', 'debt ceiling', 'shutdown',
        'impeachment', 'coup', 'terrorist attack'
    ]

    # High-impact keywords (weight: 2.0)
    HIGH_IMPACT_KEYWORDS = [
        # Market Movement
        'surge', 'plunge', 'soar', 'tumble', 'rally', 'selloff',
        'volatility', 'whipsaw', 'correction', 'bear market', 'bull market',

        # Economic Indicators
        'retail sales', 'housing starts', 'consumer confidence',
        'manufacturing index', 'pmi', 'trade deficit',

        # Corporate
        'revenue beat', 'guidance', 'dividend', 'stock split',
        'ipo', 'listing', 'secondary offering',

        # Crypto/Tech
        'bitcoin', 'crypto crash', 'hack', 'breach',
        'antitrust', 'regulation', 'sec investigation',

        # Commodities
        'oil', 'gold', 'crude', 'opec', 'energy crisis',
        'supply chain', 'shortage', 'surplus'
    ]

    # Medium-impact keywords (weight: 1.5)
    MEDIUM_IMPACT_KEYWORDS = [
        'analyst', 'upgrade', 'downgrade', 'target price',
        'forecast', 'outlook', 'projection', 'estimate',
        'conference call', 'ceo', 'cfo', 'executive',
        'lawsuit', 'settlement', 'fine', 'penalty',
        'product launch', 'partnership', 'deal', 'contract'
    ]

    # Premium source weights (multipliers). NOTE: substring matching in
    # _score_source relies on insertion order — more specific keys
    # (e.g. 'walter_bloomberg') must precede their substrings ('bloomberg').
    SOURCE_WEIGHTS = {
        # Tier 1: Breaking News Specialists (2.0x)
        'walter_bloomberg': 2.0,
        'fxhedge': 2.0,
        'deitaone': 2.0,
        'firstsquawk': 1.9,
        'livesquawk': 1.9,

        # Tier 2: Major Financial Media (1.8x)
        'reuters': 1.8,
        'bloomberg': 1.8,
        'ft': 1.7,
        'wsj': 1.7,

        # Tier 3: Mainstream Media (1.5x)
        'cnbc': 1.5,
        'bbc': 1.5,
        'marketwatch': 1.5,

        # Tier 4: Alternative/Community (1.2x)
        'zerohedge': 1.2,
        'wallstreetbets': 1.2,
        'reddit': 1.2,

        # Default
        'default': 1.0
    }

    # Ticker mention bonus (companies that move markets)
    MAJOR_TICKERS = [
        'SPY', 'QQQ', 'DIA', 'IWM',  # Market indices
        'AAPL', 'MSFT', 'GOOGL', 'AMZN', 'NVDA', 'TSLA', 'META',  # Mega caps
        'JPM', 'BAC', 'GS', 'MS', 'WFC',  # Banks
        'XOM', 'CVX', 'COP',  # Energy
        'BTC', 'ETH', 'BTCUSD', 'ETHUSD'  # Crypto
    ]

    def __init__(self):
        """Initialize the breaking news scorer"""
        logger.info("BreakingNewsScorer initialized")

    def calculate_impact_score(self, news_item: Dict) -> float:
        """
        Calculate comprehensive impact score for a news item.

        Args:
            news_item: Dictionary containing news metadata (title, summary,
                source, timestamp, sentiment, impact, category, and optional
                engagement fields).

        Returns:
            Impact score (0-100, higher = more impactful)
        """
        score = 0.0

        # Extract key fields
        title = news_item.get('title', '').lower()
        summary = news_item.get('summary', '').lower()
        source = news_item.get('source', '').lower()
        timestamp = news_item.get('timestamp', datetime.now())
        sentiment = news_item.get('sentiment', 'neutral')
        impact_level = news_item.get('impact', 'low')
        category = news_item.get('category', 'markets')

        # Combine title and summary for keyword analysis
        text = f"{title} {summary}"

        # 1. KEYWORD SCORING (30 points max)
        score += self._score_keywords(text)

        # 2. RECENCY SCORING (20 points max)
        score += self._score_recency(timestamp)

        # 3. SOURCE CREDIBILITY (20 points max)
        score += self._score_source(source)

        # 4. ENGAGEMENT SCORING (15 points max)
        score += self._score_engagement(news_item)

        # 5. SENTIMENT EXTREMITY (10 points max)
        score += self._score_sentiment(sentiment)

        # 6. CATEGORY RELEVANCE (5 points max)
        score += self._score_category(category)

        # 7. TICKER MENTIONS (bonus up to 10 points)
        score += self._score_tickers(text)

        # 8. URGENCY INDICATORS (bonus up to 10 points)
        score += self._score_urgency(text)

        # 9. EXISTING IMPACT LEVEL (weight existing classification)
        if impact_level == 'high':
            score *= 1.2
        elif impact_level == 'medium':
            score *= 1.1

        # Cap at 100
        score = min(score, 100.0)

        logger.debug(f"News '{title[:50]}...' scored: {score:.2f}")

        return score

    def _score_keywords(self, text: str) -> float:
        """Score based on keyword presence and frequency (max 30)."""
        score = 0.0

        # Critical keywords (3.0 points each, max 18)
        critical_matches = sum(1 for kw in self.CRITICAL_KEYWORDS if kw in text)
        score += min(critical_matches * 3.0, 18.0)

        # High-impact keywords (2.0 points each, max 8)
        high_matches = sum(1 for kw in self.HIGH_IMPACT_KEYWORDS if kw in text)
        score += min(high_matches * 2.0, 8.0)

        # Medium-impact keywords (1.0 points each, max 4)
        medium_matches = sum(1 for kw in self.MEDIUM_IMPACT_KEYWORDS if kw in text)
        score += min(medium_matches * 1.0, 4.0)

        return min(score, 30.0)

    def _score_recency(self, timestamp: datetime) -> float:
        """Score based on how recent the news is (max 20, stepwise decay)."""
        try:
            if isinstance(timestamp, str):
                timestamp = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))

            age_seconds = (datetime.now() - timestamp.replace(tzinfo=None)).total_seconds()
            age_minutes = age_seconds / 60

            # Exponential decay: most recent = highest score
            if age_minutes < 5:
                return 20.0  # Within 5 minutes: full score
            elif age_minutes < 15:
                return 18.0  # 5-15 minutes
            elif age_minutes < 30:
                return 15.0  # 15-30 minutes
            elif age_minutes < 60:
                return 10.0  # 30-60 minutes
            elif age_minutes < 180:
                return 5.0  # 1-3 hours
            else:
                return 1.0  # Older than 3 hours
        except Exception:
            # Bare except here previously swallowed KeyboardInterrupt/SystemExit.
            return 5.0  # Default if timestamp parsing fails

    def _score_source(self, source: str) -> float:
        """Score based on source credibility (max 20)."""
        source = source.lower().replace(' ', '_').replace('/', '').replace('@', '')

        # Check for known sources (first substring match wins)
        for source_key, weight in self.SOURCE_WEIGHTS.items():
            if source_key in source:
                return weight * 10.0  # Scale to max 20 points

        return self.SOURCE_WEIGHTS['default'] * 10.0

    def _score_engagement(self, news_item: Dict) -> float:
        """Score based on social engagement metrics (max 15)."""
        score = 0.0
        has_engagement = False

        # Twitter engagement (top-level fields)
        likes = news_item.get('likes', 0)
        if likes > 0:
            has_engagement = True
            score += min(likes / 1000, 5.0)  # Max 5 points for likes

        retweets = news_item.get('retweets', 0)
        if retweets > 0:
            has_engagement = True
            score += min(retweets / 500, 5.0)  # Max 5 points for retweets

        # Reddit engagement (top-level fields)
        reddit_score = news_item.get('reddit_score', 0)
        if reddit_score > 0:
            has_engagement = True
            score += min(reddit_score / 1000, 5.0)  # Max 5 points for score

        comments = news_item.get('reddit_comments', 0)
        if comments > 0:
            has_engagement = True
            score += min(comments / 200, 5.0)  # Max 5 points for comments

        # If no engagement data, return default score
        if not has_engagement:
            return 5.0

        return min(score, 15.0)

    def _score_sentiment(self, sentiment: str) -> float:
        """Score based on sentiment extremity (extreme = more impactful)."""
        if sentiment == 'positive':
            return 8.0  # Strong positive news moves markets
        elif sentiment == 'negative':
            return 10.0  # Negative news tends to have more impact
        else:
            return 3.0  # Neutral news less impactful

    def _score_category(self, category: str) -> float:
        """Score based on category relevance (max 5)."""
        if category == 'macro':
            return 5.0  # Macro news affects entire market
        elif category == 'markets':
            return 4.0  # Direct market news
        elif category == 'geopolitical':
            return 3.0  # Geopolitical can be high impact
        else:
            return 2.0  # Other categories

    def _score_tickers(self, text: str) -> float:
        """Bonus score for mentioning major market-moving tickers (max 10)."""
        text_upper = text.upper()

        # Count major ticker mentions (substring match on uppercased text)
        ticker_mentions = sum(1 for ticker in self.MAJOR_TICKERS if ticker in text_upper)

        # 2 points per ticker, max 10 points
        return min(ticker_mentions * 2.0, 10.0)

    def _score_urgency(self, text: str) -> float:
        """Bonus score for urgency indicators (max 10, 2 points per pattern)."""
        urgency_patterns = [
            r'\bbreaking\b', r'\balert\b', r'\burgent\b', r'\bjust in\b',
            r'\bemergency\b', r'\bimmediate\b', r'\bnow\b', r'\btoday\b',
            r'‼️', r'🚨', r'⚠️', r'πŸ”΄', r'❗'
        ]

        score = 0.0
        for pattern in urgency_patterns:
            if re.search(pattern, text, re.IGNORECASE):
                score += 2.0

        return min(score, 10.0)

    def get_breaking_news(self, news_items: List[Dict], top_n: int = 1) -> List[Dict]:
        """
        Identify top breaking news from a list.

        Args:
            news_items: List of news item dictionaries
            top_n: Number of top items to return

        Returns:
            List of top breaking news items, each augmented with a
            'breaking_score' key, sorted by score descending.
        """
        if not news_items:
            return []

        # Calculate scores for all items
        scored_items = []
        for item in news_items:
            score = self.calculate_impact_score(item)
            scored_items.append({
                **item,
                'breaking_score': score
            })

        # Sort by score (descending)
        scored_items.sort(key=lambda x: x['breaking_score'], reverse=True)

        # Log top items
        logger.info(f"Top {top_n} breaking news:")
        for i, item in enumerate(scored_items[:top_n], 1):
            logger.info(f"  {i}. [{item['breaking_score']:.1f}] {item['title'][:60]}...")

        return scored_items[:top_n]

    def get_breaking_threshold(self) -> float:
        """Get minimum score threshold for breaking news display"""
        return 40.0  # Only show news with score >= 40 (out of 100)
+ return 40.0 # Only show news with score >= 40 (out of 100)
358
+
359
+
360
+ # Singleton instance
361
+ _scorer_instance = None
362
+
363
+ def get_breaking_news_scorer() -> BreakingNewsScorer:
364
+ """Get singleton instance of BreakingNewsScorer"""
365
+ global _scorer_instance
366
+ if _scorer_instance is None:
367
+ _scorer_instance = BreakingNewsScorer()
368
+ return _scorer_instance
app/utils/config.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Configuration management for the financial dashboard."""
2
+
3
+ import os
4
+ from dotenv import load_dotenv
5
+
6
+ # Load environment variables
7
+ load_dotenv()
8
+
9
+
10
class Config:
    """Application configuration, resolved once at import time.

    API keys come from environment variables (loaded via dotenv above);
    cache TTLs and defaults are fixed constants.
    """

    # API Keys (empty string when the variable is unset)
    DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_API_KEY", "")
    NEWS_SERVICE_URL = os.getenv("NEWS_SERVICE_URL", "")
    ALPHA_VANTAGE_KEY = os.getenv("ALPHA_VANTAGE_KEY", "")

    # Cache TTLs, in seconds
    PRICE_DATA_TTL = 3600  # 1 hour
    FUNDAMENTAL_DATA_TTL = 86400  # 24 hours
    NEWS_DATA_TTL = 900  # 15 minutes

    # Default symbols and indicator settings for the dashboard
    DEFAULT_STOCK_SYMBOL = "AAPL"
    DEFAULT_CRYPTO_SYMBOL = "BTC/USD"
    DEFAULT_FOREX_SYMBOL = "EUR/USD"
    DEFAULT_INDICATOR_PERIOD = 20

    # Outbound request behavior
    MAX_RETRY_ATTEMPTS = 3
    REQUEST_TIMEOUT = 30


# Shared configuration instance used throughout the app.
config = Config()
app/utils/formatters.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Data formatting utilities for the financial dashboard."""
2
+
3
+ import pandas as pd
4
+
5
+
6
def format_financial_value(value) -> str:
    """Format a dollar amount with B/M suffixes; NaN/None becomes "N/A"."""
    if pd.isna(value):
        return "N/A"
    magnitude = abs(value)
    if magnitude >= 1e9:
        return f"${value / 1e9:.2f}B"
    if magnitude >= 1e6:
        return f"${value / 1e6:.2f}M"
    return f"${value:.2f}"
16
+
17
+
18
def format_percentage(value: float, decimals: int = 2) -> str:
    """Format a number as a percentage string; NaN/None becomes "N/A"."""
    return "N/A" if pd.isna(value) else f"{value:.{decimals}f}%"
23
+
24
+
25
def format_currency(value: float, decimals: int = 2) -> str:
    """Format a number as a comma-grouped dollar string; NaN becomes "N/A"."""
    return "N/A" if pd.isna(value) else f"${value:,.{decimals}f}"
app/utils/llm_summarizer.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """OpenAI-compatible LLM summarizer for news items."""
2
+
3
+ import json
4
+ import logging
5
+ import os
6
+ import time
7
+ from typing import Dict, List, Optional, Tuple
8
+
9
+ import requests
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
class OpenAICompatSummarizer:
    """
    Summarize news items using an OpenAI-compatible chat completions API.

    Fix over the previous revision: ``_parse_json_array`` now tolerates
    model replies wrapped in markdown code fences (``` / ```json), which
    chat models commonly emit; such replies were previously discarded.
    """

    def __init__(
        self,
        api_base: Optional[str] = None,
        api_key: Optional[str] = None,
        model: Optional[str] = None,
        timeout: Optional[int] = None,
        max_items_per_request: Optional[int] = None,
        max_chars_per_item: Optional[int] = None,
        max_chars_total: Optional[int] = None,
    ):
        # Explicit arguments win; environment variables supply defaults.
        self.api_base = (api_base or os.getenv("LLM_API_BASE") or "https://researchengineering-agi.hf.space").rstrip("/")
        self.api_key = api_key if api_key is not None else os.getenv("LLM_API_KEY", "")
        self.model = model or os.getenv("LLM_MODEL", "gpt-4o-mini")
        self.timeout = timeout or int(os.getenv("LLM_TIMEOUT", "600"))
        # Conservative defaults to avoid large token bursts on slow servers.
        self.max_items_per_request = max_items_per_request or int(os.getenv("LLM_SUMMARY_BATCH", "2"))
        self.max_chars_per_item = max_chars_per_item or int(os.getenv("LLM_SUMMARY_MAX_CHARS", "600"))
        self.max_chars_total = max_chars_total or int(os.getenv("LLM_SUMMARY_MAX_CHARS_TOTAL", "1200"))
        self.enabled = os.getenv("ENABLE_AI_SUMMARIZATION", "true").lower() in {"1", "true", "yes"}
        self.sleep_seconds = float(os.getenv("LLM_SUMMARY_SLEEP_SECONDS", "0"))

        self._chat_url = f"{self.api_base}/v1/chat/completions"

    def summarize_items(self, items: List[Dict], source: Optional[str] = None) -> List[Dict]:
        """Fill ``summary_ai``/``summary`` on items that lack an AI summary.

        Items are processed in size-bounded chunks; items already carrying
        a ``summary_ai`` are left untouched. Returns the same list, mutated
        in place.
        """
        if not self.enabled or not items:
            return items

        candidates: List[Tuple[Dict, str]] = []
        for item in items:
            if str(item.get("summary_ai", "")).strip():
                continue
            text = self._build_input_text(item)
            if text:
                candidates.append((item, text))

        if not candidates:
            return items

        chunks = self._chunked(candidates, self.max_items_per_request)
        for idx, chunk in enumerate(chunks, start=1):
            texts = [text for _, text in chunk]
            if self.max_chars_total > 0:
                texts = self._truncate_to_total(texts, self.max_chars_total)
            summaries = self._summarize_chunk(texts, source=source)
            if not summaries:
                continue
            # zip pairs summaries with the (possibly truncated) prefix of the chunk.
            for (item, _), summary in zip(chunk, summaries):
                if summary:
                    item["summary_ai"] = summary
                    item["summary"] = summary
            if self.sleep_seconds > 0 and idx < len(chunks):
                time.sleep(self.sleep_seconds)

        return items

    def _build_input_text(self, item: Dict) -> str:
        """Render one item as prompt text, capped at max_chars_per_item."""
        title = str(item.get("title", "")).strip()
        if title:
            source = str(item.get("source", "")).strip()
            if len(title) > self.max_chars_per_item:
                title = title[: self.max_chars_per_item].rstrip()
            if source:
                return f"Source: {source}\nTitle: {title}"
            return f"Title: {title}"
        return ""

    def _summarize_chunk(self, texts: List[str], source: Optional[str] = None) -> List[str]:
        """Call the chat-completions endpoint for one chunk of texts.

        Returns one summary per input text, or [] on any transport,
        format, or length mismatch (callers treat [] as a soft failure).
        """
        system_prompt = (
            "You are a financial news summarizer. "
            "Return concise, factual summaries in 1-2 sentences, <=240 characters each. "
            "Do not add speculation or new facts."
        )
        source_line = f"Source: {source}" if source else ""

        items_text = []
        for idx, text in enumerate(texts, start=1):
            items_text.append(f"{idx}. {text}")

        user_prompt = (
            "Summarize each item below. "
            "Return a JSON array of strings in the same order. "
            "No extra text.\n"
            f"{source_line}\n\n" + "\n\n".join(items_text)
        )

        payload = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            "temperature": 0.2,
        }

        headers = {"Content-Type": "application/json"}
        if self.api_key:
            headers["Authorization"] = f"Bearer {self.api_key}"

        try:
            response = requests.post(self._chat_url, json=payload, headers=headers, timeout=self.timeout)
            response.raise_for_status()
            data = response.json()
            content = (
                data.get("choices", [{}])[0]
                .get("message", {})
                .get("content", "")
                .strip()
            )
            summaries = self._parse_json_array(content)
            if summaries and len(summaries) == len(texts):
                return summaries
            logger.warning("LLM summarizer returned unexpected format or length")
            return []
        except Exception as exc:
            logger.warning(f"LLM summarization failed: {exc}")
            return []

    def _parse_json_array(self, content: str) -> List[str]:
        """Parse the model reply into a list of stripped strings.

        Tolerates replies wrapped in a markdown code fence (``` or
        ```json), which chat models frequently emit despite "no extra
        text" instructions. Returns [] for anything else.
        """
        if not content:
            return []
        text = content.strip()
        if text.startswith("```"):
            # Drop the opening fence and an optional "json" language tag.
            text = text[3:]
            if text.lower().startswith("json"):
                text = text[4:]
            if text.rstrip().endswith("```"):
                text = text.rstrip()[:-3]
            text = text.strip()
        try:
            parsed = json.loads(text)
            if isinstance(parsed, list):
                return [str(x).strip() for x in parsed]
            return []
        except Exception:
            return []

    def _chunked(self, items: List[Tuple[Dict, str]], size: int) -> List[List[Tuple[Dict, str]]]:
        """Split items into consecutive chunks of at most ``size`` elements."""
        if size <= 0:
            return [items]
        return [items[i : i + size] for i in range(0, len(items), size)]

    def _truncate_to_total(self, texts: List[str], max_total: int) -> List[str]:
        """Trim the list so the combined character count stays <= max_total.

        Texts past the budget are dropped; the last surviving text may be
        cut short. Order is preserved so callers can align results.
        """
        if max_total <= 0:
            return texts
        truncated = []
        total = 0
        for text in texts:
            if total >= max_total:
                break
            remaining = max_total - total
            if len(text) > remaining:
                text = text[:remaining].rstrip()
            truncated.append(text)
            total += len(text)
        return truncated
app/utils/news_cache.py ADDED
@@ -0,0 +1,391 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Unified News Caching System
3
+ Centralized cache manager for Twitter, Reddit, RSS, and AI/Tech news feeds
4
+ """
5
+
6
+ import hashlib
7
+ import logging
8
+ import re
9
+ from datetime import datetime, timedelta
10
+ from typing import List, Dict, Optional, Callable
11
+
12
+ import pandas as pd
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
class NewsCacheManager:
    """
    Centralized cache manager for news feeds with:
    - Per-source caching with TTL
    - Cross-service deduplication
    - Filtered results caching
    - Force refresh support
    """

    # Single source of truth for every feed this cache manages; used for
    # validation in get_news(), bulk reset in clear_cache() and reporting in
    # get_statistics() (previously this list was duplicated in three places).
    SOURCES = (
        'twitter', 'reddit', 'rss', 'ai_tech',
        'predictions', 'sectoral_news', 'market_events', 'economic_calendar',
    )

    # TTL (seconds) for cached filtered DataFrames.
    FILTERED_TTL = 300

    # Class-held logger so the manager does not depend on a module global.
    _log = logging.getLogger(__name__)

    def __init__(self, default_ttl: int = 180):
        """
        Initialize cache manager

        Args:
            default_ttl: Default time-to-live in seconds (default: 180 = 3 minutes)
        """
        # Keep the configured TTL so clear_cache() restores it; previously
        # clearing silently reset every source to a hard-coded 180s.
        self.default_ttl = default_ttl
        self.cache = {source: self._empty_slot() for source in self.SOURCES}
        self.cache['dedup_index'] = {}     # Global deduplication index
        self.cache['filtered_cache'] = {}  # Cached filtered results
        self._log.info(f"NewsCacheManager initialized with {default_ttl}s TTL")

    def _empty_slot(self) -> Dict:
        """Return a fresh, empty cache slot for a single news source."""
        return {'raw_news': [], 'last_fetch': None, 'ttl': self.default_ttl}

    def get_news(
        self,
        source: str,
        fetcher_func: Callable,
        force_refresh: bool = False,
        deduplicate: bool = False,
        **kwargs
    ) -> List[Dict]:
        """
        Get news from cache or fetch fresh if needed

        Args:
            source: News source (one of SOURCES, e.g. 'twitter', 'reddit')
            fetcher_func: Function to fetch fresh news
            force_refresh: If True, bypass cache and fetch fresh
            deduplicate: If True, remove duplicates across sources using global index
            **kwargs: Arguments to pass to fetcher_func

        Returns:
            List of news items; on fetch failure or an empty fetch the
            previously cached items (possibly expired) are returned.
        """
        if source not in self.SOURCES:
            self._log.error(f"Invalid source: {source}")
            return []

        # Force refresh clears dedup index for that source
        if force_refresh:
            self._clear_source_from_dedup(source)

        # Serve straight from cache while it is still fresh
        if not force_refresh and self._is_cache_valid(source):
            self._log.info(f"βœ… Cache HIT for {source} (age: {self._get_cache_age(source):.1f}s)")
            return self.cache[source]['raw_news']

        # Cache miss or force refresh - fetch fresh news
        self._log.info(f"πŸ”„ Cache MISS for {source} - fetching fresh news...")
        try:
            self._log.info(f"πŸ“ž Calling fetcher for {source} with kwargs: {kwargs}")
            new_items = fetcher_func(**kwargs)
            self._log.info(f"πŸ“¦ Fetcher returned {len(new_items) if new_items else 0} items for {source}")

            if not new_items:
                self._log.warning(f"⚠️ No news items fetched for {source} - returning cached data")
                # Return cached data if available, even if expired
                return self.cache[source]['raw_news']

            # Preserve the original summary before any downstream rewriting
            self._prepare_summaries(new_items)

            # Update cache
            self._update_cache(source, new_items)

            if deduplicate:
                deduplicated = self._deduplicate(new_items, source)
                self._log.info(f"βœ… Fetched {len(new_items)} items for {source}, {len(deduplicated)} unique after dedup")
                return deduplicated

            self._log.info(f"βœ… Fetched {len(new_items)} items for {source} (dedup disabled)")
            return new_items

        except Exception as e:
            self._log.error(f"Error fetching news for {source}: {e}")
            # Return cached data if available
            return self.cache[source]['raw_news']

    def _is_cache_valid(self, source: str) -> bool:
        """
        Check if cached data is still fresh

        Args:
            source: News source to check

        Returns:
            True if the source was fetched less than its TTL ago
        """
        slot = self.cache[source]
        if not slot['last_fetch']:
            return False
        age = (datetime.now() - slot['last_fetch']).total_seconds()
        return age < slot['ttl']

    def _get_cache_age(self, source: str) -> float:
        """
        Get age of cached data in seconds

        Args:
            source: News source

        Returns:
            Age in seconds, or -1 if never fetched
        """
        slot = self.cache[source]
        if not slot['last_fetch']:
            return -1
        return (datetime.now() - slot['last_fetch']).total_seconds()

    def _normalize_text(self, text: str) -> str:
        """
        Normalize text for deduplication: lowercase, strip punctuation,
        collapse whitespace.

        Args:
            text: Text to normalize

        Returns:
            Normalized text ('' for falsy input)
        """
        if not text:
            return ""
        text = text.lower().strip()
        text = re.sub(r'[^\w\s]', '', text)  # drop punctuation
        text = re.sub(r'\s+', ' ', text)     # collapse whitespace runs
        return text

    def _compute_hash(self, item: Dict) -> str:
        """
        Compute content hash for deduplication, built from the normalized
        title plus the first 200 chars of the summary (prefers the raw,
        pre-rewrite 'summary_raw' when present).

        Args:
            item: News item dict

        Returns:
            MD5 hash string
        """
        title = self._normalize_text(item.get('title', ''))
        summary_source = item.get('summary_raw', item.get('summary', ''))
        summary = self._normalize_text(str(summary_source)[:200])  # First 200 chars
        combined = f"{title}|{summary}"
        return hashlib.md5(combined.encode()).hexdigest()

    def _deduplicate(self, items: List[Dict], source: str) -> List[Dict]:
        """
        Remove duplicates using global dedup index

        Args:
            items: List of news items
            source: Source name

        Returns:
            Deduplicated list of items (first occurrence anywhere wins)
        """
        deduplicated = []
        duplicate_count = 0

        for item in items:
            content_hash = self._compute_hash(item)
            entry = self.cache['dedup_index'].get(content_hash)
            if entry is not None:
                # Duplicate found - just record that this source saw it too
                if source not in entry['sources']:
                    entry['sources'].append(source)
                duplicate_count += 1
            else:
                # New item - add to index and result
                self.cache['dedup_index'][content_hash] = {
                    'first_seen': datetime.now(),
                    'sources': [source],
                    'canonical_item': item
                }
                deduplicated.append(item)

        if duplicate_count > 0:
            self._log.info(f"πŸ” Deduplication: Found {duplicate_count} duplicates for {source}")

        return deduplicated

    def _update_cache(self, source: str, items: List[Dict]):
        """
        Update cache with new items and stamp the fetch time

        Args:
            source: News source
            items: List of news items
        """
        self.cache[source]['raw_news'] = items
        self.cache[source]['last_fetch'] = datetime.now()
        self._log.info(f"πŸ“¦ Updated cache for {source} with {len(items)} items")

    def _prepare_summaries(self, items: List[Dict]):
        """Ensure every item keeps its original summary under 'summary_raw',
        which _compute_hash prefers even after 'summary' is rewritten."""
        for item in items:
            if 'summary_raw' not in item:
                item['summary_raw'] = item.get('summary', '')

    def get_filtered_news(
        self,
        source_df: pd.DataFrame,
        filters: Dict,
        source_name: str = "unknown"
    ) -> pd.DataFrame:
        """
        Get filtered news with caching

        Args:
            source_df: Source dataframe (expects 'category', 'sentiment' and
                'impact' columns for any corresponding non-'all' filter)
            filters: Filter dict with 'category', 'sentiment', 'impact' keys
            source_name: Name of source (used in logging and the cache key)

        Returns:
            Filtered dataframe
        """
        if source_df.empty:
            return source_df

        # Create cache key from filters
        category = filters.get('category', 'all')
        sentiment = filters.get('sentiment', 'all')
        impact = filters.get('impact', 'all')
        cache_key = f"{source_name}_{category}_{sentiment}_{impact}"

        # Serve the cached filter result while it is still fresh
        cached_entry = self.cache['filtered_cache'].get(cache_key)
        if cached_entry and datetime.now() < cached_entry['expires_at']:
            self._log.debug(f"βœ… Filtered cache HIT for {cache_key}")
            return cached_entry['results']

        # Apply each active filter
        filtered_df = source_df.copy()
        if category != 'all':
            filtered_df = filtered_df[filtered_df['category'] == category]
        if sentiment != 'all':
            filtered_df = filtered_df[filtered_df['sentiment'] == sentiment]
        if impact != 'all':
            filtered_df = filtered_df[filtered_df['impact'] == impact]

        self._log.debug(f"πŸ” Filtered {source_name}: {len(source_df)} β†’ {len(filtered_df)} items")

        # Cache filtered results (5 minute TTL)
        self.cache['filtered_cache'][cache_key] = {
            'results': filtered_df,
            'expires_at': datetime.now() + timedelta(seconds=self.FILTERED_TTL)
        }

        return filtered_df

    def _clear_source_from_dedup(self, source: str):
        """
        Remove all entries from dedup index that only belong to this source

        Args:
            source: Source to remove from dedup index
        """
        to_remove = []
        for content_hash, entry in self.cache['dedup_index'].items():
            # Remove source from sources list
            if source in entry['sources']:
                entry['sources'].remove(source)
                # If no sources left, mark for removal
                if not entry['sources']:
                    to_remove.append(content_hash)

        # Remove entries with no sources
        for content_hash in to_remove:
            del self.cache['dedup_index'][content_hash]

        if to_remove:
            self._log.info(f"πŸ—‘οΈ Removed {len(to_remove)} entries from dedup index for {source}")

    def clear_cache(self, source: Optional[str] = None):
        """
        Clear cache for specific source or all sources

        Args:
            source: Source to clear, or None to clear all
        """
        if source:
            # Bug fix: restore the configured TTL (was hard-coded to 180s,
            # silently discarding a custom default_ttl)
            self.cache[source] = self._empty_slot()
            self._clear_source_from_dedup(source)
            self._log.info(f"πŸ—‘οΈ Cleared cache for {source}")
        else:
            for src in self.SOURCES:
                self.cache[src] = self._empty_slot()
            self.cache['dedup_index'] = {}
            self.cache['filtered_cache'] = {}
            self._log.info("πŸ—‘οΈ Cleared ALL caches")

    def get_statistics(self) -> Dict:
        """
        Get cache statistics

        Returns:
            Dict with one entry per source ({'items', 'age_seconds',
            'is_valid'}) plus 'dedup_index_size' and 'filtered_cache_size'
        """
        stats = {
            src: {
                'items': len(self.cache[src]['raw_news']),
                'age_seconds': self._get_cache_age(src),
                'is_valid': self._is_cache_valid(src),
            }
            for src in self.SOURCES
        }
        stats['dedup_index_size'] = len(self.cache['dedup_index'])
        stats['filtered_cache_size'] = len(self.cache['filtered_cache'])
        return stats
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit>=1.30.0
2
+ pandas>=2.0.0
3
+ plotly>=5.18.0
4
+ openbb>=4.0.0
5
+ python-dotenv>=1.0.0
6
+ requests>=2.31.0
7
+ twikit>=2.3.0
8
+ feedparser>=6.0.0
9
+ beautifulsoup4>=4.12.0
10
+ lxml>=5.0.0
11
+ ntscraper
12
+ playwright>=1.40.0
13
+ huggingface_hub>=0.22.2