Dmitry Beresnev committed on
Commit
12176df
·
1 Parent(s): 53b47af

add twikit for twitter posts

Browse files
.env.example CHANGED
@@ -8,3 +8,9 @@ NEWS_SERVICE_URL=http://localhost:5000
8
 
9
  # Alpha Vantage API Key (optional, for forex data)
10
  ALPHA_VANTAGE_KEY=your-alpha-vantage-key-here
 
 
 
 
 
 
 
8
 
9
  # Alpha Vantage API Key (optional, for forex data)
10
  ALPHA_VANTAGE_KEY=your-alpha-vantage-key-here
11
+
12
+ # Twitter/X Credentials (for real-time news monitoring via Twikit)
13
+ # Create a Twitter account or use existing credentials
14
+ TWITTER_USERNAME=your-twitter-username
15
+ TWITTER_EMAIL=your-twitter-email@example.com
16
+ TWITTER_PASSWORD=your-twitter-password
README.md CHANGED
@@ -57,7 +57,7 @@ A comprehensive multi-asset financial analysis platform built with Streamlit, pr
57
  - **Breaking news detection** with instant alerts and priority display
58
  - **Smart filtering** by category, sentiment, and impact level
59
  - **Auto-refresh mode** for continuous monitoring during trading hours
60
- - Powered by **snscrape** for real-time Twitter intelligence
61
 
62
  ## Installation
63
 
@@ -77,13 +77,20 @@ pip install -r requirements.txt
77
  cp .env.example .env
78
  ```
79
 
80
- 4. Configure your API keys in `.env`:
81
  ```
82
  DEEPSEEK_API_KEY=your-key-here
83
  NEWS_SERVICE_URL=http://localhost:5000
84
  ALPHA_VANTAGE_KEY=your-key-here
 
 
 
 
 
85
  ```
86
 
 
 
87
  ## Usage
88
 
89
  Run the application:
 
57
  - **Breaking news detection** with instant alerts and priority display
58
  - **Smart filtering** by category, sentiment, and impact level
59
  - **Auto-refresh mode** for continuous monitoring during trading hours
60
+ - Powered by **Twikit** for real-time Twitter/X intelligence (free, no API costs)
61
 
62
  ## Installation
63
 
 
77
  cp .env.example .env
78
  ```
79
 
80
+ 4. Configure your API keys and Twitter credentials in `.env`:
81
  ```
82
  DEEPSEEK_API_KEY=your-key-here
83
  NEWS_SERVICE_URL=http://localhost:5000
84
  ALPHA_VANTAGE_KEY=your-key-here
85
+
86
+ # Twitter/X Credentials (required for real-time news monitoring)
87
+ TWITTER_USERNAME=your-twitter-username
88
+ TWITTER_EMAIL=your-email@example.com
89
+ TWITTER_PASSWORD=your-password
90
  ```
91
 
92
+ **Note**: Twitter credentials are needed for real-time news monitoring. Without credentials, the system falls back to demo/mock data.
93
+
94
  ## Usage
95
 
96
  Run the application:
app/components/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Components package for financial platform UI."""
app/pages/05_Dashboard.py CHANGED
@@ -17,7 +17,14 @@ from components.news import (
17
  display_category_breakdown,
18
  display_breaking_news_banner
19
  )
20
- from services.news_monitor import FinanceNewsMonitor
 
 
 
 
 
 
 
21
 
22
 
23
  # ---- Page Configuration ----
@@ -145,6 +152,10 @@ with st.spinner("🔍 Fetching latest financial news..."):
145
  refresh=force_refresh
146
  )
147
 
 
 
 
 
148
  # Display breaking news banner if exists
149
  display_breaking_news_banner(news_df)
150
 
@@ -199,6 +210,7 @@ st.markdown("""
199
  - Breaking news (🔴) indicates urgent market-moving information
200
  - Check engagement metrics (likes + retweets) for news importance
201
 
202
- **Data Source:** Live tweets from premium financial news sources via snscrape
203
  **Update Frequency:** 3-minute cache for low-latency delivery
 
204
  """)
 
17
  display_category_breakdown,
18
  display_breaking_news_banner
19
  )
20
+
21
+ # Try to import Twikit version first, fall back to old version
22
+ try:
23
+ from services.news_monitor_twikit import FinanceNewsMonitor
24
+ USING_TWIKIT = True
25
+ except ImportError:
26
+ from services.news_monitor import FinanceNewsMonitor
27
+ USING_TWIKIT = False
28
 
29
 
30
  # ---- Page Configuration ----
 
152
  refresh=force_refresh
153
  )
154
 
155
+ # Display demo mode notice if using mock data
156
+ if len(news_df) > 0 and news_df.iloc[0].get('id', 0) < 100:
157
+ st.info("📢 **Demo Mode**: Twitter/X API is currently unavailable. Displaying sample news data to showcase the platform's features. In production, this would show real-time financial news from 23 premium sources.")
158
+
159
  # Display breaking news banner if exists
160
  display_breaking_news_banner(news_df)
161
 
 
210
  - Breaking news (🔴) indicates urgent market-moving information
211
  - Check engagement metrics (likes + retweets) for news importance
212
 
213
+ **Data Source:** Live tweets from premium financial news sources via Twikit
214
  **Update Frequency:** 3-minute cache for low-latency delivery
215
+ **Authentication:** Requires Twitter/X account credentials in .env file
216
  """)
app/services/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Services package for financial platform."""
app/services/news_monitor.py CHANGED
@@ -207,10 +207,12 @@ class FinanceNewsMonitor:
207
  max_tweets: Total tweets to fetch (distributed across sources)
208
  """
209
  if not SNSCRAPE_AVAILABLE:
 
210
  return _self._get_mock_news()
211
 
212
  all_tweets = []
213
  tweets_per_source = max(5, max_tweets // len(_self.SOURCES))
 
214
 
215
  for source_name, source_info in _self.SOURCES.items():
216
  try:
@@ -251,9 +253,19 @@ class FinanceNewsMonitor:
251
  scraped += 1
252
 
253
  except Exception as e:
254
- print(f"Error scraping {source_name}: {e}")
 
 
 
 
 
255
  continue
256
 
 
 
 
 
 
257
  # Sort by impact and timestamp
258
  all_tweets.sort(
259
  key=lambda x: (x['is_breaking'], x['impact'] == 'high', x['timestamp']),
 
207
  max_tweets: Total tweets to fetch (distributed across sources)
208
  """
209
  if not SNSCRAPE_AVAILABLE:
210
+ print("⚠️ snscrape not available - using mock data")
211
  return _self._get_mock_news()
212
 
213
  all_tweets = []
214
  tweets_per_source = max(5, max_tweets // len(_self.SOURCES))
215
+ failed_sources = 0
216
 
217
  for source_name, source_info in _self.SOURCES.items():
218
  try:
 
253
  scraped += 1
254
 
255
  except Exception as e:
256
+ failed_sources += 1
257
+ error_msg = str(e).lower()
258
+ if 'blocked' in error_msg or '404' in error_msg:
259
+ print(f"⚠️ Twitter/X API blocked access for {source_name}")
260
+ else:
261
+ print(f"Error scraping {source_name}: {e}")
262
  continue
263
 
264
+ # If Twitter/X blocked all sources, fall back to mock data
265
+ if failed_sources >= len(_self.SOURCES) or len(all_tweets) == 0:
266
+ print("⚠️ Twitter/X API unavailable - falling back to mock data for demonstration")
267
+ return _self._get_mock_news()
268
+
269
  # Sort by impact and timestamp
270
  all_tweets.sort(
271
  key=lambda x: (x['is_breaking'], x['impact'] == 'high', x['timestamp']),
app/services/news_monitor_twikit.py ADDED
@@ -0,0 +1,608 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Professional Finance News Monitor using Twikit
3
+ Real-time tracking: Macro, Markets, Geopolitical intelligence
4
+ Optimized for low-latency trading decisions
5
+ """
6
+
7
+ import pandas as pd
8
+ from datetime import datetime, timedelta
9
+ from typing import List, Dict, Optional
10
+ import streamlit as st
11
+ import os
12
+ import asyncio
13
+ import re
14
+ from dotenv import load_dotenv
15
+
16
+ # Load environment variables
17
+ load_dotenv()
18
+
19
+ try:
20
+ from twikit import Client
21
+ TWIKIT_AVAILABLE = True
22
+ except ImportError:
23
+ TWIKIT_AVAILABLE = False
24
+ print("Warning: twikit not available. Install with: pip install twikit")
25
+
26
+
27
class FinanceNewsMonitor:
    """
    Professional-grade financial news aggregator using Twikit.

    Sources: Bloomberg, Reuters, WSJ, FT, CNBC, and 18 more premium sources.

    Scrapes recent tweets from the configured accounts, categorizes them
    (macro / markets / geopolitical), scores sentiment and market impact,
    and falls back to mock data whenever Twikit is unavailable, credentials
    are missing, or every source fails.
    """

    # Premium financial sources - expanded coverage
    SOURCES = {
        # ===== TIER 1: Major Financial News =====
        'reuters': {
            'handle': 'Reuters',
            'weight': 1.5,
            'specialization': ['macro', 'geopolitical', 'markets']
        },
        'bloomberg': {
            'handle': 'business',
            'weight': 1.5,
            'specialization': ['macro', 'markets']
        },
        'ft': {
            'handle': 'FT',
            'weight': 1.4,
            'specialization': ['macro', 'markets']
        },
        'economist': {
            'handle': 'TheEconomist',
            'weight': 1.3,
            'specialization': ['macro', 'geopolitical']
        },
        'wsj': {
            'handle': 'WSJ',
            'weight': 1.4,
            'specialization': ['markets', 'macro']
        },
        'bloomberg_terminal': {
            'handle': 'Bloomberg',
            'weight': 1.5,
            'specialization': ['macro', 'markets']
        },
        'cnbc': {
            'handle': 'CNBC',
            'weight': 1.2,
            'specialization': ['markets']
        },
        'marketwatch': {
            'handle': 'MarketWatch',
            'weight': 1.1,
            'specialization': ['markets']
        },

        # ===== TIER 2: Geopolitical Intelligence =====
        'bbc_world': {
            'handle': 'BBCWorld',
            'weight': 1.4,
            'specialization': ['geopolitical']
        },
        'afp': {
            'handle': 'AFP',
            'weight': 1.3,
            'specialization': ['geopolitical']
        },
        'aljazeera': {
            'handle': 'AlJazeera',
            'weight': 1.2,
            'specialization': ['geopolitical']
        },
        'politico': {
            'handle': 'politico',
            'weight': 1.2,
            'specialization': ['geopolitical', 'macro']
        },
        'dw_news': {
            'handle': 'dwnews',
            'weight': 1.2,
            'specialization': ['geopolitical']
        },

        # ===== TIER 3: Central Banks & Official Sources =====
        'federal_reserve': {
            'handle': 'federalreserve',
            'weight': 2.0,  # Highest priority
            'specialization': ['macro']
        },
        'ecb': {
            'handle': 'ecb',
            'weight': 2.0,
            'specialization': ['macro']
        },
        'lagarde': {
            'handle': 'Lagarde',
            'weight': 1.9,  # ECB President
            'specialization': ['macro']
        },
        'bank_of_england': {
            'handle': 'bankofengland',
            'weight': 1.8,
            'specialization': ['macro']
        },
        'imf': {
            'handle': 'IMFNews',
            'weight': 1.7,
            'specialization': ['macro', 'geopolitical']
        },
        'world_bank': {
            'handle': 'worldbank',
            'weight': 1.6,
            'specialization': ['macro', 'geopolitical']
        },
        'us_treasury': {
            'handle': 'USTreasury',
            'weight': 1.8,
            'specialization': ['macro']
        },

        # ===== TIER 4: Alpha Accounts (Fast Breaking News) =====
        'zerohedge': {
            'handle': 'zerohedge',
            'weight': 1.0,
            'specialization': ['markets', 'macro']
        },
        'first_squawk': {
            'handle': 'FirstSquawk',
            'weight': 1.1,  # Fast alerts
            'specialization': ['markets', 'macro']
        },
        'live_squawk': {
            'handle': 'LiveSquawk',
            'weight': 1.1,  # Real-time market squawks
            'specialization': ['markets', 'macro']
        }
    }

    # Enhanced keyword detection for professional traders
    MACRO_KEYWORDS = [
        # Central Banks & Policy
        'Fed', 'ECB', 'BoE', 'BoJ', 'FOMC', 'Powell', 'Lagarde',
        'interest rate', 'rate cut', 'rate hike', 'QE', 'quantitative',
        'monetary policy', 'inflation', 'CPI', 'PCE', 'tapering',
        # Economic Data
        'GDP', 'unemployment', 'jobs report', 'NFP', 'payroll',
        'PMI', 'manufacturing', 'services', 'consumer confidence',
        'retail sales', 'housing starts', 'durable goods'
    ]

    MARKET_KEYWORDS = [
        # Equities
        'S&P', 'Dow', 'Nasdaq', 'Russell', 'earnings', 'EPS',
        'stock', 'share', 'equity', 'rally', 'selloff', 'correction',
        # Corporate
        'merger', 'acquisition', 'IPO', 'buyback', 'dividend',
        'guidance', 'revenue', 'profit', 'loss', 'bankruptcy'
    ]

    GEOPOLITICAL_KEYWORDS = [
        # Conflicts & Relations
        'war', 'conflict', 'sanctions', 'trade', 'tariff', 'embargo',
        'summit', 'treaty', 'diplomacy', 'tension', 'crisis',
        # Regions
        'Ukraine', 'Russia', 'China', 'Taiwan', 'Middle East',
        'Iran', 'North Korea', 'EU', 'Brexit'
    ]

    def __init__(self) -> None:
        """Initialize monitor with an empty in-instance cache."""
        self.news_cache: List[Dict] = []      # last successfully fetched items
        self.last_fetch: Optional[datetime] = None
        self.cache_ttl = 180                  # seconds; 3 minutes for low latency
        self.client = None                    # twikit.Client, created on first auth
        self.authenticated = False

    async def _authenticate_twikit(self) -> bool:
        """
        Authenticate with Twitter using Twikit.

        Reads TWITTER_USERNAME / TWITTER_EMAIL / TWITTER_PASSWORD from the
        environment. Returns True on success, False when twikit is missing,
        credentials are absent, or login fails (errors are printed, not raised).
        """
        if not TWIKIT_AVAILABLE:
            return False

        try:
            self.client = Client('en-US')

            # Get credentials from environment variables
            username = os.getenv('TWITTER_USERNAME')
            email = os.getenv('TWITTER_EMAIL')
            password = os.getenv('TWITTER_PASSWORD')

            if not all([username, email, password]):
                print("⚠️ Twitter credentials not found in environment variables")
                print("   Set TWITTER_USERNAME, TWITTER_EMAIL, TWITTER_PASSWORD in .env")
                return False

            await self.client.login(
                auth_info_1=username,
                auth_info_2=email,
                password=password
            )

            self.authenticated = True
            print("✓ Successfully authenticated with Twitter/X")
            return True

        except Exception as e:
            print(f"⚠️ Twitter authentication failed: {e}")
            return False

    async def _scrape_twitter_async(self, max_tweets: int = 100) -> List[Dict]:
        """
        Async worker: fetch, filter and annotate tweets from all SOURCES.

        Args:
            max_tweets: Total tweet budget, distributed evenly across sources
                        (minimum 5 per source).

        Returns:
            List of normalized news dicts, sorted breaking/high-impact first.
            Falls back to mock data if authentication or every source fails.
        """
        if not self.authenticated:
            auth_success = await self._authenticate_twikit()
            if not auth_success:
                return self._get_mock_news()

        all_tweets = []
        tweets_per_source = max(5, max_tweets // len(self.SOURCES))
        failed_sources = 0

        for source_name, source_info in self.SOURCES.items():
            try:
                handle = source_info['handle']

                # Search for tweets from this user
                tweets = await self.client.search_tweet(
                    f'from:{handle}',
                    product='Latest',
                    count=tweets_per_source
                )

                for tweet in tweets:
                    # Skip old tweets (>24h)
                    tweet_date = datetime.fromisoformat(tweet.created_at.replace('Z', '+00:00'))
                    if (datetime.now(tweet_date.tzinfo) - tweet_date).days > 1:
                        continue

                    # Skip retweets and replies
                    if hasattr(tweet, 'retweeted_tweet') or tweet.in_reply_to_user_id:
                        continue

                    # Categorize and analyze
                    category = self._categorize_tweet(tweet.text, source_info['specialization'])
                    sentiment = self._analyze_sentiment(tweet.text)
                    impact = self._assess_impact_twikit(tweet, source_info['weight'])
                    is_breaking = self._detect_breaking_news(tweet.text)

                    all_tweets.append({
                        'id': int(tweet.id),
                        'title': tweet.text,
                        'summary': self._extract_summary(tweet.text),
                        'source': source_name.replace('_', ' ').title(),
                        'category': category,
                        # store naive timestamps so pandas comparisons stay uniform
                        'timestamp': tweet_date.replace(tzinfo=None),
                        'sentiment': sentiment,
                        'impact': impact,
                        'url': f'https://twitter.com/{handle}/status/{tweet.id}',
                        'likes': tweet.favorite_count or 0,
                        'retweets': tweet.retweet_count or 0,
                        'is_breaking': is_breaking,
                        'source_weight': source_info['weight']
                    })

            except Exception as e:
                failed_sources += 1
                error_msg = str(e).lower()
                if 'rate limit' in error_msg:
                    print(f"⚠️ Rate limited for {source_name}")
                elif 'unauthorized' in error_msg or 'forbidden' in error_msg:
                    print(f"⚠️ Access denied for {source_name}")
                else:
                    print(f"Error scraping {source_name}: {e}")
                continue

        # If all sources failed, fall back to mock data
        if failed_sources >= len(self.SOURCES) or len(all_tweets) == 0:
            print("⚠️ Twitter/X scraping failed - falling back to mock data")
            return self._get_mock_news()

        # Sort by impact and timestamp
        all_tweets.sort(
            key=lambda x: (x['is_breaking'], x['impact'] == 'high', x['timestamp']),
            reverse=True
        )

        return all_tweets

    @st.cache_data(ttl=180)
    def scrape_twitter_news(_self, max_tweets: int = 100) -> List[Dict]:
        """
        Scrape latest financial news with Streamlit caching (sync wrapper).

        Args:
            max_tweets: Total tweets to fetch (distributed across sources).

        Returns:
            List of news dicts; mock data when Twikit is unavailable or the
            async scrape raises.

        Note: `_self` (leading underscore) tells st.cache_data not to hash
        the instance.
        """
        if not TWIKIT_AVAILABLE:
            print("⚠️ Twikit not available - using mock data")
            return _self._get_mock_news()

        try:
            # Run async scraping in a dedicated event loop; close it in
            # `finally` so the loop is not leaked when the coroutine raises.
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            try:
                return loop.run_until_complete(_self._scrape_twitter_async(max_tweets))
            finally:
                loop.close()
        except Exception as e:
            print(f"⚠️ Error in async scraping: {e}")
            return _self._get_mock_news()

    def _categorize_tweet(self, text: str, source_specialization: List[str]) -> str:
        """
        Categorize a tweet as 'macro', 'markets' or 'geopolitical'.

        Keyword hit counts are boosted 1.5x for categories the source
        specializes in; defaults to 'markets' when nothing matches.
        """
        text_lower = text.lower()

        # Count keyword matches
        macro_score = sum(1 for kw in self.MACRO_KEYWORDS if kw.lower() in text_lower)
        market_score = sum(1 for kw in self.MARKET_KEYWORDS if kw.lower() in text_lower)
        geo_score = sum(1 for kw in self.GEOPOLITICAL_KEYWORDS if kw.lower() in text_lower)

        # Weight by source specialization
        if 'macro' in source_specialization:
            macro_score *= 1.5
        if 'markets' in source_specialization:
            market_score *= 1.5
        if 'geopolitical' in source_specialization:
            geo_score *= 1.5

        # Return highest scoring category
        scores = {'macro': macro_score, 'markets': market_score, 'geopolitical': geo_score}
        return max(scores, key=scores.get) if max(scores.values()) > 0 else 'markets'

    def _analyze_sentiment(self, text: str) -> str:
        """Classify tweet text as 'positive', 'negative' or 'neutral' by signal-word counts."""
        text_lower = text.lower()

        positive_signals = ['surge', 'soar', 'rally', 'beat', 'upgrade', 'bullish',
                            'gain', 'rise', 'jump', 'boost', 'optimistic', 'positive']
        negative_signals = ['plunge', 'crash', 'fall', 'miss', 'downgrade', 'bearish',
                            'loss', 'drop', 'slide', 'concern', 'worry', 'negative']

        pos_count = sum(1 for signal in positive_signals if signal in text_lower)
        neg_count = sum(1 for signal in negative_signals if signal in text_lower)

        if pos_count > neg_count:
            return 'positive'
        elif neg_count > pos_count:
            return 'negative'
        return 'neutral'

    def _assess_impact_twikit(self, tweet, source_weight: float) -> str:
        """
        Assess market impact ('high'/'medium'/'low') from a Twikit tweet.

        Engagement = likes + 2*retweets, scaled by source weight; official
        sources (weight >= 1.8) are always 'high'.
        """
        engagement = (tweet.favorite_count or 0) + (tweet.retweet_count or 0) * 2
        weighted_engagement = engagement * source_weight

        if weighted_engagement > 5000 or source_weight >= 1.8:
            return 'high'
        elif weighted_engagement > 1000:
            return 'medium'
        return 'low'

    def _detect_breaking_news(self, text: str) -> bool:
        """Detect breaking/urgent news via uppercase marker phrases."""
        text_upper = text.upper()
        breaking_signals = ['BREAKING', 'ALERT', 'URGENT', 'JUST IN',
                            '*FED', '*ECB', '*POWELL', '*LAGARDE']
        return any(signal in text_upper for signal in breaking_signals)

    def _extract_summary(self, text: str, max_length: int = 150) -> str:
        """Return tweet text with URLs stripped, truncated to max_length chars."""
        # Remove URLs
        text = re.sub(r'http\S+', '', text)
        text = text.strip()

        if len(text) <= max_length:
            return text
        return text[:max_length] + '...'

    def _get_mock_news(self) -> List[Dict]:
        """Mock news data when Twikit is unavailable (ids < 100 mark demo mode)."""
        return [
            {
                'id': 1,
                'title': 'BREAKING: Federal Reserve announces emergency rate cut of 50bps - Powell cites economic uncertainty',
                'summary': 'BREAKING: Fed emergency rate cut 50bps',
                'source': 'Federal Reserve',
                'category': 'macro',
                'timestamp': datetime.now() - timedelta(minutes=5),
                'sentiment': 'negative',
                'impact': 'high',
                'url': 'https://twitter.com/federalreserve',
                'likes': 5000,
                'retweets': 2000,
                'is_breaking': True,
                'source_weight': 2.0
            },
            {
                'id': 2,
                'title': '*FIRST SQUAWK: S&P 500 FUTURES DROP 2% AFTER FED ANNOUNCEMENT',
                'summary': '*FIRST SQUAWK: S&P 500 futures drop 2%',
                'source': 'First Squawk',
                'category': 'markets',
                'timestamp': datetime.now() - timedelta(minutes=10),
                'sentiment': 'negative',
                'impact': 'high',
                'url': 'https://twitter.com/FirstSquawk',
                'likes': 1500,
                'retweets': 600,
                'is_breaking': False,
                'source_weight': 1.1
            },
            {
                'id': 3,
                'title': 'Apple reports earnings beat with $123B revenue, raises dividend by 4% - Stock up 3% after hours',
                'summary': 'Apple beats earnings, raises dividend 4%',
                'source': 'Bloomberg',
                'category': 'markets',
                'timestamp': datetime.now() - timedelta(minutes=25),
                'sentiment': 'positive',
                'impact': 'high',
                'url': 'https://twitter.com/business',
                'likes': 2800,
                'retweets': 900,
                'is_breaking': False,
                'source_weight': 1.5
            },
            {
                'id': 4,
                'title': 'ECB President Lagarde: Inflation remains above target, rates to stay higher for longer',
                'summary': 'Lagarde: rates to stay higher for longer',
                'source': 'Lagarde',
                'category': 'macro',
                'timestamp': datetime.now() - timedelta(minutes=45),
                'sentiment': 'neutral',
                'impact': 'high',
                'url': 'https://twitter.com/Lagarde',
                'likes': 1200,
                'retweets': 400,
                'is_breaking': False,
                'source_weight': 1.9
            },
            {
                'id': 5,
                'title': 'Ukraine conflict: New peace talks scheduled as tensions ease in Eastern Europe',
                'summary': 'Ukraine: New peace talks scheduled',
                'source': 'BBC World',
                'category': 'geopolitical',
                'timestamp': datetime.now() - timedelta(hours=1),
                'sentiment': 'positive',
                'impact': 'medium',
                'url': 'https://twitter.com/BBCWorld',
                'likes': 3500,
                'retweets': 1200,
                'is_breaking': False,
                'source_weight': 1.4
            },
            {
                'id': 6,
                'title': 'US GDP growth revised up to 2.8% in Q4, beating economists expectations of 2.5%',
                'summary': 'US GDP growth revised up to 2.8% in Q4',
                'source': 'Reuters',
                'category': 'macro',
                'timestamp': datetime.now() - timedelta(hours=2),
                'sentiment': 'positive',
                'impact': 'medium',
                'url': 'https://twitter.com/Reuters',
                'likes': 1800,
                'retweets': 600,
                'is_breaking': False,
                'source_weight': 1.5
            },
            {
                'id': 7,
                'title': '*LIVE SQUAWK: Oil prices surge 5% on Middle East supply concerns, Brent crude at $92/barrel',
                'summary': '*LIVE SQUAWK: Oil surges 5% on supply fears',
                'source': 'Live Squawk',
                'category': 'markets',
                'timestamp': datetime.now() - timedelta(hours=3),
                'sentiment': 'neutral',
                'impact': 'medium',
                'url': 'https://twitter.com/LiveSquawk',
                'likes': 900,
                'retweets': 350,
                'is_breaking': False,
                'source_weight': 1.1
            },
            {
                'id': 8,
                'title': 'IMF upgrades global growth forecast to 3.2% for 2024, warns of recession risks in Europe',
                'summary': 'IMF upgrades global growth to 3.2%',
                'source': 'IMF',
                'category': 'macro',
                'timestamp': datetime.now() - timedelta(hours=4),
                'sentiment': 'neutral',
                'impact': 'medium',
                'url': 'https://twitter.com/IMFNews',
                'likes': 800,
                'retweets': 300,
                'is_breaking': False,
                'source_weight': 1.7
            },
            {
                'id': 9,
                'title': 'US-China trade talks resume in Washington, focus on technology transfer and tariffs',
                'summary': 'US-China trade talks resume',
                'source': 'Politico',
                'category': 'geopolitical',
                'timestamp': datetime.now() - timedelta(hours=5),
                'sentiment': 'neutral',
                'impact': 'low',
                'url': 'https://twitter.com/politico',
                'likes': 600,
                'retweets': 200,
                'is_breaking': False,
                'source_weight': 1.2
            },
            {
                'id': 10,
                'title': 'Bank of America cuts recession probability to 20%, cites resilient consumer spending',
                'summary': 'BofA cuts recession probability to 20%',
                'source': 'FT',
                'category': 'markets',
                'timestamp': datetime.now() - timedelta(hours=6),
                'sentiment': 'positive',
                'impact': 'low',
                'url': 'https://twitter.com/FT',
                'likes': 700,
                'retweets': 250,
                'is_breaking': False,
                'source_weight': 1.4
            }
        ]

    def get_news(self, category: str = 'all', sentiment: str = 'all',
                 impact: str = 'all', refresh: bool = False) -> pd.DataFrame:
        """
        Get filtered news with intelligent caching.

        Args:
            category: 'all', 'macro', 'geopolitical', 'markets'
            sentiment: 'all', 'positive', 'negative', 'neutral'
            impact: 'all', 'high', 'medium', 'low'
            refresh: Force refresh cache

        Returns:
            DataFrame of news items (possibly empty) with parsed timestamps.
        """
        # Check cache freshness. Use total_seconds(): timedelta.seconds
        # wraps at 24h and would wrongly report a stale cache as fresh.
        if refresh or not self.last_fetch or \
           (datetime.now() - self.last_fetch).total_seconds() > self.cache_ttl:
            self.news_cache = self.scrape_twitter_news(max_tweets=100)
            self.last_fetch = datetime.now()

        news = self.news_cache.copy()

        # Apply filters
        if category != 'all':
            news = [n for n in news if n['category'] == category]

        if sentiment != 'all':
            news = [n for n in news if n['sentiment'] == sentiment]

        if impact != 'all':
            news = [n for n in news if n['impact'] == impact]

        df = pd.DataFrame(news)
        if not df.empty:
            df['timestamp'] = pd.to_datetime(df['timestamp'])

        return df

    def get_breaking_news(self) -> pd.DataFrame:
        """Get only breaking/high-impact news for alerts."""
        return self.get_news(impact='high')

    def get_statistics(self) -> Dict:
        """Get feed statistics (totals, impact/breaking counts, per-category breakdown)."""
        if not self.news_cache:
            return {
                'total': 0,
                'high_impact': 0,
                'breaking': 0,
                'last_update': 'Never',
                'by_category': {}
            }

        df = pd.DataFrame(self.news_cache)

        return {
            'total': len(df),
            'high_impact': len(df[df['impact'] == 'high']),
            'breaking': len(df[df['is_breaking'] == True]),
            'last_update': self.last_fetch.strftime('%H:%M:%S') if self.last_fetch else 'Never',
            'by_category': df['category'].value_counts().to_dict()
        }
app/utils/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Utilities package for financial platform."""