Dmitry Beresnev
committed on
Commit
·
903cdee
1
Parent(s):
0abb67a
add yahoo finance
Browse files - app/services/news_scraper.py +16 -4
app/services/news_scraper.py
CHANGED
|
@@ -4,16 +4,19 @@ Scrapes: Reuters, Bloomberg, FT, WSJ, CNBC, MarketWatch, etc.
|
|
| 4 |
No Twitter API needed - direct RSS and web scraping
|
| 5 |
"""
|
| 6 |
|
| 7 |
-
import pandas as pd
|
| 8 |
from datetime import datetime, timedelta
|
| 9 |
from typing import List, Dict, Optional
|
| 10 |
-
|
| 11 |
import logging
|
| 12 |
import re
|
| 13 |
-
import
|
|
|
|
| 14 |
import requests
|
|
|
|
|
|
|
|
|
|
| 15 |
from bs4 import BeautifulSoup
|
| 16 |
-
|
| 17 |
|
| 18 |
# Configure logging
|
| 19 |
logging.basicConfig(level=logging.INFO)
|
|
@@ -80,6 +83,15 @@ class FinanceNewsScraper:
|
|
| 80 |
'web_priority': True,
|
| 81 |
'specialization': ['geopolitical', 'macro']
|
| 82 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
|
| 84 |
# ===== TIER 3: Central Banks & Institutions =====
|
| 85 |
'federal_reserve': {
|
|
|
|
| 4 |
No Twitter API needed - direct RSS and web scraping
|
| 5 |
"""
|
| 6 |
|
|
|
|
| 7 |
from datetime import datetime, timedelta
|
| 8 |
from typing import List, Dict, Optional
|
| 9 |
+
|
| 10 |
import logging
|
| 11 |
import re
|
| 12 |
+
from concurrent.futures import ThreadPoolExecutor
|
| 13 |
+
|
| 14 |
import requests
|
| 15 |
+
import pandas as pd
|
| 16 |
+
import feedparser
|
| 17 |
+
import streamlit as st
|
| 18 |
from bs4 import BeautifulSoup
|
| 19 |
+
|
| 20 |
|
| 21 |
# Configure logging
|
| 22 |
logging.basicConfig(level=logging.INFO)
|
|
|
|
| 83 |
'web_priority': True,
|
| 84 |
'specialization': ['geopolitical', 'macro']
|
| 85 |
},
|
| 86 |
+
'yahoo_finance': {
|
| 87 |
+
'name': 'Yahoo Finance',
|
| 88 |
+
'rss': 'https://finance.yahoo.com/news/rssindex',
|
| 89 |
+
'web': 'https://finance.yahoo.com/',
|
| 90 |
+
'selectors': {'headline': 'h3.clamp', 'link': 'a'},
|
| 91 |
+
'weight': 1.3,
|
| 92 |
+
'web_priority': True,
|
| 93 |
+
'specialization': ['markets', 'macro']
|
| 94 |
+
},
|
| 95 |
|
| 96 |
# ===== TIER 3: Central Banks & Institutions =====
|
| 97 |
'federal_reserve': {
|