Dmitry Beresnev
committed on
Commit
·
0e6d54c
1
Parent(s):
bb8a8a0
fix news feed
Browse files
- app/components/news.py +2 -2
- app/services/news_scraper.py +7 -2
app/components/news.py
CHANGED
|
@@ -81,7 +81,7 @@ def display_news_card(news_item: dict):
|
|
| 81 |
<!-- Title -->
|
| 82 |
<h3 style='color: #f3f4f6; margin: 0 0 12px 0; font-size: 17px;
|
| 83 |
line-height: 1.5; font-weight: 600;'>
|
| 84 |
-
{
|
| 85 |
</h3>
|
| 86 |
|
| 87 |
<!-- Meta info -->
|
|
@@ -245,7 +245,7 @@ def display_breaking_news_banner(df: pd.DataFrame):
|
|
| 245 |
BREAKING NEWS • {latest['source'].upper()}
|
| 246 |
</div>
|
| 247 |
<div style='color: white; font-size: 18px; font-weight: 600; line-height: 1.4;'>
|
| 248 |
-
{
|
| 249 |
</div>
|
| 250 |
</div>
|
| 251 |
<a href='{latest['url']}' target='_blank'
|
|
|
|
| 81 |
<!-- Title -->
|
| 82 |
<h3 style='color: #f3f4f6; margin: 0 0 12px 0; font-size: 17px;
|
| 83 |
line-height: 1.5; font-weight: 600;'>
|
| 84 |
+
{news_item.get('summary', '').strip()}
|
| 85 |
</h3>
|
| 86 |
|
| 87 |
<!-- Meta info -->
|
|
|
|
| 245 |
BREAKING NEWS • {latest['source'].upper()}
|
| 246 |
</div>
|
| 247 |
<div style='color: white; font-size: 18px; font-weight: 600; line-height: 1.4;'>
|
| 248 |
+
{latest.get('summary', '').strip()}
|
| 249 |
</div>
|
| 250 |
</div>
|
| 251 |
<a href='{latest['url']}' target='_blank'
|
app/services/news_scraper.py
CHANGED
|
@@ -239,8 +239,13 @@ class FinanceNewsScraper:
|
|
| 239 |
continue
|
| 240 |
|
| 241 |
# Skip if title looks like it contains HTML comments or code
|
| 242 |
-
if
|
| 243 |
-
logger.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
continue
|
| 245 |
|
| 246 |
# Find associated link
|
|
|
|
| 239 |
continue
|
| 240 |
|
| 241 |
# Skip if title looks like it contains HTML comments or code
|
| 242 |
+
if any(marker in title for marker in ['<!--', '-->', 'style=', '<div', '</div>', '<span', '</span>', 'justify-content', 'flex:', 'padding:']):
|
| 243 |
+
logger.warning(f"Skipping malformed title from {source_name} (contains HTML): {title[:100]}...")
|
| 244 |
+
continue
|
| 245 |
+
|
| 246 |
+
# Skip if title is suspiciously long (likely scraped wrong element)
|
| 247 |
+
if len(title) > 500:
|
| 248 |
+
logger.warning(f"Skipping suspiciously long title from {source_name}: {len(title)} chars")
|
| 249 |
continue
|
| 250 |
|
| 251 |
# Find associated link
|