|
|
import time
from datetime import datetime
from urllib.parse import quote, urljoin

import feedparser
import requests
from bs4 import BeautifulSoup
|
|
|
|
|
class YahooFinanceScraper:
    """Best-effort scraper for Yahoo Finance news.

    Every public method returns a list of dicts with the keys ``title``,
    ``link``, ``summary`` and ``published``. Ordinary failures are reported
    with ``print`` and answered with fallback data instead of propagating.
    """

    # Feed used for general (non-symbol) news.
    _LATEST_FEED_URL = "https://finance.yahoo.com/news/rssindex"
    # Feed used when the primary source yields nothing or fails.
    _FALLBACK_FEED_URL = "https://finance.yahoo.com/rss/topstories"
    # Maximum number of characters kept from a feed entry's summary.
    _SUMMARY_LIMIT = 300
    # Timeout, in seconds, for each HTTP request made with ``requests``.
    _TIMEOUT = 10

    def __init__(self):
        """Set the request headers and the site base URL."""
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
        self.base_url = "https://finance.yahoo.com"

    def get_latest_news(self, symbol="", max_articles=10):
        """Fetch the latest news from Yahoo Finance.

        Args:
            symbol: Ticker symbol (e.g. AAPL, TSLA), or empty for general news.
            max_articles: Number of articles wanted.

        Returns:
            list: News items as ``{title, link, summary, published}`` dicts.
            When the primary source fails or is empty, the result of
            ``_get_fallback_news`` is returned instead.
        """
        try:
            if symbol:
                news_list = self._scrape_quote_news(symbol, max_articles)
            else:
                news_list = self._parse_feed(self._LATEST_FEED_URL, max_articles)

            # An empty result is treated like a failure: try the fallback feed.
            if not news_list:
                news_list = self._get_fallback_news(max_articles)
        except Exception as e:
            # Boundary handler: this method is best-effort and must not raise.
            print(f"Error scraping news: {e}")
            news_list = self._get_fallback_news(max_articles)

        return news_list

    def _get_fallback_news(self, max_articles):
        """Fallback used when the primary news source cannot be read.

        Args:
            max_articles: Number of articles wanted.

        Returns:
            list: Items from the top-stories feed, or a single placeholder
            item if reading that feed raises.
        """
        try:
            return self._parse_feed(self._FALLBACK_FEED_URL, max_articles)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit are not swallowed.
            return [{
                'title': 'Unable to fetch news',
                'link': '',
                'summary': 'Please try again later',
                'published': 'N/A'
            }]

    def search_news(self, keyword, max_articles=10):
        """Search news by keyword.

        Args:
            keyword: Search term.
            max_articles: Number of articles wanted.

        Returns:
            list: Matching news items; if the search yields nothing or fails,
            the result of ``get_latest_news(max_articles=max_articles)``.
        """
        try:
            # Passing the keyword via ``params`` lets requests URL-encode it.
            soup = self._fetch_soup(f"{self.base_url}/search", params={'p': keyword})

            news_list = []
            for article in soup.find_all('div', class_='Ov(h)', limit=max_articles):
                story = self._make_story(
                    article.find('h3'),
                    article.find('a'),
                    published='Recent',
                )
                if story is not None:
                    news_list.append(story)

            return news_list if news_list else self.get_latest_news(max_articles=max_articles)
        except Exception as e:
            # Boundary handler: report and degrade to general news.
            print(f"Search error: {e}")
            return self.get_latest_news(max_articles=max_articles)

    def _scrape_quote_news(self, symbol, max_articles):
        """Scrape the news stream from the quote page of ``symbol``."""
        # Encode the symbol (including '/') so it stays a single path segment.
        url = f"{self.base_url}/quote/{quote(str(symbol), safe='')}"
        soup = self._fetch_soup(url)

        news_list = []
        for item in soup.find_all('li', class_='stream-item', limit=max_articles):
            summary_tag = item.find('p')
            time_tag = item.find('time')
            story = self._make_story(
                item.find('h3'),
                item.find('a'),
                summary=summary_tag.get_text(strip=True) if summary_tag else '',
                published=time_tag.get_text(strip=True) if time_tag else 'N/A',
            )
            if story is not None:
                news_list.append(story)
        return news_list

    def _fetch_soup(self, url, params=None):
        """GET ``url`` and return the parsed HTML; raises on HTTP error status."""
        response = requests.get(
            url,
            headers=self.headers,
            params=params,
            timeout=self._TIMEOUT,
        )
        # Do not parse error pages as if they were real content.
        response.raise_for_status()
        return BeautifulSoup(response.content, 'html.parser')

    def _make_story(self, title_tag, link_tag, summary='', published='N/A'):
        """Build one news dict from parsed tags, or None if it is unusable."""
        if not (title_tag and link_tag):
            return None
        try:
            return {
                'title': title_tag.get_text(strip=True),
                'link': self._absolute_link(link_tag.get('href', '')),
                'summary': summary,
                'published': published
            }
        except (AttributeError, TypeError, ValueError):
            # One malformed item must not abort the whole listing.
            return None

    def _absolute_link(self, href):
        """Resolve a possibly site-relative ``href`` against ``base_url``."""
        if not href:
            return ''
        return urljoin(self.base_url + '/', href)

    def _parse_feed(self, url, max_articles):
        """Parse the RSS feed at ``url`` into a list of news dicts."""
        feed = feedparser.parse(url)
        return self._entries_to_news(feed.entries, max_articles)

    @staticmethod
    def _entries_to_news(entries, max_articles):
        """Convert the first ``max_articles`` feed entries into news dicts."""
        limit = YahooFinanceScraper._SUMMARY_LIMIT
        return [
            {
                'title': entry.get('title', 'No title'),
                'link': entry.get('link', ''),
                # ``or ''`` guards against a summary that is present but None.
                'summary': (entry.get('summary') or '')[:limit],
                'published': entry.get('published', 'N/A')
            }
            for entry in entries[:max_articles]
        ]