Create scraper.py
scraper.py (ADDED, +140 -0)
@@ -0,0 +1,140 @@
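The module depends on three third-party packages, inferred from its imports. A minimal install sketch, assuming a standard pip setup (PyPI package names shown; not part of the commit):

    pip install requests beautifulsoup4 feedparser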
import requests
from bs4 import BeautifulSoup
import feedparser
from urllib.parse import quote_plus

class YahooFinanceScraper:
    def __init__(self):
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
        self.base_url = "https://finance.yahoo.com"

    def get_latest_news(self, symbol="", max_articles=10):
        """
        Fetch the latest news from Yahoo Finance.

        Args:
            symbol: ticker symbol (e.g. AAPL, TSLA); leave empty for general market news
            max_articles: maximum number of articles to return

        Returns:
            list: news items as dicts [{title, link, summary, published}]
        """
        news_list = []

        try:
            if symbol:
                # Fetch news for a specific ticker by scraping its quote page
                url = f"{self.base_url}/quote/{symbol}"
                response = requests.get(url, headers=self.headers, timeout=10)
                response.raise_for_status()
                soup = BeautifulSoup(response.content, 'html.parser')

                # Locate the news stream items; Yahoo's markup changes often,
                # so this selector may need updating over time
                news_items = soup.find_all('li', class_='stream-item', limit=max_articles)

                for item in news_items:
                    try:
                        title_tag = item.find('h3')
                        link_tag = item.find('a')
                        summary_tag = item.find('p')
                        time_tag = item.find('time')

                        if title_tag and link_tag:
                            link = link_tag.get('href', '')
                            # Normalize relative links to absolute URLs
                            if link.startswith('/'):
                                link = self.base_url + link
                            news_list.append({
                                'title': title_tag.get_text(strip=True),
                                'link': link,
                                'summary': summary_tag.get_text(strip=True) if summary_tag else '',
                                'published': time_tag.get_text(strip=True) if time_tag else 'N/A'
                            })
                    except Exception:
                        continue

            else:
                # Fetch general market news via the RSS feed
                rss_url = "https://finance.yahoo.com/news/rssindex"
                feed = feedparser.parse(rss_url)

                for entry in feed.entries[:max_articles]:
                    news_list.append({
                        'title': entry.get('title', 'No title'),
                        'link': entry.get('link', ''),
                        'summary': entry.get('summary', '')[:300],  # cap summaries at 300 characters
                        'published': entry.get('published', 'N/A')
                    })

            # If nothing was found, try the fallback source
            if not news_list:
                news_list = self._get_fallback_news(max_articles)

        except Exception as e:
            print(f"Error scraping news: {e}")
            news_list = self._get_fallback_news(max_articles)

        return news_list

    def _get_fallback_news(self, max_articles):
        """Fallback used when the primary fetch fails or returns nothing."""
        try:
            # Use the simpler top-stories RSS feed
            url = "https://finance.yahoo.com/rss/topstories"
            feed = feedparser.parse(url)

            news_list = []
            for entry in feed.entries[:max_articles]:
                news_list.append({
                    'title': entry.get('title', 'No title'),
                    'link': entry.get('link', ''),
                    'summary': entry.get('summary', '')[:300],
                    'published': entry.get('published', 'N/A')
                })

            return news_list
        except Exception:
            return [{
                'title': 'Unable to fetch news',
                'link': '',
                'summary': 'Please try again later',
                'published': 'N/A'
            }]

    def search_news(self, keyword, max_articles=10):
        """
        Search Yahoo Finance news by keyword.

        Args:
            keyword: search term
            max_articles: maximum number of results

        Returns:
            list: matching news items
        """
        try:
            # URL-encode the keyword so multi-word queries work
            search_url = f"{self.base_url}/search?p={quote_plus(keyword)}"
            response = requests.get(search_url, headers=self.headers, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')

            news_list = []
            # 'Ov(h)' is one of Yahoo's generated utility classes and is brittle
            articles = soup.find_all('div', class_='Ov(h)', limit=max_articles)

            for article in articles:
                try:
                    title = article.find('h3')
                    link = article.find('a')

                    if title and link:
                        href = link.get('href', '')
                        if href.startswith('/'):
                            href = self.base_url + href
                        news_list.append({
                            'title': title.get_text(strip=True),
                            'link': href,
                            'summary': '',
                            'published': 'Recent'
                        })
                except Exception:
                    continue

            # Fall back to the general feed if the search page yields nothing
            return news_list if news_list else self.get_latest_news(max_articles=max_articles)

        except Exception as e:
            print(f"Search error: {e}")
            return self.get_latest_news(max_articles=max_articles)
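A minimal usage sketch, not part of the commit: it assumes scraper.py is importable from the working directory and that Yahoo's pages and feeds respond; the printed keys come from the dict shape returned above.

    # usage_example.py: hypothetical driver, not part of this commit
    from scraper import YahooFinanceScraper

    scraper = YahooFinanceScraper()

    # General market headlines via the RSS path (empty symbol)
    for article in scraper.get_latest_news(max_articles=3):
        print(article['title'], '|', article['published'])

    # Ticker-specific headlines scraped from the quote page
    for article in scraper.get_latest_news(symbol="AAPL", max_articles=3):
        print(article['title'], article['link'])

    # Keyword search; falls back to general news if the page yields nothing
    for article in scraper.search_news("semiconductors", max_articles=3):
        print(article['title'])

Output depends on the live site, so an empty or fallback result simply means the markup or feeds have changed since the selectors were written.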