Spaces:

Supitn
/

try_topic

No application file

File size: 5,776 Bytes

d08f4c4

import time
from datetime import datetime
from urllib.parse import quote, urljoin

import feedparser
import requests
from bs4 import BeautifulSoup

class YahooFinanceScraper:
    """Fetch news headlines from Yahoo Finance via HTML scraping and RSS.

    Every public method is best-effort: network or parsing failures are
    reported with ``print`` and answered with fallback content instead of
    raising. Results are lists of dicts with the string keys ``title``,
    ``link``, ``summary`` and ``published``.
    """

    # RSS feed used for general (non-ticker) news.
    NEWS_RSS_URL = "https://finance.yahoo.com/news/rssindex"
    # RSS feed tried when the primary source fails or yields nothing.
    FALLBACK_RSS_URL = "https://finance.yahoo.com/rss/topstories"
    # Seconds to wait for any single HTTP request before giving up.
    REQUEST_TIMEOUT = 10
    # Maximum number of characters kept from an RSS entry summary.
    SUMMARY_LIMIT = 300

    def __init__(self):
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
        self.base_url = "https://finance.yahoo.com"

    def get_latest_news(self, symbol="", max_articles=10):
        """Fetch the latest news from Yahoo Finance.

        Args:
            symbol: Ticker symbol (e.g. AAPL, TSLA), or an empty string for
                general news.
            max_articles: Maximum number of articles to return.

        Returns:
            list: News items ``[{title, link, summary, published}]``. When
            the primary source fails or is empty, the result of the fallback
            feed is returned instead.
        """
        try:
            if symbol:
                # Ticker-specific news is scraped from the quote page.
                news_list = self._scrape_quote_news(symbol, max_articles)
            else:
                # General news comes from the RSS feed.
                news_list = self._fetch_feed(self.NEWS_RSS_URL, max_articles)
        except Exception as e:
            # Top-level boundary of a best-effort API: report and fall back.
            print(f"Error scraping news: {e}")
            news_list = []

        # Nothing found (or the fetch failed): try the fallback source.
        if not news_list:
            news_list = self._get_fallback_news(max_articles)

        return news_list

    def _get_fallback_news(self, max_articles):
        """Return top stories from the fallback RSS feed.

        If the feed cannot be fetched or parsed, a single placeholder item
        is returned so callers always receive a list of news-shaped dicts.
        """
        try:
            return self._fetch_feed(self.FALLBACK_RSS_URL, max_articles)
        except Exception:
            return [{
                'title': 'Unable to fetch news',
                'link': '',
                'summary': 'Please try again later',
                'published': 'N/A'
            }]

    def search_news(self, keyword, max_articles=10):
        """Search Yahoo Finance news by keyword.

        Args:
            keyword: Search term; it is URL-encoded before being sent.
            max_articles: Maximum number of articles to return.

        Returns:
            list: Matching news items, or the result of
            ``get_latest_news`` when the search fails or finds nothing.
        """
        try:
            # Passing the keyword via ``params`` lets requests encode it, so
            # spaces, '&' or '#' cannot corrupt the query string.
            soup = self._fetch_soup(
                f"{self.base_url}/search", params={'p': keyword}
            )
            # NOTE(review): selector kept from the original implementation;
            # confirm it still matches the live page markup.
            articles = soup.find_all('div', class_='Ov(h)', limit=max_articles)

            news_list = []
            for article in articles:
                try:
                    title = article.find('h3')
                    link = article.find('a')
                    if title and link:
                        news_list.append({
                            'title': title.get_text(strip=True),
                            'link': self._absolute_link(link.get('href', '')),
                            'summary': '',
                            'published': 'Recent'
                        })
                except (AttributeError, TypeError):
                    # One malformed result must not discard the others.
                    continue
        except Exception as e:
            print(f"Search error: {e}")
            return self.get_latest_news(max_articles=max_articles)

        return news_list if news_list else self.get_latest_news(max_articles=max_articles)

    def _scrape_quote_news(self, symbol, max_articles):
        """Scrape news items from the quote page of ``symbol``."""
        soup = self._fetch_soup(self._quote_url(symbol))
        # NOTE(review): selector kept from the original implementation;
        # confirm it still matches the live page markup.
        items = soup.find_all('li', class_='stream-item', limit=max_articles)

        news_list = []
        for item in items:
            try:
                title_tag = item.find('h3')
                link_tag = item.find('a')
                if not (title_tag and link_tag):
                    continue
                summary_tag = item.find('p')
                time_tag = item.find('time')
                news_list.append({
                    'title': title_tag.get_text(strip=True),
                    'link': self._absolute_link(link_tag.get('href', '')),
                    'summary': summary_tag.get_text(strip=True) if summary_tag else '',
                    'published': time_tag.get_text(strip=True) if time_tag else 'N/A'
                })
            except (AttributeError, TypeError):
                # One malformed item must not discard the others.
                continue
        return news_list

    def _fetch_soup(self, url, params=None):
        """GET ``url`` and return the response body parsed as HTML.

        Raises the ``requests`` exception for network errors and for HTTP
        error statuses, so error pages are never parsed as content.
        """
        response = requests.get(
            url,
            params=params,
            headers=self.headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        return BeautifulSoup(response.content, 'html.parser')

    def _fetch_feed(self, url, max_articles):
        """Download the RSS feed at ``url`` and return up to ``max_articles`` items.

        The feed is downloaded with ``requests`` so it gets the same headers
        and timeout as the HTML requests; the raw bytes are then handed to
        feedparser.
        """
        response = requests.get(
            url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
        )
        response.raise_for_status()
        feed = feedparser.parse(response.content)
        return [self._entry_to_item(entry) for entry in feed.entries[:max_articles]]

    def _entry_to_item(self, entry):
        """Convert one feed entry (any mapping with ``get``) to a news dict."""
        return {
            'title': entry.get('title', 'No title'),
            'link': entry.get('link', ''),
            # Summaries are truncated to keep items compact.
            'summary': entry.get('summary', '')[:self.SUMMARY_LIMIT],
            'published': entry.get('published', 'N/A')
        }

    def _absolute_link(self, href):
        """Resolve ``href`` against ``base_url``; an empty href stays empty."""
        return urljoin(self.base_url, href) if href else ''

    def _quote_url(self, symbol):
        """Build the quote-page URL with ``symbol`` percent-encoded.

        Every reserved character is encoded (``safe=''``) so a symbol
        containing '/', '?' or '#' cannot alter the URL path.
        """
        return f"{self.base_url}/quote/{quote(str(symbol), safe='')}"