import asyncio
from typing import Optional

import aiohttp

from src.scraping.exceptions import ArticleNotFoundError, FetchError


async def fetch_html(
    url: str,
    timeout_s: float = 20.0,
    user_agent: Optional[str] = None,
) -> str:
    """Fetch ``url`` over HTTP and return the response body as text.

    A new ``aiohttp.ClientSession`` is opened for the call and closed before
    returning. Redirects are followed.

    Args:
        url: Address of the page to download.
        timeout_s: Total time budget for the request, in seconds.
        user_agent: Value for the ``User-Agent`` header. When ``None`` or
            empty, a desktop Chrome user-agent string is sent instead.

    Returns:
        The response body decoded to ``str`` by ``resp.text()``.

    Raises:
        ArticleNotFoundError: The server answered with HTTP 404.
        FetchError: Any other error status reported by
            ``raise_for_status()``, or a client/network error or timeout.
    """
    headers = {
        "User-Agent": user_agent
        or (
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/124.0 Safari/537.36"
        )
    }
    timeout = aiohttp.ClientTimeout(total=timeout_s)

    try:
        async with aiohttp.ClientSession(timeout=timeout, headers=headers) as session:
            async with session.get(url, allow_redirects=True) as resp:
                # 404 gets its own exception type so callers can tell a
                # missing article apart from other failures.
                if resp.status == 404:
                    raise ArticleNotFoundError(f"記事が見つかりません: {url}")
                resp.raise_for_status()
                return await resp.text()
    except ArticleNotFoundError:
        # Re-raise untouched so the handlers below never wrap it.
        raise
    except aiohttp.ClientResponseError as e:
        raise FetchError(f"HTTPエラー {e.status}: {url}") from e
    except (
        aiohttp.ClientError,
        aiohttp.http_exceptions.HttpProcessingError,
        asyncio.TimeoutError,
    ) as e:
        # Some exceptions (e.g. a bare asyncio.TimeoutError) stringify to "",
        # which would leave the message ending in a dangling " - ". Fall back
        # to the exception class name so the cause is always visible.
        detail = str(e) or type(e).__name__
        raise FetchError(f"ネットワークエラー: {url} - {detail}") from e