File size: 1,381 Bytes
fb05e78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import asyncio
from typing import Optional

import aiohttp

from src.scraping.exceptions import ArticleNotFoundError, FetchError

async def fetch_html(
    url: str,
    timeout_s: float = 20.0,
    user_agent: Optional[str] = None,
) -> str:
    """Fetch the body of ``url`` as text via an HTTP GET.

    A new ``aiohttp.ClientSession`` is created for each call, and redirects
    are followed.

    Args:
        url: Address of the page to download.
        timeout_s: Total time budget for the request in seconds, passed to
            ``aiohttp.ClientTimeout(total=...)``.
        user_agent: Value for the ``User-Agent`` header. When omitted (or
            empty), a desktop Chrome-like string is sent instead.

    Returns:
        The response body decoded by ``resp.text()``.

    Raises:
        ArticleNotFoundError: The server answered with HTTP 404.
        FetchError: Any other HTTP error status, an aiohttp client or
            protocol error, or a timeout. The underlying exception is
            attached as ``__cause__``.
    """
    headers = {
        "User-Agent": user_agent or (
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/124.0 Safari/537.36"
        )
    }
    timeout = aiohttp.ClientTimeout(total=timeout_s)
    try:
        async with aiohttp.ClientSession(timeout=timeout, headers=headers) as session:
            async with session.get(url, allow_redirects=True) as resp:
                # 404 gets its own exception type so callers can tell a
                # missing article apart from other failures.
                if resp.status == 404:
                    raise ArticleNotFoundError(f"記事が見つかりません: {url}")
                resp.raise_for_status()
                return await resp.text()
    except ArticleNotFoundError:
        # Re-raise untouched so the handlers below can never wrap it,
        # whatever its base class is (its hierarchy is not visible here).
        raise
    except aiohttp.ClientResponseError as e:
        # Chain explicitly so the original status/headers stay reachable
        # through __cause__.
        raise FetchError(f"HTTPエラー {e.status}: {url}") from e
    except (
        aiohttp.ClientError,
        aiohttp.http_exceptions.HttpProcessingError,
        asyncio.TimeoutError,
    ) as e:
        # Some exceptions (notably timeouts) stringify to "", so fall back
        # to the class name rather than emit a message with no detail.
        detail = str(e) or type(e).__name__
        raise FetchError(f"ネットワークエラー: {url} - {detail}") from e