# File size: 631 Bytes
# f55f92e
from __future__ import annotations

from dataclasses import dataclass


@dataclass
class FetchResult:
    """Record describing a single fetched document.

    A plain data holder with no behavior of its own. All five fields are
    required (none has a default) and the generated constructor accepts them
    positionally in the order they are declared below.
    """

    # NOTE(review): the per-field meanings are inferred from the names alone;
    # confirm against the code that constructs these records.
    url: str  # presumably the URL that was requested
    status: int  # presumably the HTTP status code of the response
    fetched_at: str  # timestamp kept as a string; exact format not shown here -- TODO confirm
    content_type: str  # presumably the response's Content-Type value
    html: str  # presumably the response body as text


@dataclass
class CrawlStats:
    """Bundle of integer counters, each starting at 0.

    Every field has a default, so ``CrawlStats()`` yields an all-zero
    instance. The dataclass is not frozen, so fields can be reassigned after
    construction. Field order is part of the positional constructor signature.
    """

    # NOTE(review): the groupings and meanings below are inferred from the
    # field names only; verify against the code that updates these counters.

    # Queueing / fetching.
    queued_urls: int = 0
    fetch_reserved: int = 0
    fetch_succeeded: int = 0
    fetch_failed: int = 0

    # Parsing / link extraction.
    parsed_pages: int = 0
    parse_failed: int = 0
    extracted_links: int = 0
    dropped_urls: int = 0
    robots_blocked: int = 0  # presumably URLs disallowed by robots.txt -- TODO confirm

    # Storage / shard output.
    stored_rows: int = 0
    written_shards: int = 0
    uploaded_shards: int = 0

    # Tokenization.
    tokenized_shards: int = 0
    tokenized_rows: int = 0
    tokenized_tokens: int = 0