File size: 2,497 Bytes
3a36548
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""

Unified political-party crawler.

- Crawls the Democratic Party of Korea, People Power Party, Rebuilding Korea
  Party, Reform Party, Basic Income Party, and Progressive Party concurrently.

- Uploads each party's data to Hugging Face independently.

- Uses asynchronous, parallel processing.



Note: use main.py if you need command-line argument support.

"""

import asyncio
import logging
from datetime import datetime

from minjoo_crawler_async import MinjooAsyncCrawler
from ppp_crawler_async import PPPAsyncCrawler
from rebuilding_crawler_async import RebuildingAsyncCrawler
from reform_crawler_async import ReformAsyncCrawler
from basic_income_crawler_async import BasicIncomeAsyncCrawler
from jinbo_crawler_async import JinboAsyncCrawler

# Root logging setup: every record at INFO or above is handed to two handlers,
# a UTF-8 encoded log file and a default stream handler.
logging.basicConfig(
    handlers=[
        logging.FileHandler(filename="unified_crawler.log", encoding="utf-8"),
        logging.StreamHandler(),
    ],
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)

# Module-scoped logger used by the functions in this file.
logger = logging.getLogger(name=__name__)

# Registry mapping each party's display name to the async crawler class that
# handles it. Insertion order is the order in which parties are listed in the
# log and launched.
# NOTE(review): the key text is copied exactly as it appears in this file; it
# looks like UTF-8 Korean decoded with the wrong code page -- confirm against
# the repository copy before changing it.
CRAWLERS = {
    '๋”๋ถˆ์–ด๋ฏผ์ฃผ๋‹น': MinjooAsyncCrawler,
    '๊ตญ๋ฏผ์˜ํž˜': PPPAsyncCrawler,
    '์กฐ๊ตญํ˜์‹ ๋‹น': RebuildingAsyncCrawler,
    '๊ฐœํ˜์‹ ๋‹น': ReformAsyncCrawler,
    '๊ธฐ๋ณธ์†Œ๋“๋‹น': BasicIncomeAsyncCrawler,
    '์ง„๋ณด๋‹น': JinboAsyncCrawler,
}


async def crawl_all_parties():
    """6๊ฐœ ์ •๋‹น ๋™์‹œ ํฌ๋กค๋ง"""
    logger.info("=" * 60)
    logger.info("ํ†ตํ•ฉ ์ •๋‹น ํฌ๋กค๋Ÿฌ ์‹œ์ž‘")
    logger.info(" + ".join(CRAWLERS.keys()))
    logger.info("=" * 60)

    start_time = datetime.now()

    crawlers = [cls() for cls in CRAWLERS.values()]
    party_names = list(CRAWLERS.keys())

    results = await asyncio.gather(
        *[crawler.run_incremental() for crawler in crawlers],
        return_exceptions=True
    )

    for party, result in zip(party_names, results):
        if isinstance(result, Exception):
            logger.error(f"{party} ํฌ๋กค๋ง ์‹คํŒจ: {result}")
        else:
            logger.info(f"{party} ํฌ๋กค๋ง ์™„๋ฃŒ")

    duration = (datetime.now() - start_time).total_seconds()
    logger.info("=" * 60)
    logger.info(f"์ „์ฒด ํฌ๋กค๋ง ์™„๋ฃŒ")
    logger.info(f"์†Œ์š” ์‹œ๊ฐ„: {duration:.1f}์ดˆ ({duration / 60:.1f}๋ถ„)")
    logger.info("=" * 60)


# Kept for backward compatibility
async def crawl_both_parties() -> None:
    """Legacy-named entry point; awaits crawl_all_parties() and nothing else."""
    await crawl_all_parties()


async def main() -> None:
    """Script entry coroutine: awaits crawl_all_parties()."""
    await crawl_all_parties()


# Start the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())