#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
ํตํฉ ์ ๋น ํฌ๋กค๋ฌ
- ๋๋ถ์ด๋ฏผ์ฃผ๋น, ๊ตญ๋ฏผ์ํ, ์กฐ๊ตญํ์ ๋น, ๊ฐํ์ ๋น, ๊ธฐ๋ณธ์๋๋น, ์ง๋ณด๋น ๋์ ํฌ๋กค๋ง
- ๊ฐ ์ ๋น๋ณ ๋
๋ฆฝ์ ์ธ ํ๊น
ํ์ด์ค ์
๋ก๋
- ๋น๋๊ธฐ ๋ณ๋ ฌ ์ฒ๋ฆฌ
โป CLI ์ธ์ ์ง์์ด ํ์ํ ๊ฒฝ์ฐ main.py ๋ฅผ ์ฌ์ฉํ์ธ์.
"""
import asyncio
import logging
from datetime import datetime
from minjoo_crawler_async import MinjooAsyncCrawler
from ppp_crawler_async import PPPAsyncCrawler
from rebuilding_crawler_async import RebuildingAsyncCrawler
from reform_crawler_async import ReformAsyncCrawler
from basic_income_crawler_async import BasicIncomeAsyncCrawler
from jinbo_crawler_async import JinboAsyncCrawler
# Process-wide logging setup, executed at import time: records at INFO and
# above are formatted as "<timestamp> [<LEVEL>] <message>" and handed to two
# handlers -- a UTF-8 file named unified_crawler.log in the current working
# directory and a default StreamHandler.
logging.basicConfig(
    handlers=[
        logging.FileHandler('unified_crawler.log', encoding='utf-8'),
        logging.StreamHandler(),
    ],
    format='%(asctime)s [%(levelname)s] %(message)s',
    level=logging.INFO,
)

# Module-level logger shared by the functions in this file.
logger = logging.getLogger(__name__)
# Registry of party display name -> crawler class.
# Insertion order matters: crawl_all_parties() pairs results with names by
# position, so names and classes must stay in the same order.
# NOTE(review): the keys look like UTF-8 Korean party names that were
# mis-decoded upstream; they are runtime strings (used in log output and
# possibly looked up by other modules), so they are kept verbatim -- confirm
# against the original file. The English names below follow the imported
# crawler module names.
CRAWLERS = {
    '๋๋ถ์ด๋ฏผ์ฃผ๋น': MinjooAsyncCrawler,         # Democratic Party of Korea (minjoo)
    '๊ตญ๋ฏผ์ํ': PPPAsyncCrawler,                  # People Power Party (ppp)
    '์กฐ๊ตญํ์ ๋น': RebuildingAsyncCrawler,       # Rebuilding Korea Party
    '๊ฐํ์ ๋น': ReformAsyncCrawler,               # Reform Party
    '๊ธฐ๋ณธ์๋๋น': BasicIncomeAsyncCrawler,      # Basic Income Party
    '์ง๋ณด๋น': JinboAsyncCrawler,                  # Progressive Party (jinbo)
}
async def crawl_all_parties():
    """Run every crawler registered in ``CRAWLERS`` concurrently and log the outcome.

    One instance of each registered crawler class is created, and the
    awaitables returned by their ``run_incremental()`` methods are awaited
    together via ``asyncio.gather(..., return_exceptions=True)``, so an error
    in one party's crawl does not abort the others. A success or failure line
    is logged per party, followed by the total elapsed time.

    Returns:
        None.

    Raises:
        Whatever a crawler class constructor raises: construction happens
        before ``gather`` and is not isolated. Exceptions raised while a
        crawl is running are logged (with traceback) instead of propagating.

    NOTE(review): the non-ASCII log literals below look like UTF-8 Korean text
    that was mis-decoded upstream. They are runtime output, so they are kept
    verbatim here -- confirm against the original file.
    """
    # Snapshot the registry once so names and results stay aligned even if
    # CRAWLERS is mutated while the crawls are awaited.
    registry = list(CRAWLERS.items())
    banner = "=" * 60

    logger.info(banner)
    logger.info("ํตํฉ ์ ๋น ํฌ๋กค๋ฌ ์์")  # "Unified party crawler starting"
    logger.info(" + ".join(party for party, _ in registry))
    logger.info(banner)

    start_time = datetime.now()

    # Instantiate all crawlers first (same order as the registry), then run
    # them together; gather() returns results in argument order.
    crawlers = [crawler_cls() for _, crawler_cls in registry]
    results = await asyncio.gather(
        *(crawler.run_incremental() for crawler in crawlers),
        return_exceptions=True,
    )

    for (party, _), result in zip(registry, results):
        # With return_exceptions=True gather() can also hand back
        # BaseException subclasses that are not Exception (notably
        # asyncio.CancelledError on Python 3.8+). Checking only Exception
        # would report a cancelled crawl as a success.
        if isinstance(result, BaseException):
            # "<party> crawl failed: <error>"; exc_info attaches the traceback.
            logger.error("%s ํฌ๋กค๋ง ์คํจ: %s", party, result, exc_info=result)
        else:
            # "<party> crawl finished"
            logger.info("%s ํฌ๋กค๋ง ์๋ฃ", party)

    duration = (datetime.now() - start_time).total_seconds()

    logger.info(banner)
    logger.info("์ ์ฒด ํฌ๋กค๋ง ์๋ฃ")  # "All crawling finished"
    # "Elapsed time: <seconds> s (<minutes> min)"
    logger.info("์์ ์๊ฐ: %.1f์ด (%.1f๋ถ)", duration, duration / 60)
    logger.info(banner)
# Kept for backward compatibility.
async def crawl_both_parties():
    """Legacy-named entry point; simply awaits ``crawl_all_parties()``."""
    await crawl_all_parties()
async def main():
    """Top-level coroutine for script execution: run one full crawl of all parties."""
    await crawl_all_parties()
# Start the crawl only when this file is executed as a script; importing the
# module must not kick off a crawl. (A stray "|" left after this guard by the
# file-viewer extraction was a syntax error and has been removed.)
if __name__ == "__main__":
    asyncio.run(main())