File size: 1,970 Bytes
3a36548
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""

๋”๋ถˆ์–ด๋ฏผ์ฃผ๋‹น ํฌ๋กค๋Ÿฌ ์Šค์ผ€์ค„๋Ÿฌ

- ๋งค์ผ ์ง€์ •๋œ ์‹œ๊ฐ„์— ์ž๋™ ์‹คํ–‰

- ๋ฐฑ๊ทธ๋ผ์šด๋“œ ์‹คํ–‰ ์ง€์›

- ๋กœ๊ทธ ๊ธฐ๋ก

"""

import asyncio
import logging
from datetime import datetime
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger
from minjoo_crawler_async import MinjooAsyncCrawler

# Logging setup: records at INFO and above are sent both to a UTF-8 encoded
# log file and to a default StreamHandler.
_LOG_FORMAT = '%(asctime)s [%(levelname)s] %(message)s'
_log_handlers = [
    logging.FileHandler('crawler_scheduler.log', encoding='utf-8'),
    logging.StreamHandler(),
]
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT, handlers=_log_handlers)

# Module-level logger used by the functions in this file.
logger = logging.getLogger(__name__)

async def scheduled_task():
    """Run one incremental crawl and log how it went.

    A fresh ``MinjooAsyncCrawler`` is created and its ``run_incremental()``
    coroutine is awaited. Any ``Exception`` raised along the way is logged
    together with its traceback and is not re-raised.
    """
    # NOTE(review): the log messages below look mis-encoded (Korean UTF-8 text
    # read with the wrong codec) — confirm against the original file. They are
    # kept exactly as found.
    divider = "=" * 60
    for line in (divider, "์Šค์ผ€์ค„๋œ ํฌ๋กค๋ง ์‹œ์ž‘", divider):
        logger.info(line)

    try:
        await MinjooAsyncCrawler().run_incremental()
        logger.info("ํฌ๋กค๋ง ์™„๋ฃŒ")
    except Exception as e:
        # Same as logger.error(..., exc_info=True): ERROR level plus traceback.
        logger.exception(f"ํฌ๋กค๋ง ์‹คํŒจ: {e}")

async def _run_scheduler():
    """Register the daily crawl job, start the scheduler and wait until cancelled.

    The scheduler is created and started inside a running event loop so that
    it does not have to look up a loop on its own. It is shut down in a
    ``finally`` clause, so it is stopped when this coroutine is cancelled
    (for example on Ctrl+C).
    """
    scheduler = AsyncIOScheduler()

    # Run every day at 09:00 (the trigger is given no explicit timezone).
    scheduler.add_job(
        scheduled_task,
        trigger=CronTrigger(hour=9, minute=0),
        id='daily_crawl',
        name='๋ฏผ์ฃผ๋‹น ํฌ๋กค๋Ÿฌ ์ผ์ผ ์‹คํ–‰',
        replace_existing=True
    )

    # To run once immediately (for testing), additionally register:
    # scheduler.add_job(scheduled_task, 'date', run_date=datetime.now())

    # NOTE(review): the log messages in this block look mis-encoded (Korean
    # UTF-8 text read with the wrong codec) — confirm against the original
    # file. They are kept exactly as found.
    logger.info("์Šค์ผ€์ค„๋Ÿฌ ์‹œ์ž‘")
    logger.info("๋งค์ผ ์˜ค์ „ 9์‹œ์— ํฌ๋กค๋ง ์‹คํ–‰")
    logger.info("์ข…๋ฃŒํ•˜๋ ค๋ฉด Ctrl+C๋ฅผ ๋ˆ„๋ฅด์„ธ์š”")

    scheduler.start()
    try:
        # Wait on an event that is never set: keeps the loop alive for the
        # scheduler until this task is cancelled.
        await asyncio.Event().wait()
    finally:
        # wait=False: this is a plain (non-async) call made during teardown,
        # so do not ask the scheduler to wait for running jobs.
        scheduler.shutdown(wait=False)


def main():
    """Run the crawler scheduler until interrupted.

    Blocks the calling thread. ``asyncio.run()`` owns the event loop, which
    avoids calling ``asyncio.get_event_loop()`` without a running loop
    (deprecated, and an error on recent Python versions) and guarantees the
    loop is closed on exit. ``KeyboardInterrupt`` and ``SystemExit`` are
    caught and logged instead of propagating.
    """
    try:
        asyncio.run(_run_scheduler())
    except (KeyboardInterrupt, SystemExit):
        logger.info("์Šค์ผ€์ค„๋Ÿฌ ์ข…๋ฃŒ")

# Start the scheduler only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()