File size: 1,745 Bytes
b56d4a6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import logging
from datetime import datetime

from scraper import BloomingtonScraper
from processor import DataProcessor
from generator import QAPairGenerator
from config import LOG_DIR

def setup_logging() -> None:
    """Configure root logging to write to a timestamped file and the console.

    The log file is named ``main_<YYYYmmdd_HHMMSS>.log`` and is placed in
    ``LOG_DIR``. The directory is created first when it does not exist yet,
    because ``logging.FileHandler`` opens its file immediately and would
    otherwise fail with ``FileNotFoundError``.

    Raises:
        OSError: If the log directory or the log file cannot be created.
    """
    log_file = LOG_DIR / f"main_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
    # NOTE(review): assumes LOG_DIR is a pathlib.Path (it is joined with "/"
    # above) -- confirm against config.
    log_file.parent.mkdir(parents=True, exist_ok=True)
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[
            logging.FileHandler(log_file),
            logging.StreamHandler(),  # Also print to console
        ],
    )

def main():
    """Run the pipeline end to end: scrape, process, then generate QA pairs.

    Logging is configured first. If any stage raises, the error is logged
    together with its traceback and then re-raised to the caller; the final
    success message is logged only when all three stages finish.
    """
    setup_logging()
    logging.info("Starting Bloomington Tourist Guide data collection and QA pair generation")

    try:
        # Stage 1: scrape every category, then report the scraper's search stats.
        logging.info("Starting data collection...")
        collector = BloomingtonScraper()
        collector.scrape_all_categories()
        logging.info(f"Data collection completed. Search stats: {collector.get_search_stats()}")

        # Stage 2: process the collected data for every category.
        logging.info("Starting data processing...")
        cleaner = DataProcessor()
        cleaner.process_all_categories()
        logging.info("Data processing completed")

        # Stage 3: generate the QA pairs.
        logging.info("Starting QA pair generation...")
        pair_builder = QAPairGenerator()
        pair_builder.generate_all_pairs()
        logging.info("QA pair generation completed")
    except Exception as err:
        # logging.exception logs at ERROR level and attaches the active traceback.
        logging.exception(f"Error in main execution: {err}")
        raise
    else:
        logging.info("Pipeline completed successfully")

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()