File size: 1,745 Bytes
b56d4a6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
import logging
from datetime import datetime
from scraper import BloomingtonScraper
from processor import DataProcessor
from generator import QAPairGenerator
from config import LOG_DIR
def setup_logging() -> None:
    """Configure root logging to write to a timestamped file and the console.

    The log file is named ``main_<YYYYmmdd_HHMMSS>.log`` and is placed inside
    ``LOG_DIR``. The directory is created first when it is missing, because
    ``logging.FileHandler`` opens its file as soon as it is constructed and
    would otherwise fail with ``FileNotFoundError``.

    Raises:
        OSError: If the log directory or the log file cannot be created.
    """
    # NOTE(review): LOG_DIR is combined with "/" below, so it is presumably a
    # pathlib.Path -- confirm against config. mkdir is a no-op when the
    # directory already exists.
    LOG_DIR.mkdir(parents=True, exist_ok=True)

    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    log_file = LOG_DIR / f"main_{timestamp}.log"

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[
            # Explicit UTF-8 so non-ASCII text in log messages is written the
            # same way regardless of the platform's default encoding.
            logging.FileHandler(log_file, encoding="utf-8"),
            logging.StreamHandler(),  # Also print to console
        ],
    )
def main():
    """Run the whole pipeline: scrape, process, then generate QA pairs.

    Logging is configured before anything else. If any stage raises, the
    error is logged together with its traceback and re-raised to the caller;
    the final success message is logged only when all three stages finish
    without an exception.
    """
    setup_logging()
    logging.info("Starting Bloomington Tourist Guide data collection and QA pair generation")

    try:
        # Stage 1: collect the raw data and report the scraper's search stats.
        logging.info("Starting data collection...")
        collector = BloomingtonScraper()
        collector.scrape_all_categories()
        stats = collector.get_search_stats()
        logging.info(f"Data collection completed. Search stats: {stats}")

        # Stage 2: process what was collected.
        logging.info("Starting data processing...")
        data_processor = DataProcessor()
        data_processor.process_all_categories()
        logging.info("Data processing completed")

        # Stage 3: build the QA pairs.
        logging.info("Starting QA pair generation...")
        qa_generator = QAPairGenerator()
        qa_generator.generate_all_pairs()
        logging.info("QA pair generation completed")
    except Exception as exc:
        # logging.exception logs at ERROR level and attaches the traceback.
        logging.exception(f"Error in main execution: {exc}")
        raise
    else:
        logging.info("Pipeline completed successfully")
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()