# Source: version1/main.py
# Repository page metadata: krishna3103 · "Upload 9 files" · commit b56d4a6 (verified)
import logging
from datetime import datetime
from scraper import BloomingtonScraper
from processor import DataProcessor
from generator import QAPairGenerator
from config import LOG_DIR
def setup_logging() -> None:
    """Configure root logging to write to a timestamped file and the console.

    The log file is named ``main_<YYYYmmdd_HHMMSS>.log`` (local time) and is
    placed under ``LOG_DIR``. The target directory is created first when it
    does not exist, so the file handler does not fail with
    ``FileNotFoundError`` on a fresh checkout.

    Messages are emitted at ``INFO`` level and above, formatted as
    ``<time> - <level> - <message>``.
    """
    log_file = LOG_DIR / f"main_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"

    # FileHandler opens its file immediately and does not create missing
    # parent directories, so make sure the directory exists beforehand.
    # NOTE(review): assumes LOG_DIR is a pathlib.Path (it is joined with `/`).
    log_file.parent.mkdir(parents=True, exist_ok=True)

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[
            # Explicit UTF-8 so non-ASCII text in log messages is written
            # the same way regardless of the platform's default encoding.
            logging.FileHandler(log_file, encoding="utf-8"),
            logging.StreamHandler(),  # Also print to console
        ],
    )
def main():
    """Run the pipeline end to end: scrape, process, then generate QA pairs.

    Logging is configured before anything else. If any stage raises, the
    error is logged together with its traceback and the exception is
    re-raised to the caller. The success message is logged only when all
    three stages complete without raising.
    """
    setup_logging()
    logging.info("Starting Bloomington Tourist Guide data collection and QA pair generation")

    try:
        # Stage 1 - scraping
        logging.info("Starting data collection...")
        collector = BloomingtonScraper()
        collector.scrape_all_categories()
        search_stats = collector.get_search_stats()
        logging.info(f"Data collection completed. Search stats: {search_stats}")

        # Stage 2 - processing
        logging.info("Starting data processing...")
        DataProcessor().process_all_categories()
        logging.info("Data processing completed")

        # Stage 3 - QA pair generation
        logging.info("Starting QA pair generation...")
        QAPairGenerator().generate_all_pairs()
        logging.info("QA pair generation completed")
    except Exception as e:
        # logging.exception logs at ERROR level and attaches the traceback.
        logging.exception(f"Error in main execution: {e}")
        raise
    else:
        logging.info("Pipeline completed successfully")
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()