|
|
import logging
|
|
|
from datetime import datetime
|
|
|
|
|
|
from scraper import BloomingtonScraper
|
|
|
from processor import DataProcessor
|
|
|
from generator import QAPairGenerator
|
|
|
from config import LOG_DIR
|
|
|
|
|
|
def setup_logging() -> None:
    """Configure root logging to write to a timestamped file and the console.

    The log file is placed under ``LOG_DIR`` and named
    ``main_<YYYYmmdd_HHMMSS>.log`` from the local time at call time. Records
    at ``INFO`` level and above are formatted as
    ``<asctime> - <levelname> - <message>``.

    Raises:
        OSError: If the log directory or the log file cannot be created.
    """
    # FileHandler opens its file immediately, so a missing directory would
    # abort the run before anything is logged. LOG_DIR is joined with "/"
    # below, so it is assumed to be a pathlib.Path (confirm against config).
    LOG_DIR.mkdir(parents=True, exist_ok=True)

    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    log_file = LOG_DIR / f"main_{timestamp}.log"

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[
            # Explicit UTF-8 so non-ASCII text logs identically on every
            # platform instead of depending on the locale's default encoding.
            logging.FileHandler(log_file, encoding="utf-8"),
            logging.StreamHandler(),
        ],
    )
|
|
|
|
|
|
def main():
    """Run the pipeline end to end: collect data, process it, generate QA pairs.

    Logging is configured first. The three stages then run in order, each
    bracketed by start/finish log messages. Any exception raised by a stage
    is logged with its traceback and re-raised to the caller; the final
    success message is only logged when every stage finished without error.
    """
    setup_logging()
    logging.info("Starting Bloomington Tourist Guide data collection and QA pair generation")

    try:
        # Stage 1: data collection.
        logging.info("Starting data collection...")
        collector = BloomingtonScraper()
        collector.scrape_all_categories()
        stats = collector.get_search_stats()
        logging.info(f"Data collection completed. Search stats: {stats}")

        # Stage 2: data processing.
        logging.info("Starting data processing...")
        cleaner = DataProcessor()
        cleaner.process_all_categories()
        logging.info("Data processing completed")

        # Stage 3: QA pair generation.
        logging.info("Starting QA pair generation...")
        qa_builder = QAPairGenerator()
        qa_builder.generate_all_pairs()
        logging.info("QA pair generation completed")
    except Exception as exc:
        # Top-level boundary: record the failure with its traceback, then
        # let it propagate so the process still fails.
        logging.error(f"Error in main execution: {exc}", exc_info=True)
        raise
    else:
        logging.info("Pipeline completed successfully")
|
|
|
|
|
|
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()