import os
import logging


def _get_logger(name):
    """Return the logger called *name*, ensuring some logging config exists.

    If the named logger has no handlers of its own, ``logging.basicConfig``
    is called at INFO level. ``basicConfig`` does nothing when the root
    logger is already configured, so an application's existing logging
    setup is not overridden.
    """
    logger = logging.getLogger(name)
    if not logger.handlers:
        logging.basicConfig(level=logging.INFO)
    return logger


def set_tokenizer_parallelism(enabled: bool = False) -> None:
    """Set the ``TOKENIZERS_PARALLELISM`` environment variable.

    Configures tokenizer parallelism to avoid fork-related warnings.

    Args:
        enabled: Desired setting. It is stored in the environment as the
            lowercased ``str()`` of the value, i.e. ``"true"`` or
            ``"false"`` for booleans.
    """
    os.environ["TOKENIZERS_PARALLELISM"] = str(enabled).lower()

    logger = _get_logger("ConfigUtils")
    logger.info("Tokenizers parallelism set to: %s", enabled)


def load_urls_from_file(file_path, logger_name: str = "ConfigUtils") -> list:
    """Load URLs from a text file, ignoring empty lines and comments.

    Each line is stripped of surrounding whitespace. Lines that are empty
    after stripping, or whose first non-blank character is ``#``, are
    skipped. The file is decoded as UTF-8, and a leading byte-order mark
    (if any) is discarded so it cannot corrupt the first entry.

    Args:
        file_path: Path of the file to read; passed to ``open()``.
        logger_name: Name of the logger used for progress and error messages.

    Returns:
        The URLs in file order. An empty list is returned if the file
        cannot be read; the error is logged rather than raised.
    """
    logger = _get_logger(logger_name)

    try:
        # "utf-8-sig" reads plain UTF-8 and also strips a BOM when present.
        with open(file_path, "r", encoding="utf-8-sig") as f:
            stripped_lines = (line.strip() for line in f)
            urls = [
                entry
                for entry in stripped_lines
                if entry and not entry.startswith("#")
            ]
    except Exception as e:
        # Deliberately best-effort: callers receive [] instead of an
        # exception when the file is missing, unreadable, or undecodable.
        logger.error("Error loading URLs from %s: %s", file_path, e)
        return []

    logger.info("Loaded %d URLs from %s", len(urls), file_path)
    for position, url in enumerate(urls, start=1):
        logger.debug(" URL %d: %s", position, url)
    return urls