| import os | |
| import logging | |
def set_tokenizer_parallelism(enabled=False):
    """
    Export the TOKENIZERS_PARALLELISM environment variable.

    The variable is set to the lower-cased string form of ``enabled``
    (``"false"`` for the default), which is intended to avoid
    fork-related warnings. The chosen value is then reported at INFO
    level on the "ConfigUtils" logger.
    """
    env_value = str(enabled).lower()
    os.environ.update({"TOKENIZERS_PARALLELISM": env_value})

    log = logging.getLogger("ConfigUtils")
    already_configured = bool(log.handlers)
    if not already_configured:
        # No handler attached to this logger: fall back to a basic root setup.
        logging.basicConfig(level=logging.INFO)

    message = f"Tokenizers parallelism set to: {enabled}"
    log.info(message)
def load_urls_from_file(file_path, logger_name="ConfigUtils"):
    """
    Load URLs from a text file, ignoring empty lines and comments.

    Each line is stripped of surrounding whitespace. Blank lines and lines
    whose first remaining character is ``#`` are skipped. The file is read
    as UTF-8; a leading byte-order mark, if present, is discarded so that it
    cannot end up glued to the first line.

    Args:
        file_path: Path of the text file to read.
        logger_name: Name of the logger used for progress and error messages.

    Returns:
        The URLs in file order. An empty list is returned (and the error is
        logged) if the file cannot be opened or read for any reason.
    """
    logger = logging.getLogger(logger_name)
    if not logger.handlers:
        logging.basicConfig(level=logging.INFO)

    try:
        # "utf-8-sig" decodes plain UTF-8 as well, but also drops a BOM.
        # Without it, a first line such as "# comment" would arrive as
        # "\ufeff# comment", fail the startswith('#') check and be returned
        # as if it were a URL.
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            stripped_lines = (raw.strip() for raw in f)
            urls = [
                line for line in stripped_lines
                if line and not line.startswith('#')
            ]
        logger.info("Loaded %s URLs from %s", len(urls), file_path)
        for position, url in enumerate(urls, start=1):
            logger.debug(" URL %s: %s", position, url)
        return urls
    except Exception as e:
        # Deliberate best-effort: callers get an empty list instead of an
        # exception, with the failure reported through the logger.
        logger.error("Error loading URLs from %s: %s", file_path, e)
        return []