chaaim123 committed on
Commit
8d26cc7
·
verified ·
1 Parent(s): ec123f6

Create utils/config_utils.py

Browse files
Files changed (1) hide show
  1. utils/config_utils.py +37 -0
utils/config_utils.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import logging
3
+
4
def set_tokenizer_parallelism(enabled=False):
    """
    Configure tokenizer parallelism to avoid fork-related warnings.

    Sets the ``TOKENIZERS_PARALLELISM`` environment variable for the current
    process and logs the requested setting.

    Args:
        enabled: Whether parallelism should be on. Booleans and strings are
            written lower-cased as-is (``True`` -> ``"true"``,
            ``"False"`` -> ``"false"``). Any other value is reduced to its
            truthiness, so ``None`` or an empty container means ``"false"``.

    Returns:
        None.
    """
    if isinstance(enabled, (bool, str)):
        value = str(enabled).lower()
    else:
        # str(None).lower() would yield "none", which is not a recognised
        # "off" spelling -- NOTE(review): the tokenizers library is understood
        # to treat unrecognised values as enabled; confirm for the version in
        # use. Collapsing to truthiness keeps falsy inputs disabled.
        value = "true" if enabled else "false"
    os.environ["TOKENIZERS_PARALLELISM"] = value

    logger = logging.getLogger("ConfigUtils")
    if not logger.handlers:
        # basicConfig does nothing when the root logger already has handlers.
        logging.basicConfig(level=logging.INFO)
    logger.info("Tokenizers parallelism set to: %s", enabled)
14
+
15
+
16
def load_urls_from_file(file_path, logger_name="ConfigUtils"):
    """
    Load URLs from a text file, ignoring empty lines and comments.

    Every line is stripped of surrounding whitespace. Lines that are empty
    after stripping, or that start with ``#``, are skipped.

    Args:
        file_path: Path of the text file to read, one URL per line.
        logger_name: Name of the logger used to report progress and errors.

    Returns:
        The URLs in file order, or an empty list if the file could not be
        read (the error is logged rather than raised).
    """
    logger = logging.getLogger(logger_name)
    if not logger.handlers:
        logging.basicConfig(level=logging.INFO)

    try:
        # "utf-8-sig" reads plain UTF-8 and also drops a leading BOM, which
        # would otherwise stick to the first line and defeat the "#" check.
        with open(file_path, "r", encoding="utf-8-sig") as f:
            stripped_lines = (raw.strip() for raw in f)
            urls = [
                line
                for line in stripped_lines
                if line and not line.startswith("#")
            ]
    except Exception as e:
        # Deliberately best-effort: callers get an empty list, never an
        # exception, when the file is missing or unreadable.
        logger.error(f"Error loading URLs from {file_path}: {e}")
        return []

    logger.info(f"Loaded {len(urls)} URLs from {file_path}")
    for position, url in enumerate(urls, start=1):
        logger.debug(f"  URL {position}: {url}")
    return urls