lawlevisan committed on
Commit
a42c3a0
·
verified ·
1 Parent(s): e8b553c

Upload helpers.py

Browse files
Files changed (1) hide show
  1. src/helpers.py +82 -0
src/helpers.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # src/helpers.py
2
+ from datetime import datetime
3
+ import logging
4
+ import os
5
+ import time
6
+ from functools import wraps
7
+ # --------------------
8
+ # Ensure folder exists
9
+ # --------------------
10
def ensure_folder(path):
    """Create the directory *path* (and any missing parents) if needed.

    Args:
        path: Directory path to create. An empty string (for example the
            result of ``os.path.dirname("file.log")``) means "current
            directory" and is treated as a no-op.

    Raises:
        FileExistsError: If *path* exists but is not a directory.
    """
    # os.makedirs("") raises FileNotFoundError, so skip empty paths.
    if not path:
        return
    # exist_ok=True avoids the check-then-create race of testing
    # os.path.exists() first (another process may create it in between).
    os.makedirs(path, exist_ok=True)
13
+
14
+ # --------------------
15
+ # Logger setup
16
+ # --------------------
17
def setup_logger(log_file: str = None):
    """Return the shared ``reddit_logger`` logger, configuring it on demand.

    The logger level is set to INFO. A console handler is attached once, and
    when *log_file* is given a file handler for that path is attached once.
    Calling this function repeatedly never duplicates handlers, and a file
    handler can be added by a later call even if the console handler was
    already installed by an earlier one.

    Args:
        log_file: Optional path of a log file. Missing parent directories
            are created. A bare filename (no directory part) is allowed.

    Returns:
        The configured ``logging.Logger`` named ``"reddit_logger"``.
    """
    logger = logging.getLogger("reddit_logger")
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

    # FileHandler subclasses StreamHandler, so exclude it when looking for
    # an existing console handler.
    has_console = any(
        isinstance(handler, logging.StreamHandler)
        and not isinstance(handler, logging.FileHandler)
        for handler in logger.handlers
    )
    if not has_console:
        console_handler = logging.StreamHandler()
        console_handler.setLevel(logging.INFO)
        console_handler.setFormatter(formatter)
        logger.addHandler(console_handler)

    if log_file:
        # FileHandler stores its target as an absolute path in baseFilename,
        # so compare against the absolute form of the requested path.
        target = os.path.abspath(log_file)
        has_file = any(
            isinstance(handler, logging.FileHandler)
            and handler.baseFilename == target
            for handler in logger.handlers
        )
        if not has_file:
            # dirname of an absolute path is never empty, so a bare filename
            # such as "app.log" is safe here.
            os.makedirs(os.path.dirname(target), exist_ok=True)
            # Explicit UTF-8 so non-ASCII messages do not depend on the
            # platform's default encoding.
            file_handler = logging.FileHandler(log_file, encoding="utf-8")
            file_handler.setLevel(logging.INFO)
            file_handler.setFormatter(formatter)
            logger.addHandler(file_handler)

    return logger
38
+
39
+ # Module-level logger, configured when this module is imported: console output plus the relative file logs/reddit_scraper.log (setup_logger creates the logs/ folder if missing)
40
+ logger = setup_logger("logs/reddit_scraper.log")
41
+
42
+ # --------------------
43
+ # Convert timestamp to datetime
44
+ # --------------------
45
def timestamp_to_datetime(ts):
    """Convert a POSIX timestamp to a naive ``datetime`` expressed in UTC.

    Args:
        ts: Seconds since the Unix epoch (int or float).

    Returns:
        A timezone-naive ``datetime`` whose fields are the UTC wall-clock
        time for *ts* (same value the deprecated
        ``datetime.utcfromtimestamp`` returned).
    """
    # Local import: the module header only imports ``datetime`` itself.
    from datetime import timezone

    # datetime.utcfromtimestamp() is deprecated since Python 3.12. Convert
    # via an aware UTC datetime, then drop tzinfo so callers keep receiving
    # the naive value they got before.
    return datetime.fromtimestamp(ts, tz=timezone.utc).replace(tzinfo=None)
47
+
48
+ # --------------------
49
+ # Retry decorator for API calls
50
+ # --------------------
51
def retry(exceptions, tries=3, delay=2, backoff=2, logger=None):
    """Build a decorator that retries a function on the given exceptions.

    Args:
        exceptions: Exception type, or tuple of exception types, to catch
            and retry on. Any other exception propagates immediately.
        tries: Total number of attempts (must be at least 1).
        delay: Seconds to wait before the first retry.
        backoff: Multiplier applied to the delay after each failed attempt.
        logger: Optional logger; when omitted, messages are printed.

    Returns:
        A decorator. The decorated function returns the wrapped function's
        result from the first successful attempt.

    Raises:
        ValueError: If *tries* is less than 1.
        Exception: The exception from the final attempt is re-raised
            unchanged once all attempts have failed.
    """
    if tries < 1:
        raise ValueError("tries must be at least 1")

    def report(level, msg):
        # Use the supplied logger when there is one, else fall back to print.
        if logger is not None:
            getattr(logger, level)(msg)
        else:
            print(msg)

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            wait = delay
            for attempt in range(1, tries + 1):
                try:
                    return func(*args, **kwargs)
                except exceptions as exc:
                    if attempt == tries:
                        report("error", f"{func.__name__} failed after {tries} attempts.")
                        # Re-raise inside the except block so the caller
                        # sees the real error; a bare ``raise`` after the
                        # loop would raise RuntimeError instead.
                        raise
                    report(
                        "warning",
                        f"{func.__name__} failed with {exc}, retrying in {wait} seconds...",
                    )
                    # Sleep only when another attempt will follow.
                    time.sleep(wait)
                    wait *= backoff
        return wrapper
    return decorator