#utils/helpers.py
"""Small shared helpers: folder creation, logging setup, timestamps, retries."""

from datetime import datetime, timezone
import logging
import os
import time
from functools import wraps
from typing import Optional


# --------------------
# Ensure folder exists
# --------------------
def ensure_folder(path):
    """Create the directory ``path`` (including parents) if it is missing.

    An empty or ``None`` path is treated as "nothing to create". This matters
    because ``os.path.dirname("file.log")`` returns ``""`` and
    ``os.makedirs("")`` raises ``FileNotFoundError``.
    """
    if not path:
        return
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(path, exist_ok=True)


# --------------------
# Logger setup
# --------------------
def setup_logger(log_file: Optional[str] = None):
    """Return the shared ``"reddit_logger"`` logger, configuring it on first use.

    On the first call a console handler is attached and, when ``log_file`` is
    given, a file handler writing to that path (its parent folder is created
    if needed). Both handlers use INFO level and the same format.

    If the logger already has handlers, it is returned unchanged; in that
    case ``log_file`` is ignored so handlers are never duplicated.
    """
    logger = logging.getLogger("reddit_logger")
    logger.setLevel(logging.INFO)

    if not logger.handlers:
        formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

        # Console handler
        ch = logging.StreamHandler()
        ch.setLevel(logging.INFO)
        ch.setFormatter(formatter)
        logger.addHandler(ch)

        # File handler (optional)
        if log_file:
            ensure_folder(os.path.dirname(log_file))
            # Explicit encoding so non-ASCII log text does not depend on the
            # platform's default locale encoding.
            fh = logging.FileHandler(log_file, encoding="utf-8")
            fh.setLevel(logging.INFO)
            fh.setFormatter(formatter)
            logger.addHandler(fh)

    return logger


# Initialize logger (console + optional file)
logger = setup_logger("logs/reddit_scraper.log")


# --------------------
# Convert timestamp to datetime
# --------------------
def timestamp_to_datetime(ts):
    """Convert a POSIX timestamp (seconds) to a naive ``datetime`` in UTC.

    The result carries no ``tzinfo``, matching what
    ``datetime.utcfromtimestamp`` returned; that function is deprecated
    since Python 3.12, so the aware conversion is used and the tzinfo dropped.
    """
    return datetime.fromtimestamp(ts, tz=timezone.utc).replace(tzinfo=None)


# --------------------
# Retry decorator for API calls
# --------------------
def retry(exceptions, tries=3, delay=2, backoff=2, logger=None):
    """
    Retry decorator for functions that may fail due to network/API issues.

    exceptions: exception type, or tuple of exception types, to catch
    tries: total number of attempts (must be >= 1)
    delay: initial delay in seconds between retries
    backoff: multiplier applied to the delay after each failure
    logger: optional logger for messages; ``print`` is used when omitted

    The wrapped function's return value is passed through on success. When
    every attempt fails, the exception from the last attempt is re-raised.
    Exceptions not listed in ``exceptions`` propagate immediately.

    Raises ValueError if ``tries`` is less than 1.
    """
    if tries < 1:
        raise ValueError("tries must be at least 1")

    def _emit(level, msg):
        # Route to the supplied logger when there is one, otherwise stdout.
        if logger:
            logger.log(level, msg)
        else:
            print(msg)

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            wait = delay
            for attempt in range(1, tries + 1):
                try:
                    return func(*args, **kwargs)
                except exceptions as e:
                    if attempt == tries:
                        _emit(
                            logging.ERROR,
                            f"{func.__name__} failed after {tries} attempts.",
                        )
                        # Re-raise from inside the handler so the original
                        # exception (and traceback) reaches the caller; a bare
                        # ``raise`` after the loop would have no active
                        # exception and raise RuntimeError instead.
                        raise
                    _emit(
                        logging.WARNING,
                        f"{func.__name__} failed with {e}, retrying in {wait} seconds...",
                    )
                    time.sleep(wait)
                    wait *= backoff

        return wrapper

    return decorator