"""Centralised configuration for the duplicate detector.""" from __future__ import annotations import os from dataclasses import dataclass from decimal import Decimal from typing import Tuple from dotenv import load_dotenv load_dotenv() def _get_decimal(env_key: str, default: str) -> Decimal: raw_value = os.getenv(env_key, default) try: return Decimal(raw_value) except Exception as exc: # pragma: no cover - defensive logging raise ValueError(f"Invalid decimal for {env_key}: {raw_value}") from exc def _get_int(env_key: str, default: str) -> int: raw_value = os.getenv(env_key, default) try: return int(raw_value) except Exception as exc: # pragma: no cover - defensive logging raise ValueError(f"Invalid int for {env_key}: {raw_value}") from exc def _get_tuple(env_key: str, default: str) -> Tuple[str, ...]: raw_value = os.getenv(env_key, default) values = [value.strip() for value in raw_value.split(",") if value.strip()] if not values: raise ValueError(f"{env_key} must contain at least one value") return tuple(values) @dataclass(frozen=True) class Settings: mongo_uri: str = os.getenv("MONGO_URI") mongo_db: str = os.getenv("MONGO_DB", "expense") expense_collection: str = os.getenv("MONGO_EXPENSE_COLLECTION", "transactions") alias_collection: str = os.getenv("MONGO_ALIAS_COLLECTION", "merchant_aliases") suggestion_collection: str = os.getenv("MONGO_SUGGESTION_COLLECTION", "merge_suggestions") amount_tolerance_pct: Decimal = _get_decimal("AMOUNT_TOLERANCE_PCT", "1.0") time_tolerance_minutes: int = _get_int("TIME_TOLERANCE_MINUTES", "10") default_lookback_hours: int = _get_int("DEFAULT_LOOKBACK_HOURS", "48") service_name: str = os.getenv("SERVICE_NAME", "duplicate-detector") max_batch_size: int = _get_int("MAX_BATCH_SIZE", "5000") time_fields: Tuple[str, ...] = _get_tuple( "TIME_FIELDS", "date,expense_time,createdAt", ) merchant_fields: Tuple[str, ...] = _get_tuple( "MERCHANT_FIELDS", "merchant,note,paymentType,type,to", ) scheduler_interval_seconds: int = _get_int("SCHEDULER_INTERVAL_SECONDS", "60") settings = Settings()