LogicGoInfotechSpaces's picture
Update src/config.py
25661b0 verified
"""Centralised configuration for the duplicate detector."""
from __future__ import annotations
import os
from dataclasses import dataclass
from decimal import Decimal
from typing import Tuple
from dotenv import load_dotenv
# Load variables from a .env file (when python-dotenv finds one) into the
# process environment. This has to run before the Settings class body is
# executed, because the field defaults there call os.getenv immediately.
load_dotenv()
def _get_decimal(env_key: str, default: str) -> Decimal:
raw_value = os.getenv(env_key, default)
try:
return Decimal(raw_value)
except Exception as exc: # pragma: no cover - defensive logging
raise ValueError(f"Invalid decimal for {env_key}: {raw_value}") from exc
def _get_int(env_key: str, default: str) -> int:
raw_value = os.getenv(env_key, default)
try:
return int(raw_value)
except Exception as exc: # pragma: no cover - defensive logging
raise ValueError(f"Invalid int for {env_key}: {raw_value}") from exc
def _get_tuple(env_key: str, default: str) -> Tuple[str, ...]:
raw_value = os.getenv(env_key, default)
values = [value.strip() for value in raw_value.split(",") if value.strip()]
if not values:
raise ValueError(f"{env_key} must contain at least one value")
return tuple(values)
@dataclass(frozen=True)
class Settings:
    """Immutable bundle of settings for the duplicate detector.

    Every default below is computed from the process environment when this
    class body is executed (i.e. when the module is imported), not each time
    ``Settings()`` is instantiated. Individual values can still be overridden
    by passing keyword arguments to the constructor. An invalid numeric or
    list value in the environment raises ``ValueError`` at import time.
    """

    # MongoDB connection string. There is no fallback, so this is ``None``
    # when MONGO_URI is not set; consumers must handle that case.
    mongo_uri: str | None = os.getenv("MONGO_URI")
    # Database and collection names.
    mongo_db: str = os.getenv("MONGO_DB", "expense")
    expense_collection: str = os.getenv("MONGO_EXPENSE_COLLECTION", "transactions")
    alias_collection: str = os.getenv("MONGO_ALIAS_COLLECTION", "merchant_aliases")
    suggestion_collection: str = os.getenv("MONGO_SUGGESTION_COLLECTION", "merge_suggestions")
    # Numeric tunables. Units are presumably those implied by the field
    # names (percent, minutes, hours) - confirm against the consumers.
    amount_tolerance_pct: Decimal = _get_decimal("AMOUNT_TOLERANCE_PCT", "1.0")
    time_tolerance_minutes: int = _get_int("TIME_TOLERANCE_MINUTES", "10")
    default_lookback_hours: int = _get_int("DEFAULT_LOOKBACK_HOURS", "48")
    service_name: str = os.getenv("SERVICE_NAME", "duplicate-detector")
    max_batch_size: int = _get_int("MAX_BATCH_SIZE", "5000")
    # Comma-separated lists of field names, parsed into tuples.
    time_fields: Tuple[str, ...] = _get_tuple(
        "TIME_FIELDS",
        "date,expense_time,createdAt",
    )
    merchant_fields: Tuple[str, ...] = _get_tuple(
        "MERCHANT_FIELDS",
        "merchant,note,paymentType,type,to",
    )
    scheduler_interval_seconds: int = _get_int("SCHEDULER_INTERVAL_SECONDS", "60")
# Module-level instance built from the environment-derived defaults.
settings = Settings()