from __future__ import annotations from dataclasses import dataclass import os from dotenv import load_dotenv @dataclass(frozen=True) class Settings: openrouter_api_key: str openrouter_model: str | None max_images: int dpi: int ocr_lang: str min_text_chars_for_digital: int topk_per_statement: int # block logic knobs max_blocks_per_statement: int continuation_max_forward: int DEFAULT_FREE_VISION_MODELS = [ # Free + vision-capable (as of their OpenRouter pages / availability changes over time) "google/gemma-3-12b-it:free", "nvidia/nemotron-nano-12b-v2-vl:free", "amazon/nova-2-lite-v1:free", ] def load_settings(**kwargs) -> Settings: load_dotenv() api_key = kwargs.get("openrouter_api_key") or os.getenv("OPENROUTER_API_KEY", "").strip() if not api_key: raise RuntimeError("Missing OPENROUTER_API_KEY in environment/.env") model = kwargs.get("openrouter_model") or os.getenv("OPENROUTER_MODEL", "").strip() or None max_images = kwargs.get("max_images") or int(os.getenv("MAX_IMAGES", "12")) dpi = kwargs.get("dpi") or int(os.getenv("PDF_RENDER_DPI", "200")) ocr_lang = kwargs.get("ocr_lang") or os.getenv("OCR_LANG", "eng") min_text_chars_for_digital = kwargs.get("min_text_chars_for_digital") or int( os.getenv("MIN_TEXT_CHARS_FOR_DIGITAL", "80") ) topk_per_statement = kwargs.get("topk_per_statement") or int(os.getenv("TOPK_PER_STATEMENT", "3")) max_blocks_per_statement = kwargs.get("max_blocks_per_statement") or int( os.getenv("MAX_BLOCKS_PER_STATEMENT", "2") ) continuation_max_forward = kwargs.get("continuation_max_forward") or int( os.getenv("CONTINUATION_MAX_FORWARD", "6") ) return Settings( openrouter_api_key=api_key, openrouter_model=model, max_images=max_images, dpi=dpi, ocr_lang=ocr_lang, min_text_chars_for_digital=min_text_chars_for_digital, topk_per_statement=topk_per_statement, max_blocks_per_statement=max_blocks_per_statement, continuation_max_forward=continuation_max_forward, )