File size: 2,146 Bytes
fc361bb
a9d5e1b
fc361bb
 
 
 
a9d5e1b
fc361bb
 
 
 
 
 
 
 
 
 
a9d5e1b
 
 
 
 
fc361bb
a9d5e1b
fc361bb
 
 
 
 
a9d5e1b
fc361bb
 
a9d5e1b
fc361bb
 
 
 
 
 
 
 
a9d5e1b
 
 
fc361bb
 
a9d5e1b
 
 
 
 
 
 
fc361bb
 
 
 
 
 
 
 
a9d5e1b
 
fc361bb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
from __future__ import annotations

from dataclasses import dataclass
import os
from dotenv import load_dotenv


@dataclass(frozen=True)
class Settings:
    """Immutable bundle of runtime configuration values.

    Instances are built by ``load_settings``, which fills each field from a
    keyword override or from the environment variable noted next to the field.
    ``frozen=True`` makes attribute assignment after construction raise
    ``dataclasses.FrozenInstanceError``.
    """

    # Required; read from OPENROUTER_API_KEY. load_settings raises if it is missing.
    openrouter_api_key: str
    # Read from OPENROUTER_MODEL; None when no model was configured.
    openrouter_model: str | None
    # Read from MAX_IMAGES (default 12).
    # NOTE(review): presumably a cap on images sent per request -- confirm against callers.
    max_images: int
    # Read from PDF_RENDER_DPI (default 200).
    dpi: int
    # Read from OCR_LANG (default "eng").
    ocr_lang: str
    # Read from MIN_TEXT_CHARS_FOR_DIGITAL (default 80).
    # NOTE(review): looks like a threshold for treating a page as digital text
    # rather than a scan -- confirm against callers.
    min_text_chars_for_digital: int
    # Read from TOPK_PER_STATEMENT (default 3).
    topk_per_statement: int

    # block logic knobs
    # Read from MAX_BLOCKS_PER_STATEMENT (default 2).
    max_blocks_per_statement: int
    # Read from CONTINUATION_MAX_FORWARD (default 6).
    continuation_max_forward: int


# OpenRouter model identifiers that were listed as free and vision-capable
# when this list was written.
# NOTE(review): nothing in the visible part of this file references this list;
# presumably it is a fallback for when Settings.openrouter_model is None --
# confirm against callers.
DEFAULT_FREE_VISION_MODELS = [
    # Free + vision-capable according to each model's OpenRouter page.
    # Availability changes over time, so re-check before relying on an entry.
    "google/gemma-3-12b-it:free",
    "nvidia/nemotron-nano-12b-v2-vl:free",
    "amazon/nova-2-lite-v1:free",
]


def _text_setting(overrides: dict, name: str, env_var: str, default: str = "") -> str:
    """Resolve a string setting: non-blank override, else env var, else *default*."""
    override = overrides.get(name)
    if isinstance(override, str):
        # Strip so a whitespace-only override counts as "not provided",
        # matching how environment values are treated below.
        override = override.strip()
    if override:
        return override
    return os.getenv(env_var, "").strip() or default


def _int_setting(overrides: dict, name: str, env_var: str, default: int) -> int:
    """Resolve an integer setting: non-None override, else env var, else *default*."""
    override = overrides.get(name)
    # Compare against None rather than truthiness so an explicit 0 is honoured.
    if override is not None:
        return override

    raw = os.getenv(env_var, "").strip()
    if not raw:
        # An unset or blank variable (e.g. ``MAX_IMAGES=``) means "use the default".
        return default
    try:
        return int(raw)
    except ValueError as err:
        raise ValueError(
            f"Environment variable {env_var} must be an integer, got {raw!r}"
        ) from err


def load_settings(**kwargs) -> Settings:
    """Build a ``Settings`` instance from keyword overrides and the environment.

    ``load_dotenv()`` is called first so that values from a ``.env`` file are
    available through ``os.getenv``. Each setting is then resolved in this
    order: keyword override, environment variable, built-in default.

    Args:
        **kwargs: Optional overrides keyed by ``Settings`` field name
            (``openrouter_api_key``, ``openrouter_model``, ``max_images``,
            ``dpi``, ``ocr_lang``, ``min_text_chars_for_digital``,
            ``topk_per_statement``, ``max_blocks_per_statement``,
            ``continuation_max_forward``). A value of ``None`` (or a blank
            string for text settings) means "not provided". Integer overrides
            of ``0`` are honoured. Unrecognised keys are ignored.

    Returns:
        A frozen ``Settings`` object.

    Raises:
        RuntimeError: If no API key is supplied via ``openrouter_api_key`` or
            the ``OPENROUTER_API_KEY`` environment variable.
        ValueError: If an integer environment variable holds a non-integer value.
    """
    load_dotenv()

    api_key = _text_setting(kwargs, "openrouter_api_key", "OPENROUTER_API_KEY")
    if not api_key:
        raise RuntimeError("Missing OPENROUTER_API_KEY in environment/.env")

    # An unconfigured model is represented as None rather than an empty string.
    model = _text_setting(kwargs, "openrouter_model", "OPENROUTER_MODEL") or None

    return Settings(
        openrouter_api_key=api_key,
        openrouter_model=model,
        max_images=_int_setting(kwargs, "max_images", "MAX_IMAGES", 12),
        dpi=_int_setting(kwargs, "dpi", "PDF_RENDER_DPI", 200),
        ocr_lang=_text_setting(kwargs, "ocr_lang", "OCR_LANG", "eng"),
        min_text_chars_for_digital=_int_setting(
            kwargs, "min_text_chars_for_digital", "MIN_TEXT_CHARS_FOR_DIGITAL", 80
        ),
        topk_per_statement=_int_setting(
            kwargs, "topk_per_statement", "TOPK_PER_STATEMENT", 3
        ),
        max_blocks_per_statement=_int_setting(
            kwargs, "max_blocks_per_statement", "MAX_BLOCKS_PER_STATEMENT", 2
        ),
        continuation_max_forward=_int_setting(
            kwargs, "continuation_max_forward", "CONTINUATION_MAX_FORWARD", 6
        ),
    )