Spaces:
Sleeping
Sleeping
File size: 2,146 Bytes
fc361bb a9d5e1b fc361bb a9d5e1b fc361bb a9d5e1b fc361bb a9d5e1b fc361bb a9d5e1b fc361bb a9d5e1b fc361bb a9d5e1b fc361bb a9d5e1b fc361bb a9d5e1b fc361bb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
from __future__ import annotations
from dataclasses import dataclass
import os
from dotenv import load_dotenv
@dataclass(frozen=True)
class Settings:
    """Read-only configuration record; instances are built by ``load_settings``.

    ``frozen=True`` means attribute assignment after construction raises
    ``dataclasses.FrozenInstanceError``. The field order below is also the
    positional-constructor order, so it must not be rearranged.
    """

    # --- OpenRouter access ---
    openrouter_api_key: str
    openrouter_model: str | None  # None when no model was configured

    # --- limits and thresholds ---
    # NOTE(review): exact meaning of each knob is defined by its consumers,
    # which are not visible from this module -- confirm before documenting.
    max_images: int
    dpi: int  # populated from the PDF_RENDER_DPI environment variable
    ocr_lang: str
    min_text_chars_for_digital: int
    topk_per_statement: int

    # --- block logic knobs ---
    max_blocks_per_statement: int
    continuation_max_forward: int
# OpenRouter model IDs that were listed as free and vision-capable on their
# OpenRouter pages when this list was written.
# NOTE: availability changes over time -- re-check the pages before relying on an entry.
DEFAULT_FREE_VISION_MODELS: list[str] = [
    "google/gemma-3-12b-it:free",
    "nvidia/nemotron-nano-12b-v2-vl:free",
    "amazon/nova-2-lite-v1:free",
]
def _env_text(name: str) -> str:
    """Return environment variable *name* with surrounding whitespace removed.

    Yields ``""`` when the variable is unset or contains only whitespace.
    """
    return os.getenv(name, "").strip()


def _int_setting(overrides: dict, key: str, env_name: str, default: int) -> int:
    """Resolve one integer setting: explicit override, then environment, then *default*.

    Raises:
        ValueError: if the environment variable is set to a non-integer value.
    """
    override = overrides.get(key)
    # Compare against None rather than truthiness so that an explicit 0
    # is honoured instead of being replaced by the environment default.
    if override is not None:
        return override

    raw = _env_text(env_name)
    if not raw:
        # Unset or blank (e.g. "MAX_IMAGES=" in a .env file): use the default
        # instead of failing on int("").
        return default
    try:
        return int(raw)
    except ValueError as err:
        # Same exception type as before, but naming the offending variable.
        raise ValueError(f"{env_name} must be an integer, got {raw!r}") from err


def load_settings(**kwargs) -> Settings:
    """Build a ``Settings`` instance from keyword overrides and the environment.

    ``load_dotenv()`` is called first, then every field is resolved in this
    order: keyword override, environment variable, built-in default.

    Keyword overrides (all optional) use the ``Settings`` field names.
    Integer overrides are taken as given unless they are ``None``; string
    overrides are used only when non-empty. Unrecognised keywords are ignored.

    Environment variables and defaults:
        OPENROUTER_API_KEY (required), OPENROUTER_MODEL (none),
        MAX_IMAGES (12), PDF_RENDER_DPI (200), OCR_LANG ("eng"),
        MIN_TEXT_CHARS_FOR_DIGITAL (80), TOPK_PER_STATEMENT (3),
        MAX_BLOCKS_PER_STATEMENT (2), CONTINUATION_MAX_FORWARD (6).

    Returns:
        The populated, immutable ``Settings`` object.

    Raises:
        RuntimeError: if no API key is supplied by keyword or environment.
        ValueError: if an integer environment variable is malformed.
    """
    load_dotenv()

    api_key = kwargs.get("openrouter_api_key") or _env_text("OPENROUTER_API_KEY")
    if not api_key:
        raise RuntimeError("Missing OPENROUTER_API_KEY in environment/.env")

    # An empty model name collapses to None.
    model = kwargs.get("openrouter_model") or _env_text("OPENROUTER_MODEL") or None
    # A blank OCR_LANG falls back to "eng" rather than yielding "".
    ocr_lang = kwargs.get("ocr_lang") or _env_text("OCR_LANG") or "eng"

    return Settings(
        openrouter_api_key=api_key,
        openrouter_model=model,
        max_images=_int_setting(kwargs, "max_images", "MAX_IMAGES", 12),
        dpi=_int_setting(kwargs, "dpi", "PDF_RENDER_DPI", 200),
        ocr_lang=ocr_lang,
        min_text_chars_for_digital=_int_setting(
            kwargs, "min_text_chars_for_digital", "MIN_TEXT_CHARS_FOR_DIGITAL", 80
        ),
        topk_per_statement=_int_setting(
            kwargs, "topk_per_statement", "TOPK_PER_STATEMENT", 3
        ),
        max_blocks_per_statement=_int_setting(
            kwargs, "max_blocks_per_statement", "MAX_BLOCKS_PER_STATEMENT", 2
        ),
        continuation_max_forward=_int_setting(
            kwargs, "continuation_max_forward", "CONTINUATION_MAX_FORWARD", 6
        ),
    )
|