File size: 2,942 Bytes
7fd3f6f
 
 
 
 
029948d
7fd3f6f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
029948d
 
7fd3f6f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
from __future__ import annotations

import os
from dataclasses import dataclass, field
from pathlib import Path

from .hf_env_files import resolve_json_or_path


@dataclass(frozen=True)
class Settings:
    """Immutable bundle of configuration values for the PDF pipeline worker.

    Instances are frozen: attributes cannot be reassigned after construction.
    The OpenAI API key is excluded from the generated ``repr`` so that logging
    or printing a ``Settings`` object does not expose the secret; it still
    takes part in equality comparison and is readable as an attribute.
    """

    # Repo paths
    repo_root: Path
    backend_dir: Path
    worker_dir: Path

    # Gmail: locations of the credentials and token files
    credentials_path: Path
    token_path: Path

    # Label names (presumably Gmail labels used to route messages; confirm with the worker code)
    label_incoming: str
    label_known: str
    label_unknown: str
    label_train: str

    # Notification: recipient and sender e-mail addresses
    notify_to_email: str
    notify_from_email: str

    # Trainer
    trainer_base_url: str

    # OpenAI
    # repr=False keeps the secret out of logs, tracebacks and debug prints.
    openai_api_key: str = field(repr=False)
    openai_model: str

    # Worker behavior
    poll_seconds: int
    max_messages_per_poll: int
    render_pages: int
    render_dpi: int


def _env_str(name: str, default: str) -> str:
    """Return env var *name* with surrounding whitespace removed.

    Falls back to *default* when the variable is unset or blank.
    """
    return os.environ.get(name, "").strip() or default


def _env_int(name: str, default: int) -> int:
    """Return env var *name* parsed as an integer, or *default* when unset.

    Raises:
        ValueError: the variable is set but is not a valid integer. The
            message names the variable and shows the offending value.
    """
    raw = os.environ.get(name)
    if raw is None:
        return default
    try:
        return int(raw)
    except ValueError as err:
        raise ValueError(f"Env var {name} must be an integer, got {raw!r}") from err


def load_settings(repo_root: Path) -> Settings:
    """Build a ``Settings`` instance from ``os.environ`` and *repo_root*.

    All plain values are read and validated before the credential/token paths
    are resolved, so a bad configuration is reported before
    ``resolve_json_or_path`` runs.

    Args:
        repo_root: Repository root. The backend directory is
            ``repo_root / "backend"`` and the worker directory is
            ``repo_root / "backend" / "worker"``.

    Returns:
        The populated, frozen ``Settings`` object.

    Raises:
        RuntimeError: ``OPENAI_API_KEY_TEST``, ``PDF_PIPELINE_NOTIFY_TO`` or
            ``PDF_PIPELINE_NOTIFY_FROM`` is unset or blank, or
            ``PDF_TRAINER_BASE_URL`` is set to a blank value.
        ValueError: one of the integer variables (``PDF_PIPELINE_POLL_SECONDS``,
            ``PDF_PIPELINE_MAX_PER_POLL``, ``PDF_PIPELINE_RENDER_PAGES``,
            ``PDF_PIPELINE_RENDER_DPI``) is set to a non-integer value.
    """
    backend_dir = repo_root / "backend"
    worker_dir = backend_dir / "worker"

    # IMPORTANT: the key is read from OPENAI_API_KEY_TEST, i.e. the same
    # variable name that is stored in backend/.env.
    openai_api_key = os.environ.get("OPENAI_API_KEY_TEST", "").strip()
    if not openai_api_key:
        raise RuntimeError("Missing OPENAI_API_KEY_TEST env var in backend/.env")

    notify_to = os.environ.get("PDF_PIPELINE_NOTIFY_TO", "").strip()
    if not notify_to:
        raise RuntimeError("Missing PDF_PIPELINE_NOTIFY_TO env var")

    notify_from = os.environ.get("PDF_PIPELINE_NOTIFY_FROM", "").strip()
    if not notify_from:
        raise RuntimeError("Missing PDF_PIPELINE_NOTIFY_FROM env var")

    # Unset falls back to the local default; an explicitly blank value is
    # rejected rather than silently replaced.
    trainer_base_url = os.environ.get("PDF_TRAINER_BASE_URL", "http://localhost:5173").strip()
    if not trainer_base_url:
        raise RuntimeError("Missing PDF_TRAINER_BASE_URL env var")

    # Parse numeric settings up front so that a malformed value produces an
    # error naming the variable before any path resolution happens.
    poll_seconds = _env_int("PDF_PIPELINE_POLL_SECONDS", 20)
    max_messages_per_poll = _env_int("PDF_PIPELINE_MAX_PER_POLL", 5)
    render_pages = _env_int("PDF_PIPELINE_RENDER_PAGES", 2)
    render_dpi = _env_int("PDF_PIPELINE_RENDER_DPI", 200)

    # Labels and model name are stripped like every other string setting, so
    # stray whitespace from an env file does not end up in the value.
    label_incoming = _env_str("PDF_PIPELINE_LABEL_INCOMING", "PDF_PIPELINE/INCOMING")
    label_known = _env_str("PDF_PIPELINE_LABEL_KNOWN", "PDF_PIPELINE/KNOWN")
    label_unknown = _env_str("PDF_PIPELINE_LABEL_UNKNOWN", "PDF_PIPELINE/UNKNOWN")
    label_train = _env_str("PDF_PIPELINE_LABEL_TRAIN", "PDF_PIPELINE/TRAIN")
    openai_model = _env_str("OPENAI_MODEL", "gpt-4.1-mini")

    # NOTE(review): resolve_json_or_path is a project helper whose behavior is
    # not visible here; it receives an env var name, a path under backend/ and
    # a path under /tmp. Confirm its semantics in hf_env_files.
    credentials_path = resolve_json_or_path(
        "GMAIL_CREDENTIALS_JSON",
        backend_dir / "credentials.json",
        Path("/tmp/credentials.json"),
    )
    token_path = resolve_json_or_path(
        "GMAIL_TOKEN_JSON",
        backend_dir / "token.json",
        Path("/tmp/token.json"),
    )

    return Settings(
        repo_root=repo_root,
        backend_dir=backend_dir,
        worker_dir=worker_dir,
        credentials_path=credentials_path,
        token_path=token_path,
        label_incoming=label_incoming,
        label_known=label_known,
        label_unknown=label_unknown,
        label_train=label_train,
        notify_to_email=notify_to,
        notify_from_email=notify_from,
        trainer_base_url=trainer_base_url,
        openai_api_key=openai_api_key,
        openai_model=openai_model,
        poll_seconds=poll_seconds,
        max_messages_per_poll=max_messages_per_poll,
        render_pages=render_pages,
        render_dpi=render_dpi,
    )