File size: 5,792 Bytes
d44b33d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
"""Application configuration loaded from environment variables and ``.env``.

``Settings`` is the single source of truth for LLM provider choice, Chroma paths,
chunking limits, upload caps, and SQLite locations. Use :func:`get_settings` (cached)
from route handlers and RAG modules instead of reading ``os.environ`` directly.
"""

import os
from functools import lru_cache
from typing import Any, Self

from pydantic import Field, model_validator

from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    """Pydantic-settings model for DocuAudit AI; fields map to env vars (case-insensitive).

    Values come from the environment and a UTF-8 ``.env`` file; names that do not
    match a field are ignored (``extra="ignore"``). Three validators adjust the
    loaded values:

    * ``_map_max_upload_env_alias`` accepts ``max_upload_size_mb`` as an alternative
      name for ``max_file_size_mb``.
    * ``_space_default_llm_provider`` switches the default provider from ``ollama``
      to ``huggingface`` when ``SPACE_ID`` is set and no provider was chosen explicitly.
    * ``_huggingface_token_from_hub_env`` fills ``huggingface_api_key`` from
      ``HF_TOKEN`` / ``HUGGING_FACE_HUB_TOKEN`` when the provider is ``huggingface``.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
        case_sensitive=False,
        populate_by_name=True,
    )

    @model_validator(mode="before")
    @classmethod
    def _map_max_upload_env_alias(cls, data: Any) -> Any:
        """Map the alternative name ``max_upload_size_mb`` onto ``max_file_size_mb``.

        The alias is applied only when ``max_file_size_mb`` is missing, ``None`` or
        ``""``; an explicit ``max_file_size_mb`` always wins. An alias that is itself
        ``None`` or ``""`` is treated as unset so the field default still applies
        instead of failing integer validation.

        Args:
            data: Raw input handed to the model; anything that is not a ``dict`` is
                returned untouched.

        Returns:
            A shallow copy of ``data`` without the alias key, with
            ``max_file_size_mb`` filled from the alias when applicable.
        """
        if not isinstance(data, dict):
            return data

        unset = (None, "")
        payload = dict(data)
        alias_value = payload.pop("max_upload_size_mb", None)

        if payload.get("max_file_size_mb") not in unset:
            return payload

        if alias_value in unset:
            # The alias is not a declared field, so it may not be forwarded from the
            # process environment by the settings sources; look it up directly,
            # ignoring case to match ``case_sensitive=False``.
            alias_value = next(
                (
                    raw.strip()
                    for name, raw in os.environ.items()
                    if name.lower() == "max_upload_size_mb" and raw.strip()
                ),
                None,
            )

        if alias_value not in unset:
            payload["max_file_size_mb"] = alias_value
        return payload

    # --- Application metadata -------------------------------------------------
    app_name: str = Field(default="DocuAudit AI", description="FastAPI title and product name")
    app_version: str = Field(default="1.0.0", description="Application version")
    app_description: str = Field(
        default=(
            "Multi-document RAG API for high-stakes consulting environments. "
            "Every answer is grounded in source documents with full audit trails."
        ),
        description="OpenAPI /docs description",
    )

    # --- Provider selection ---------------------------------------------------
    # NOTE(review): the description says "Embedding provider", but the validators
    # below treat this value as the provider for both chat and embedding clients.
    llm_provider: str = Field(default="ollama", description="Embedding provider")

    # --- OpenAI ---------------------------------------------------------------
    openai_api_key: str | None = Field(default=None, description="OpenAI API key")
    openai_model: str = "gpt-4o"
    openai_embedding_model: str = "text-embedding-3-small"

    # --- Anthropic ------------------------------------------------------------
    anthropic_api_key: str = ""
    anthropic_model: str = "claude-3-5-sonnet-20241022"

    # --- Hugging Face ---------------------------------------------------------
    huggingface_api_key: str = ""
    huggingface_model: str = Field(
        default="meta-llama/Meta-Llama-3-8B-Instruct",
        description=(
            "HF chat model id (use a repo your Hub account already has access to; Llama 3.1 needs the "
            "separate Llama 3.1 gate). Chat tries hf-inference then router auto when unset."
        ),
    )
    huggingface_embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2"
    huggingface_inference_provider: str | None = Field(
        default=None,
        description=(
            "Optional huggingface_hub InferenceClient provider (e.g. hf-inference, together). "
            "Unset uses hf-inference in chat code; set to `auto` for router auto-routing."
        ),
    )

    # --- Ollama ---------------------------------------------------------------
    ollama_base_url: str = Field(default="http://localhost:11434", description="Ollama base URL")
    ollama_chat_model: str = "llama3.1:8b"
    ollama_embedding_model: str = "nomic-embed-text"

    # --- Chroma vector store --------------------------------------------------
    # NOTE(review): two Chroma path settings exist with different defaults
    # ("./data/chroma" vs "./chroma"); confirm which one callers read before
    # relying on either. Both are kept so existing env names continue to work.
    chroma_persist_directory: str = Field(default="./data/chroma", description="Chroma persistence path")

    chroma_persist_dir: str = Field(default="./chroma", description="Chroma persistence path")
    chroma_collection_name: str = "docuaudit_docs"

    # --- Chunking and retrieval -----------------------------------------------
    # NOTE(review): the bounds allow chunk_overlap >= chunk_size (e.g. 100 / 2000);
    # nothing here rejects that combination, so verify the splitter copes with it.
    chunk_size: int = Field(default=1000, ge=100, le=8000, description="Chunk size for splitting")
    chunk_overlap: int = Field(default=200, ge=0, le=2000, description="Chunk overlap for splitting")
    top_k_results: int = Field(default=5, ge=1, le=20, description="Default number of chunks to retrieve")

    # --- SQLite locations -----------------------------------------------------
    audit_db_path: str = "./audit.db"
    jobs_db_path: str = Field(default="./data/jobs.db", description="SQLite path for ingest job tracking")

    # --- Ingestion limits -----------------------------------------------------
    max_file_size_mb: int = Field(default=50, ge=1, le=200, description="Max upload file size (MB)")
    max_documents_per_batch: int = Field(default=100, ge=1, le=1000, description="Max documents per batch")
    ingest_user_agent: str = Field(
        default="DocuAudit AI docuaudit-ingest@example.com",
        description=(
            "HTTP User-Agent for POST /ingest/url downloads. SEC.gov requires "
            "'Company Name contact@email.com' with a reachable address (see sec.gov/os/accessing-edgar-data)."
        ),
    )

    @model_validator(mode="after")
    def _space_default_llm_provider(self) -> Self:
        """Hugging Face Spaces do not run Ollama locally; use Hub inference unless the user set LLM_PROVIDER.

        The switch to ``huggingface`` happens only when ``SPACE_ID`` is non-blank,
        the provider was not supplied explicitly, and the value is still ``ollama``
        (compared case-insensitively).

        Returns:
            The same instance, with ``llm_provider`` possibly rewritten.
        """
        if not (os.environ.get("SPACE_ID") or "").strip():
            return self
        # ``model_fields_set`` covers every way the value can be supplied (init
        # keyword, ``.env`` entry, env var in any letter case); the literal
        # ``LLM_PROVIDER`` environment check is kept as a belt-and-braces guard.
        chosen_explicitly = (
            "llm_provider" in self.model_fields_set or "LLM_PROVIDER" in os.environ
        )
        if chosen_explicitly or self.llm_provider.lower() != "ollama":
            return self
        self.llm_provider = "huggingface"
        return self

    @model_validator(mode="after")
    def _huggingface_token_from_hub_env(self) -> Self:
        """When using the Hugging Face inference stack, accept the Hub token from standard env names.

        Spaces often expose `HF_TOKEN` (read/write per Space secrets). Map it into `huggingface_api_key`
        when `HUGGINGFACE_API_KEY` is unset so embedder/chat clients receive a token.

        Returns:
            The same instance; ``huggingface_api_key`` is filled from the first
            non-blank value of ``HF_TOKEN`` then ``HUGGING_FACE_HUB_TOKEN``.
        """
        if self.llm_provider.lower() != "huggingface":
            return self
        if (self.huggingface_api_key or "").strip():
            # An explicitly configured key always takes precedence.
            return self
        for key in ("HF_TOKEN", "HUGGING_FACE_HUB_TOKEN"):
            token = (os.environ.get(key) or "").strip()
            if token:
                self.huggingface_api_key = token
                break
        return self


@lru_cache
def get_settings() -> Settings:
    """Build the application settings once and hand back the same object afterwards.

    The instance is memoised by ``functools.lru_cache``; tests reset it with
    ``get_settings.cache_clear()`` so a fresh ``Settings`` is constructed.
    """
    settings = Settings()
    return settings