| import os |
| from functools import lru_cache |
| from typing import Any, Self |
|
|
| from pydantic import Field, model_validator |
|
|
| from pydantic_settings import BaseSettings, SettingsConfigDict |
|
|
|
|
class Settings(BaseSettings):
    """Application configuration loaded from the environment and an optional ``.env`` file.

    Field names map to environment variables case-insensitively (e.g. ``LLM_PROVIDER``
    populates ``llm_provider``); unknown environment entries are silently ignored.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",  # drop unrecognized env entries instead of erroring
        case_sensitive=False,
        populate_by_name=True,
    )

    @model_validator(mode="before")
    @classmethod
    def _map_max_upload_env_alias(cls, data: Any) -> Any:
        """Accept legacy ``max_upload_size_mb`` as an alias for ``max_file_size_mb``.

        Only a non-empty alias value is copied over. An empty/``None`` alias is left
        in place for ``extra="ignore"`` to discard, so the field default still applies.
        (Previously an empty alias value was also mapped, which made pydantic fail
        int validation instead of falling back to the default.)
        """
        if not isinstance(data, dict):
            return data
        out = dict(data)
        if out.get("max_file_size_mb") in (None, "") and out.get("max_upload_size_mb") not in (None, ""):
            out["max_file_size_mb"] = out.pop("max_upload_size_mb")
        return out

    # --- Application identity ---------------------------------------------
    app_name: str = Field(default="DocuAudit AI", description="FastAPI title and product name")
    app_version: str = Field(default="1.0.0", description="Application version")
    app_description: str = Field(
        default=(
            "Multi-document RAG API for high-stakes consulting environments. "
            "Every answer is grounded in source documents with full audit trails."
        ),
        description="OpenAPI /docs description",
    )
    # Selects the chat/embedding backend (validators below switch it on Spaces).
    llm_provider: str = Field(default="ollama", description="LLM provider (ollama, openai, anthropic, huggingface)")

    # --- OpenAI -------------------------------------------------------------
    openai_api_key: str | None = Field(default=None, description="OpenAI API key")
    openai_model: str = "gpt-4o"
    openai_embedding_model: str = "text-embedding-3-small"

    # --- Anthropic ----------------------------------------------------------
    anthropic_api_key: str = ""
    anthropic_model: str = "claude-3-5-sonnet-20241022"

    # --- Hugging Face -------------------------------------------------------
    huggingface_api_key: str = ""
    huggingface_model: str = Field(
        default="meta-llama/Meta-Llama-3-8B-Instruct",
        description=(
            "HF chat model id (use a repo your Hub account already has access to; Llama 3.1 needs the "
            "separate Llama 3.1 gate). Chat tries hf-inference then router auto when unset."
        ),
    )
    huggingface_embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2"
    huggingface_inference_provider: str | None = Field(
        default=None,
        description=(
            "Optional huggingface_hub InferenceClient provider (e.g. hf-inference, together). "
            "Unset uses hf-inference in chat code; set to `auto` for router auto-routing."
        ),
    )

    # --- Ollama -------------------------------------------------------------
    ollama_base_url: str = Field(default="http://localhost:11434", description="Ollama base URL")
    ollama_chat_model: str = "llama3.1:8b"
    ollama_embedding_model: str = "nomic-embed-text"

    # --- Vector store -------------------------------------------------------
    # NOTE(review): two near-duplicate persistence fields with conflicting
    # defaults ("./data/chroma" vs "./chroma"). Both are kept for backward
    # compatibility; consolidate on one once callers are audited.
    chroma_persist_directory: str = Field(default="./data/chroma", description="Chroma persistence path")
    chroma_persist_dir: str = Field(default="./chroma", description="Chroma persistence path")
    chroma_collection_name: str = "docuaudit_docs"

    # --- Retrieval / chunking ----------------------------------------------
    chunk_size: int = Field(default=1000, ge=100, le=8000, description="Chunk size for splitting")
    chunk_overlap: int = Field(default=200, ge=0, le=2000, description="Chunk overlap for splitting")
    top_k_results: int = Field(default=5, ge=1, le=20, description="Default number of chunks to retrieve")

    # --- Local databases ----------------------------------------------------
    audit_db_path: str = "./audit.db"
    jobs_db_path: str = Field(default="./data/jobs.db", description="SQLite path for ingest job tracking")

    # --- Upload limits ------------------------------------------------------
    max_file_size_mb: int = Field(default=50, ge=1, le=200, description="Max upload file size (MB)")
    max_documents_per_batch: int = Field(default=100, ge=1, le=1000, description="Max documents per batch")

    @model_validator(mode="after")
    def _space_default_llm_provider(self) -> Self:
        """Hugging Face Spaces do not run Ollama locally; use Hub inference unless the user set LLM_PROVIDER."""
        # Only applies when running inside a Space (SPACE_ID is set non-blank).
        if not (os.environ.get("SPACE_ID") or "").strip():
            return self
        # An explicit user choice always wins over the Space default.
        if "LLM_PROVIDER" in os.environ:
            return self
        if self.llm_provider.lower() != "ollama":
            return self
        self.llm_provider = "huggingface"
        return self

    @model_validator(mode="after")
    def _huggingface_token_from_hub_env(self) -> Self:
        """When using the Hugging Face inference stack, accept the Hub token from standard env names.

        Spaces often expose `HF_TOKEN` (read/write per Space secrets). Map it into `huggingface_api_key`
        when `HUGGINGFACE_API_KEY` is unset so embedder/chat clients receive a token.
        """
        if self.llm_provider.lower() != "huggingface":
            return self
        if (self.huggingface_api_key or "").strip():
            return self
        # First non-blank token among the standard Hub env names wins.
        for key in ("HF_TOKEN", "HUGGING_FACE_HUB_TOKEN"):
            token = (os.environ.get(key) or "").strip()
            if token:
                self.huggingface_api_key = token
                break
        return self
|
|
|
|
@lru_cache(maxsize=None)
def get_settings() -> Settings:
    """Return the process-wide, lazily constructed ``Settings`` singleton."""
    return Settings()