PDF-Assit_RAG / backend /app /config.py
Paramjit Singh
Merge pull request #336 from Srushti-Kamble14/feat/celery-redis-pdf-processing
5926dae unverified
"""
Application configuration via pydantic-settings.
All config is loaded from environment variables with sensible defaults.
"""
import os
from pydantic_settings import BaseSettings
from functools import lru_cache
class Settings(BaseSettings):
    """Typed application settings.

    Every field below can be overridden through an environment variable of
    the same name or through the ``.env`` file named in ``Config``; the
    values written here are only the fallbacks.
    """

    # ── App ──────────────────────────────────────────────
    APP_NAME: str = "Document AI Analyst"
    # Placeholder only: override SECRET_KEY in any real deployment.
    SECRET_KEY: str = "change-me-in-production-please"
    DEBUG: bool = False
    ENVIRONMENT: str = "development"
    # Comma-separated list; parsed by the ``cors_origins`` property.
    ALLOWED_ORIGINS: str = "http://localhost:3000,http://localhost:7860"

    # ── Database ─────────────────────────────────────────
    DATABASE_URL: str = "sqlite:///./data/app.db"

    # ── Auth ─────────────────────────────────────────────
    JWT_ALGORITHM: str = "HS256"
    JWT_ACCESS_EXPIRY_MINUTES: int = 15
    JWT_REFRESH_EXPIRY_DAYS: int = 7
    GOOGLE_CLIENT_ID: str = ""
    HF_CLIENT_ID: str = ""
    HF_CLIENT_SECRET: str = ""
    HF_REDIRECT_URI: str = ""
    FRONTEND_URL: str = "http://localhost:3000"

    # Google Drive background sync
    DRIVE_SYNC_ENABLED: bool = False
    DRIVE_SYNC_INTERVAL_MINUTES: int = 60
    GOOGLE_SERVICE_ACCOUNT_FILE: str = ""

    # Celery / Redis background processing
    CELERY_BROKER_URL: str = "redis://localhost:6379/0"
    CELERY_RESULT_BACKEND: str = "redis://localhost:6379/1"
    CELERY_TASK_TRACK_STARTED: bool = True

    # ── File Upload ──────────────────────────────────────
    UPLOAD_DIR: str = "./data/uploads"
    MAX_UPLOAD_SIZE_MB: int = 20
    # NOTE: extensions here have no leading dot, while the keys of
    # ALLOWED_MIME_TYPES below do.
    ALLOWED_EXTENSIONS: set = {"pdf", "docx", "txt", "md"}
    ALLOWED_MIME_TYPES: dict = {
        ".pdf": ["application/pdf"],
        ".docx": [
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            "application/zip",
        ],
        ".txt": ["text/plain"],
        ".md": ["text/markdown"],
    }

    # ── RAG Pipeline ─────────────────────────────────────
    CHUNK_SIZE: int = 1000
    CHUNK_OVERLAP: int = 200
    TOP_K_RETRIEVAL: int = 10
    TOP_K_RERANK: int = 5

    # ── Knowledge Graph (GraphRAG) ───────────────────────
    GRAPH_PERSIST_DIR: str = "./data/graphs"
    GRAPH_ENTITY_LABELS: set = {
        "PERSON",
        "ORG",
        "GPE",
        "LOC",
        "PRODUCT",
        "EVENT",
        "WORK_OF_ART",
        "LAW",
        "NORP",
        "FAC",
    }
    GRAPH_MAX_RELATIONSHIPS: int = 12

    # ── Embeddings (local HuggingFace model) ─────────────
    EMBEDDING_MODEL: str = "sentence-transformers/all-MiniLM-L6-v2"
    EMBEDDING_DIMENSION: int = 384

    # ── ChromaDB ─────────────────────────────────────────
    CHROMA_PERSIST_DIR: str = "./data/chroma_db"

    # ── LLM (HuggingFace Inference API) ──────────────────
    # HuggingFace API token (set in .env). A plain default is enough:
    # the settings loader already reads HF_TOKEN from the environment and
    # from .env, so an import-time os.getenv() lookup is redundant.
    HF_TOKEN: str = ""
    LLM_MODEL: str = "Qwen/Qwen2.5-72B-Instruct"
    LLM_MAX_NEW_TOKENS: int = 1024
    LLM_TEMPERATURE: float = 0.3
    SUMMARY_MAX_TOKENS: int = 512

    # ── LangSmith Tracing (optional) ─────────────────────
    LANGSMITH_TRACING: bool = False
    LANGSMITH_API_KEY: str = ""
    LANGSMITH_ENDPOINT: str = "https://api.smith.langchain.com"
    LANGSMITH_PROJECT: str = "pdf-assistant-rag"

    # ── Reranker ─────────────────────────────────────────
    RERANKER_MODEL: str = "cross-encoder/ms-marco-MiniLM-L-6-v2"

    # ── Vision / Image captioning ─────────────────────
    VISION_PROVIDER: str | None = None  # e.g. 'openai'
    VISION_MODEL: str | None = None
    OPENAI_API_KEY: str = ""

    # ── Workspace Invitation ─────────────────────────
    APP_URL: str = "http://localhost:3000"
    INVITE_TOKEN_EXPIRY_HOURS: int = 72
    EMAIL_FROM: str = "no-reply@example.com"
    SMTP_HOST: str = ""
    SMTP_PORT: int = 0
    SMTP_USER: str = ""
    SMTP_PASSWORD: str = ""

    @property
    def cors_origins(self) -> list[str]:
        """Return the origin list derived from ``ALLOWED_ORIGINS``.

        When ``ENVIRONMENT`` is ``"production"`` the comma-separated
        ``ALLOWED_ORIGINS`` string is split, each entry is stripped of
        surrounding whitespace, and blank entries (from a trailing comma,
        a doubled comma, or an empty setting) are dropped. In every other
        environment the wildcard ``["*"]`` is returned.
        """
        if self.ENVIRONMENT != "production":
            return ["*"]
        stripped = (origin.strip() for origin in self.ALLOWED_ORIGINS.split(","))
        # Skip empty strings so a stray comma never yields an "" origin.
        return [origin for origin in stripped if origin]

    class Config:
        env_file = ".env"
        env_file_encoding = "utf-8"
        # Do not fail on environment/.env keys that are not declared above.
        extra = "ignore"
@lru_cache
def get_settings() -> Settings:
    """Return the shared Settings instance.

    The function takes no arguments, so the cache holds a single entry:
    the first call builds ``Settings()`` and later calls reuse it.
    """
    settings = Settings()
    return settings