Spaces:
Running
Running
Paramjit Singh
Merge pull request #336 from Srushti-Kamble14/feat/celery-redis-pdf-processing
5926dae unverified | """ | |
| Application configuration via pydantic-settings. | |
| All config is loaded from environment variables with sensible defaults. | |
| """ | |
| import os | |
| from pydantic_settings import BaseSettings | |
| from functools import lru_cache | |
class Settings(BaseSettings):
    """Application settings with defaults for local development.

    Each attribute is one setting; its value here is the default used when
    nothing overrides it. Values are read from the environment and from the
    ``.env`` file named in ``Config``.
    """

    # ── App ──────────────────────────────────────────────
    APP_NAME: str = "Document AI Analyst"
    # Placeholder default; the literal itself asks to be replaced in production.
    SECRET_KEY: str = "change-me-in-production-please"
    DEBUG: bool = False
    ENVIRONMENT: str = "development"
    # Comma-separated list; parsed by cors_origins() below.
    ALLOWED_ORIGINS: str = "http://localhost:3000,http://localhost:7860"

    # ── Database ─────────────────────────────────────────
    DATABASE_URL: str = "sqlite:///./data/app.db"

    # ── Auth ─────────────────────────────────────────────
    JWT_ALGORITHM: str = "HS256"
    JWT_ACCESS_EXPIRY_MINUTES: int = 15
    JWT_REFRESH_EXPIRY_DAYS: int = 7
    GOOGLE_CLIENT_ID: str = ""
    HF_CLIENT_ID: str = ""
    HF_CLIENT_SECRET: str = ""
    HF_REDIRECT_URI: str = ""
    FRONTEND_URL: str = "http://localhost:3000"

    # Google Drive background sync
    DRIVE_SYNC_ENABLED: bool = False
    DRIVE_SYNC_INTERVAL_MINUTES: int = 60
    GOOGLE_SERVICE_ACCOUNT_FILE: str = ""

    # Celery / Redis background processing
    CELERY_BROKER_URL: str = "redis://localhost:6379/0"
    CELERY_RESULT_BACKEND: str = "redis://localhost:6379/1"
    CELERY_TASK_TRACK_STARTED: bool = True

    # ── File Upload ──────────────────────────────────────
    UPLOAD_DIR: str = "./data/uploads"
    MAX_UPLOAD_SIZE_MB: int = 20
    # Extensions are listed without a leading dot ...
    ALLOWED_EXTENSIONS: set = {"pdf", "docx", "txt", "md"}
    # ... whereas the keys of this mapping include the leading dot.
    ALLOWED_MIME_TYPES: dict = {
        ".pdf": ["application/pdf"],
        ".docx": [
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            "application/zip",
        ],
        ".txt": ["text/plain"],
        ".md": ["text/markdown"],
    }

    # ── RAG Pipeline ─────────────────────────────────────
    CHUNK_SIZE: int = 1000
    CHUNK_OVERLAP: int = 200
    TOP_K_RETRIEVAL: int = 10
    TOP_K_RERANK: int = 5

    # ── Knowledge Graph (GraphRAG) ───────────────────────
    GRAPH_PERSIST_DIR: str = "./data/graphs"
    GRAPH_ENTITY_LABELS: set = {
        "PERSON",
        "ORG",
        "GPE",
        "LOC",
        "PRODUCT",
        "EVENT",
        "WORK_OF_ART",
        "LAW",
        "NORP",
        "FAC",
    }
    GRAPH_MAX_RELATIONSHIPS: int = 12

    # ── Embeddings (local HuggingFace model) ─────────────
    EMBEDDING_MODEL: str = "sentence-transformers/all-MiniLM-L6-v2"
    EMBEDDING_DIMENSION: int = 384

    # ── ChromaDB ─────────────────────────────────────────
    CHROMA_PERSIST_DIR: str = "./data/chroma_db"

    # ── LLM (HuggingFace Inference API) ──────────────────
    HF_TOKEN: str = os.getenv("HF_TOKEN", "")  # HuggingFace API token (set in .env)
    LLM_MODEL: str = "Qwen/Qwen2.5-72B-Instruct"
    LLM_MAX_NEW_TOKENS: int = 1024
    LLM_TEMPERATURE: float = 0.3
    SUMMARY_MAX_TOKENS: int = 512

    # ── LangSmith Tracing (optional) ─────────────────────
    LANGSMITH_TRACING: bool = False
    LANGSMITH_API_KEY: str = ""
    LANGSMITH_ENDPOINT: str = "https://api.smith.langchain.com"
    LANGSMITH_PROJECT: str = "pdf-assistant-rag"

    # ── Reranker ─────────────────────────────────────────
    RERANKER_MODEL: str = "cross-encoder/ms-marco-MiniLM-L-6-v2"

    # ── Vision / Image captioning ────────────────────────
    VISION_PROVIDER: str | None = None  # e.g. 'openai'
    VISION_MODEL: str | None = None
    OPENAI_API_KEY: str = ""

    # ── Workspace Invitation ─────────────────────────────
    APP_URL: str = "http://localhost:3000"
    INVITE_TOKEN_EXPIRY_HOURS: int = 72
    EMAIL_FROM: str = "no-reply@example.com"
    SMTP_HOST: str = ""
    SMTP_PORT: int = 0
    SMTP_USER: str = ""
    SMTP_PASSWORD: str = ""

    def cors_origins(self) -> list[str]:
        """Return the list of origins derived from the current settings.

        When ``ENVIRONMENT`` is ``"production"``, ``ALLOWED_ORIGINS`` is split
        on commas and each entry is stripped of surrounding whitespace. Blank
        entries (an empty value, a trailing comma, or ``",,"``) are dropped so
        the result never contains ``""``. For any other environment the
        wildcard ``["*"]`` is returned.
        """
        if self.ENVIRONMENT != "production":
            return ["*"]
        stripped = (origin.strip() for origin in self.ALLOWED_ORIGINS.split(","))
        return [origin for origin in stripped if origin]

    class Config:
        env_file = ".env"
        env_file_encoding = "utf-8"
        extra = "ignore"
@lru_cache
def get_settings() -> Settings:
    """Return the cached settings instance — loaded once on first call.

    The first call constructs ``Settings()``; later calls return that same
    object. Call ``get_settings.cache_clear()`` to force a fresh load.
    """
    return Settings()