File size: 3,632 Bytes
"""
config.py — Centralised configuration for DevDocs AI.
All tuneable parameters live here so the rest of the codebase imports from one place.
"""
import os
from pathlib import Path
from dotenv import load_dotenv
# ─── Paths ────────────────────────────────────────────────────────────────────
# Everything is anchored on the directory that contains this module.
BASE_DIR = Path(__file__).parent
DATA_DIR = BASE_DIR / "data"
VECTOR_DB_DIR = DATA_DIR / "vector_db"
UPLOAD_DIR = DATA_DIR / "uploads"

# Create the on-disk layout at import time; already-existing directories are
# left untouched (exist_ok) and missing parents are created as needed.
for _required_dir in (DATA_DIR, VECTOR_DB_DIR, UPLOAD_DIR):
    _required_dir.mkdir(parents=True, exist_ok=True)
del _required_dir  # keep the module namespace limited to the settings
# ─── Ingestion ────────────────────────────────────────────────────────────────
# File suffixes (lower-case, leading dot included) accepted for ingestion,
# grouped by language family.
ALLOWED_EXTENSIONS = {
    ".py",                                             # Python
    ".js", ".ts", ".jsx", ".tsx", ".mjs", ".cjs",      # JavaScript / TypeScript
    ".php", ".php3", ".php4", ".php5", ".phtml",       # PHP
    ".java", ".kt", ".kts",                            # Java / Kotlin
    ".c", ".cpp", ".h", ".hpp", ".cc",                 # C / C++
    ".go", ".rs",                                      # Systems
    ".rb", ".rake",                                    # Ruby
    ".cs",                                             # C# / .NET
    ".sh", ".bash", ".zsh",                            # Shell
    ".md", ".txt", ".yaml", ".yml", ".toml", ".json",  # Docs / Config
    ".html", ".css", ".scss",                          # HTML / CSS (frontend code)
    ".sql",                                            # SQL
}

# Size ceiling in megabytes; files larger than this are skipped.
MAX_FILE_SIZE_MB = 2
# ─── Chunking ─────────────────────────────────────────────────────────────────
# Sizes are expressed in tokens; one token is approximated as four characters.
_CHARS_PER_TOKEN = 4

CHUNK_SIZE = 400     # tokens per chunk
CHUNK_OVERLAP = 60   # tokens shared between consecutive chunks

# Character-based equivalents of the two token budgets above.
CHUNK_SIZE_CHARS = CHUNK_SIZE * _CHARS_PER_TOKEN
CHUNK_OVERLAP_CHARS = CHUNK_OVERLAP * _CHARS_PER_TOKEN
# ─── Embeddings ───────────────────────────────────────────────────────────────
# Identifier of the embedding model and the device it should run on.
# NOTE(review): the consuming embedding backend is not visible in this file.
EMBEDDING_MODEL = "all-MiniLM-L6-v2"
EMBEDDING_DEVICE = "cpu"

# ─── Chroma ───────────────────────────────────────────────────────────────────
# Name of the Chroma collection used by the application.
CHROMA_COLLECTION_NAME = "devdocs"
# ─── Retrieval ────────────────────────────────────────────────────────────────
DEFAULT_TOP_K = 5

# Accepted values: "similarity" | "mmr"
DEFAULT_SEARCH_TYPE = "similarity"

# MMR-only knobs: size of the candidate pool, and the balance between
# diversity and relevance.
MMR_FETCH_K = 20
MMR_LAMBDA_MULT = 0.5
# ─── LLM ──────────────────────────────────────────────────────────────────────
LLM_MODEL = "openai/gpt-4.1-nano"  # via litellm
LLM_MAX_TOKENS = 1024
LLM_TEMPERATURE = 0.1

# load_dotenv() only copies the variables from the .env file into os.environ
# and returns a bool telling whether anything was set; it never returns the
# secret itself. Load the file first, then read the key from the environment.
load_dotenv(dotenv_path=Path(__file__).parent / ".env")

# None when the variable is defined neither in the environment nor in .env;
# consumers should check for that before calling the API.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# ─── Evaluation ───────────────────────────────────────────────────────────────
# Model used as the judge during evaluation runs.
JUDGE_MODEL = "openai/gpt-4.1-nano"

# Top-k value used for evaluation.
EVAL_TOP_K = 5