DeveloperDocs_RAG / src /config.py
Aishwarya30998's picture
Deploy DeveloperDocs-AI-Copilot-RAG to Hugging Face Space
14f13a5
"""
Configuration management for Developer Docs AI Copilot.
"""
import os
from pathlib import Path
from typing import Optional
from urllib.parse import urlparse

from pydantic import Field, model_validator
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Application settings loaded from environment variables.

    Every field declares an upper-case alias (for example ``HF_TOKEN``) and a
    default value. As configured in ``model_config``, values are also read
    from a UTF-8 ``.env`` file and names are matched case-insensitively.
    """

    # Pydantic v2 style configuration; replaces the deprecated nested
    # ``class Config`` while keeping the same three options.
    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
    )

    # API Keys
    hf_token: str = Field(default="", alias="HF_TOKEN")

    # Model Configuration
    llm_model: str = Field(
        default="meta-llama/Llama-3.2-3B-Instruct",
        alias="LLM_MODEL",
    )
    llm_max_tokens: int = Field(default=512, alias="LLM_MAX_TOKENS")
    llm_temperature: float = Field(default=0.1, alias="LLM_TEMPERATURE")
    embedding_model: str = Field(
        default="sentence-transformers/all-MiniLM-L6-v2",
        alias="EMBEDDING_MODEL",
    )

    # Vector Database
    chroma_persist_dir: str = Field(
        default="./data/vectordb",
        alias="CHROMA_PERSIST_DIR",
    )
    collection_name: str = Field(
        default="developer_docs",
        alias="COLLECTION_NAME",
    )

    # Chunking Configuration
    chunk_size: int = Field(default=600, alias="CHUNK_SIZE")
    chunk_overlap: int = Field(default=100, alias="CHUNK_OVERLAP")

    # Retrieval Configuration
    top_k_retrieval: int = Field(default=5, alias="TOP_K_RETRIEVAL")
    min_similarity_score: float = Field(
        default=0.2,
        alias="MIN_SIMILARITY_SCORE",
    )

    # Application Settings
    app_port: int = Field(default=7860, alias="APP_PORT")
    log_level: str = Field(default="INFO", alias="LOG_LEVEL")

    # Documentation Source
    docs_url: str = Field(
        default="https://fastapi.tiangolo.com",
        alias="DOCS_URL",
    )
    # Human-readable name for the docs. It is auto-derived from the URL if
    # not set (see ``set_docs_name``).
    docs_name: str = Field(default="", alias="DOCS_NAME")
    docs_url_patterns: str = Field(default="", alias="DOCS_URL_PATTERNS")

    @model_validator(mode="after")
    def set_docs_name(self) -> "Settings":
        """Fill in ``docs_name`` from ``docs_url`` when it was left empty.

        The name is the first meaningful label of the URL's hostname, with
        hyphens turned into spaces and title-cased, e.g.
        ``https://fastapi.tiangolo.com`` -> ``"Fastapi"`` and
        ``https://docs.python.org`` -> ``"Python"``. An explicitly provided
        ``docs_name`` is never overwritten. If no hostname can be extracted
        the name stays empty.

        Returns:
            The validated settings instance itself.
        """
        if self.docs_name:
            return self

        url = self.docs_url.strip()
        # Without a scheme, urlparse puts everything in ``path`` and reports
        # no hostname; re-parse such values as a network location.
        parsed = urlparse(url if "://" in url else f"//{url}")
        hostname = parsed.hostname or ""

        labels = [label for label in hostname.split(".") if label]
        # Skip generic leading labels ("www", "docs") so the name reflects
        # the project rather than the subdomain, but always keep at least
        # two labels so bare hosts like "docs.io" still yield a name.
        generic_labels = ("www", "docs")
        while len(labels) > 2 and labels[0] in generic_labels:
            labels.pop(0)

        first_label = labels[0] if labels else ""
        self.docs_name = first_label.replace("-", " ").title()
        return self
# Module-wide settings object, constructed once when this module is imported.
settings = Settings()

# Filesystem layout, anchored at the parent of the directory holding this file.
PROJECT_ROOT = Path(__file__).parent.parent
DATA_DIR = PROJECT_ROOT.joinpath("data")
RAW_DATA_DIR = DATA_DIR.joinpath("raw")
PROCESSED_DATA_DIR = DATA_DIR.joinpath("processed")
VECTORDB_DIR = DATA_DIR.joinpath("vectordb")
EVALS_DIR = PROJECT_ROOT.joinpath("evals")
RESULTS_DIR = EVALS_DIR.joinpath("results")

# Create the working directories at import time; missing parents are created
# too, and directories that already exist are left untouched.
for directory in (
    RAW_DATA_DIR,
    PROCESSED_DATA_DIR,
    VECTORDB_DIR,
    RESULTS_DIR,
):
    directory.mkdir(parents=True, exist_ok=True)