Spaces:
Sleeping
Sleeping
| """ | |
| Configuration management for Developer Docs AI Copilot. | |
| """ | |
| import os | |
| from pathlib import Path | |
| from typing import Optional | |
| from urllib.parse import urlparse | |
| from pydantic_settings import BaseSettings | |
| from pydantic import Field, model_validator | |
class Settings(BaseSettings):
    """Application settings loaded from environment variables.

    Every field declares an upper-case ``alias``; values are read from the
    environment (or from the ``.env`` file named in ``Config``) and fall back
    to the default given here when unset.
    """

    # API Keys
    hf_token: str = Field(default="", alias="HF_TOKEN")

    # Model Configuration
    llm_model: str = Field(
        default="meta-llama/Llama-3.2-3B-Instruct",
        alias="LLM_MODEL"
    )
    llm_max_tokens: int = Field(default=512, alias="LLM_MAX_TOKENS")
    llm_temperature: float = Field(default=0.1, alias="LLM_TEMPERATURE")
    embedding_model: str = Field(
        default="sentence-transformers/all-MiniLM-L6-v2",
        alias="EMBEDDING_MODEL"
    )

    # Vector Database
    chroma_persist_dir: str = Field(
        default="./data/vectordb",
        alias="CHROMA_PERSIST_DIR"
    )
    collection_name: str = Field(
        default="developer_docs",
        alias="COLLECTION_NAME"
    )

    # Chunking Configuration
    chunk_size: int = Field(default=600, alias="CHUNK_SIZE")
    chunk_overlap: int = Field(default=100, alias="CHUNK_OVERLAP")

    # Retrieval Configuration
    top_k_retrieval: int = Field(default=5, alias="TOP_K_RETRIEVAL")
    min_similarity_score: float = Field(
        default=0.2,
        alias="MIN_SIMILARITY_SCORE"
    )

    # Application Settings
    app_port: int = Field(default=7860, alias="APP_PORT")
    log_level: str = Field(default="INFO", alias="LOG_LEVEL")

    # Documentation Source
    docs_url: str = Field(
        default="https://fastapi.tiangolo.com",
        alias="DOCS_URL"
    )
    # Human-readable name for the docs; auto-derived from docs_url if not set.
    docs_name: str = Field(default="", alias="DOCS_NAME")
    docs_url_patterns: str = Field(default="", alias="DOCS_URL_PATTERNS")

    # Without this decorator pydantic never invokes the method, so docs_name
    # would stay empty whenever DOCS_NAME is not provided.
    @model_validator(mode="after")
    def set_docs_name(self) -> "Settings":
        """Fill in ``docs_name`` from ``docs_url`` when it was left empty.

        The name is the first label of the URL's hostname (after dropping a
        leading ``www.``), with hyphens turned into spaces and title-cased,
        e.g. ``https://fastapi.tiangolo.com`` -> ``"Fastapi"``. An explicitly
        configured ``docs_name`` is never overwritten.

        Returns:
            The same settings instance, as required of an "after" validator.
        """
        if not self.docs_name:
            hostname = urlparse(self.docs_url).hostname or ""
            # "www" is never a meaningful product name; skip past it.
            if hostname.startswith("www."):
                hostname = hostname[len("www."):]
            name = hostname.split(".")[0].replace("-", " ").title()
            self.docs_name = name
        return self

    class Config:
        env_file = ".env"
        env_file_encoding = "utf-8"
        case_sensitive = False
# Shared settings object, built once when this module is imported
settings = Settings()

# Filesystem layout, anchored at the directory two levels above this file
PROJECT_ROOT = Path(__file__).parent.parent

DATA_DIR = PROJECT_ROOT.joinpath("data")
RAW_DATA_DIR = DATA_DIR.joinpath("raw")
PROCESSED_DATA_DIR = DATA_DIR.joinpath("processed")
VECTORDB_DIR = DATA_DIR.joinpath("vectordb")

EVALS_DIR = PROJECT_ROOT.joinpath("evals")
RESULTS_DIR = EVALS_DIR.joinpath("results")

# Create the leaf directories (and any missing parents) up front; existing
# directories are left untouched.
for directory in (
    RAW_DATA_DIR,
    PROCESSED_DATA_DIR,
    VECTORDB_DIR,
    RESULTS_DIR,
):
    directory.mkdir(parents=True, exist_ok=True)