Spaces:
Sleeping
Sleeping
File size: 2,828 Bytes
14f13a5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
"""
Configuration management for Developer Docs AI Copilot.
"""
import os
from pathlib import Path
from typing import Optional
from urllib.parse import urlparse

from pydantic import Field, model_validator
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Application settings loaded from environment variables.

    Each field is read from the environment variable named by its ``alias``
    (or from the ``.env`` file named in ``model_config``) and falls back to
    the default declared here when the variable is not set.
    """

    # Pydantic v2 style configuration; replaces the deprecated nested
    # ``class Config`` while keeping the same three options.
    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
    )

    # API keys
    hf_token: str = Field(default="", alias="HF_TOKEN")

    # Model configuration
    llm_model: str = Field(
        default="meta-llama/Llama-3.2-3B-Instruct",
        alias="LLM_MODEL",
    )
    llm_max_tokens: int = Field(default=512, alias="LLM_MAX_TOKENS")
    llm_temperature: float = Field(default=0.1, alias="LLM_TEMPERATURE")
    embedding_model: str = Field(
        default="sentence-transformers/all-MiniLM-L6-v2",
        alias="EMBEDDING_MODEL",
    )

    # Vector database
    chroma_persist_dir: str = Field(
        default="./data/vectordb",
        alias="CHROMA_PERSIST_DIR",
    )
    collection_name: str = Field(
        default="developer_docs",
        alias="COLLECTION_NAME",
    )

    # Chunking configuration
    chunk_size: int = Field(default=600, alias="CHUNK_SIZE")
    chunk_overlap: int = Field(default=100, alias="CHUNK_OVERLAP")

    # Retrieval configuration
    top_k_retrieval: int = Field(default=5, alias="TOP_K_RETRIEVAL")
    min_similarity_score: float = Field(
        default=0.2,
        alias="MIN_SIMILARITY_SCORE",
    )

    # Application settings
    app_port: int = Field(default=7860, alias="APP_PORT")
    log_level: str = Field(default="INFO", alias="LOG_LEVEL")

    # Documentation source
    docs_url: str = Field(
        default="https://fastapi.tiangolo.com",
        alias="DOCS_URL",
    )
    # Human-readable name for the docs; derived from ``docs_url`` by
    # ``set_docs_name`` when left empty.
    docs_name: str = Field(default="", alias="DOCS_NAME")
    # Optional URL patterns for the documentation source; empty by default.
    docs_url_patterns: str = Field(default="", alias="DOCS_URL_PATTERNS")

    @model_validator(mode="after")
    def set_docs_name(self) -> "Settings":
        """Fill in ``docs_name`` from ``docs_url`` when it was left empty.

        The name is the first label of the URL's hostname with hyphens
        replaced by spaces and title-cased, so
        ``https://fastapi.tiangolo.com`` becomes ``Fastapi``. A leading
        ``www`` label is skipped so that ``https://www.example.com`` yields
        ``Example`` rather than ``Www``. If the URL has no hostname the name
        stays empty.

        Returns:
            This settings instance, with ``docs_name`` populated when it
            could be derived.
        """
        if self.docs_name:
            return self

        hostname = urlparse(self.docs_url).hostname or ""
        labels = hostname.split(".")
        # "www" carries no information about the site; use the next label.
        if len(labels) > 1 and labels[0] == "www":
            labels = labels[1:]
        self.docs_name = labels[0].replace("-", " ").title()
        return self
# Module-wide settings object, built once when this module is imported
settings = Settings()

# Filesystem layout, anchored at the parent of this file's directory
PROJECT_ROOT = Path(__file__).parent.parent
DATA_DIR = PROJECT_ROOT.joinpath("data")
RAW_DATA_DIR = DATA_DIR.joinpath("raw")
PROCESSED_DATA_DIR = DATA_DIR.joinpath("processed")
VECTORDB_DIR = DATA_DIR.joinpath("vectordb")
EVALS_DIR = PROJECT_ROOT.joinpath("evals")
RESULTS_DIR = EVALS_DIR.joinpath("results")

# Create the leaf directories (and any missing parents) up front;
# directories that already exist are left as they are.
for directory in (RAW_DATA_DIR, PROCESSED_DATA_DIR, VECTORDB_DIR, RESULTS_DIR):
    directory.mkdir(parents=True, exist_ok=True)
|