hrbot / src /config.py
Sonu Prasad
updated
8a1c0d1
"""HR Report Generator - Configuration Module (HuggingFace Version)."""
from pathlib import Path
from typing import Literal
import os
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Application settings loaded from environment variables.

    Values are read from the process environment and from a UTF-8 encoded
    ``.env`` file; keys that do not correspond to a field below are ignored
    (``extra="ignore"``). Every field has a default, so the class can be
    instantiated without any environment configuration.

    Numeric fields carry range constraints, so an invalid value (for
    example a non-positive chunk size) is rejected by validation when the
    settings object is built.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # LLM Configuration (loaded from environment or endpoints.txt)
    llm_model: str = Field(
        default="google/gemma-2-9b-it:free",
        description="LLM model name",
    )
    llm_temperature: float = Field(
        default=0.0,
        ge=0.0,
        le=1.0,
        description="LLM temperature",
    )

    # Embedding Configuration
    embedding_model: str = Field(
        default="BAAI/bge-small-en-v1.5",
        description="HuggingFace embedding model",
    )

    # Paths (relative for Docker)
    data_dir: Path = Field(default=Path("./data"))
    documents_dir: Path = Field(default=Path("./data/documents"))
    markdown_dir: Path = Field(default=Path("./data/markdown"))
    embeddings_dir: Path = Field(default=Path("./data/embeddings"))
    outputs_dir: Path = Field(default=Path("./data/outputs"))
    templates_dir: Path = Field(default=Path("./templates"))

    # Vector Store
    faiss_index_path: Path = Field(default=Path("./data/embeddings/faiss_index"))

    # Chunking Configuration
    # A chunk must contain at least one token; the overlap may be zero but
    # never negative.
    chunk_size: int = Field(
        default=512,
        gt=0,
        description="Target chunk size in tokens",
    )
    chunk_overlap: int = Field(
        default=50,
        ge=0,
        description="Overlap between chunks",
    )

    # Retrieval Configuration
    # Retrieving fewer than one document is meaningless, so require >= 1.
    retrieval_top_k: int = Field(
        default=5,
        ge=1,
        description="Number of documents to retrieve",
    )
    retrieval_min_score: float = Field(
        default=0.3,
        description="Minimum similarity score",
    )

    def ensure_directories(self) -> None:
        """Create all required directories if they don't exist.

        Creates the data, documents, markdown, embeddings, outputs and
        templates directories, plus the parent directory of
        ``faiss_index_path``. With the default configuration that parent is
        the embeddings directory; including it explicitly covers the case
        where the index path is overridden to live somewhere else.

        Missing intermediate directories are created as needed, and
        directories that already exist are left untouched.
        """
        required_dirs = (
            self.data_dir,
            self.documents_dir,
            self.markdown_dir,
            self.embeddings_dir,
            self.outputs_dir,
            self.templates_dir,
            self.faiss_index_path.parent,
        )
        for directory in required_dirs:
            directory.mkdir(parents=True, exist_ok=True)
# Shared, module-level configuration object; it is built once, when this
# module is first imported.
settings: Settings = Settings()