# llm-chat-project / rag / config.py
# Author: DunasAnastasiia — initial commit 7c2e31a
from __future__ import annotations
from dataclasses import dataclass
@dataclass(frozen=True)
class Settings:
    """Immutable configuration for the RAG pipeline.

    Groups the Hugging Face dataset identifiers, chunking parameters,
    retrieval fan-out sizes, model names, and artifact file names in a
    single frozen dataclass. ``frozen=True`` makes instances hashable
    and prevents accidental mutation of shared configuration.
    """

    # Hugging Face dataset: both the text corpus and the question-answer
    # pairs are configs of the same dataset repository.
    dataset_name: str = "rag-datasets/rag-mini-wikipedia"
    corpus_config: str = "text-corpus"
    qa_config: str = "question-answer"
    # Chunking: character-based window size and the overlap carried
    # between consecutive chunks.
    chunk_chars: int = 900
    overlap_chars: int = 150
    # Retrieval: candidates taken from BM25 and dense retrieval, and the
    # final number of chunks kept (presumably after merging/reranking —
    # TODO confirm against the retriever code).
    top_k_bm25: int = 8
    top_k_dense: int = 8
    top_k_final: int = 6
    # Dense model: sentence-transformers encoder used for embeddings.
    embed_model: str = "sentence-transformers/all-MiniLM-L6-v2"
    # Optional reranker: cross-encoder model; rerank_top_n is how many
    # candidates are passed to it for scoring.
    rerank_model: str = "cross-encoder/ms-marco-MiniLM-L-6-v2"
    rerank_top_n: int = 20  # candidates to rerank
    # OpenAI: default chat model name.
    default_openai_model: str = "gpt-4o-mini"
    # Artifacts: directory and file names for persisted chunks and
    # their embedding matrix.
    artifacts_dir: str = "artifacts"
    chunks_jsonl: str = "chunks.jsonl"
    embeddings_npy: str = "embeddings.npy"
# Shared module-level settings instance; frozen, so it is safe to import
# and read from anywhere in the project.
SETTINGS = Settings()