Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| from dataclasses import dataclass | |
# eq=False keeps the identity-based ``==``/``hash`` a plain class has, so
# instances stay hashable and existing comparisons are unaffected; the
# decorator only adds a keyword ``__init__`` and a field-listing ``__repr__``.
@dataclass(eq=False)
class Settings:
    """Default configuration values for the application.

    Every field has a default, so ``Settings()`` yields the stock
    configuration; individual values can be overridden by keyword, e.g.
    ``Settings(chunk_chars=500)``. The defaults also remain readable as
    class attributes (``Settings.chunk_chars``).
    """

    # Hugging Face dataset
    dataset_name: str = "rag-datasets/rag-mini-wikipedia"
    corpus_config: str = "text-corpus"
    qa_config: str = "question-answer"

    # Chunking
    chunk_chars: int = 900
    overlap_chars: int = 150

    # Retrieval
    top_k_bm25: int = 8
    top_k_dense: int = 8
    top_k_final: int = 6

    # Dense model
    embed_model: str = "sentence-transformers/all-MiniLM-L6-v2"

    # Optional reranker
    rerank_model: str = "cross-encoder/ms-marco-MiniLM-L-6-v2"
    rerank_top_n: int = 20  # candidates to rerank

    # OpenAI
    default_openai_model: str = "gpt-4o-mini"

    # Artifacts
    artifacts_dir: str = "artifacts"
    chunks_jsonl: str = "chunks.jsonl"
    embeddings_npy: str = "embeddings.npy"


# Module-level instance holding the default configuration.
SETTINGS = Settings()