File size: 774 Bytes
4fdc679 95c3e86 4fdc679 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
# rag/config.py
"""Configuration constants for the ``rag`` package.

Every setting is read from the process environment once, at import time.
When a variable is unset, the default written next to it is used instead.
"""
import os


def _env_int(name: str, default: int) -> int:
    """Return environment variable *name* parsed as an integer.

    Args:
        name: Name of the environment variable to read.
        default: Value returned when the variable is unset or blank.

    Returns:
        The parsed integer, or *default* when no usable value is present.

    Raises:
        ValueError: If the variable holds text that is not an integer.
    """
    raw = os.getenv(name)
    # A set-but-empty variable (e.g. `TOP_K=` in a .env file) carries no
    # value, so treat it like an unset one instead of failing in int("").
    if raw is None or not raw.strip():
        return default
    try:
        return int(raw)
    except ValueError:
        # Same exception type as a bare int() call, but the message names
        # the offending variable so the misconfiguration is easy to find.
        raise ValueError(
            f"Environment variable {name} must be an integer, got {raw!r}"
        ) from None


# Huggingface Hub token (None when HF_TOKEN is not set)
HF_TOKEN = os.getenv("HF_TOKEN")

# HF datasets repo info: repository id plus the paths of the index file and
# the ids file inside that repository
HF_REPO_ID = os.getenv("HF_REPO_ID", "m97j/pls-datasets")
HF_INDEX_FILE = os.getenv("HF_INDEX_FILE", "faiss/faiss_index_flat.faiss")
HF_IDS_FILE = os.getenv("HF_IDS_FILE", "faiss/vector_ids.npy")

# Corpus dataset info: repository, subset and split
HF_CORPUS_REPO = os.getenv("HF_CORPUS_REPO", "HuggingFaceFW/finewiki")
HF_CORPUS_SUBSET = os.getenv("HF_CORPUS_SUBSET", "ko")
HF_CORPUS_SPLIT = os.getenv("HF_CORPUS_SPLIT", "train")

# Local paths
MARKER_DIR = os.getenv("MARKER_DIR", "rag/state")
# Marker file path inside MARKER_DIR
# NOTE(review): presumably created once the corpus is prepared; confirm
# against the code that writes it.
CORPUS_READY_MARK = os.path.join(MARKER_DIR, ".corpus_ready")

# Embedding / LLM model
EMBED_MODEL = os.getenv("EMBED_MODEL", "intfloat/multilingual-e5-large")
# Integer setting, default 5; blank values fall back to the default.
# NOTE(review): presumably the number of results to retrieve per query;
# confirm against the caller.
TOP_K = _env_int("TOP_K", 5)
|