File size: 1,037 Bytes
4fdc679
 
 
 
 
 
 
2aa7bf4
95c3e86
 
4fdc679
 
 
 
 
 
 
 
 
 
2aa7bf4
 
 
 
 
 
 
 
 
 
4fdc679
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# rag/config.py
import os

def _int_from_env(name: str, default: int) -> int:
    """Read an integer setting from the environment.

    An unset or blank variable yields *default*; any other value must parse
    as an integer.

    Raises:
        ValueError: if the variable is set to a non-integer value. The
            message names the variable so the misconfiguration is easy to
            locate.
    """
    raw = os.getenv(name)
    # Deployment templates often export a variable as an empty string;
    # treat that the same as "not set" instead of crashing at import time.
    if raw is None or not raw.strip():
        return default
    try:
        return int(raw)
    except ValueError:
        raise ValueError(
            f"Environment variable {name} must be an integer, got {raw!r}"
        ) from None


# Hugging Face Hub token (None when HF_TOKEN is not set)
HF_TOKEN = os.getenv("HF_TOKEN")

# HF datasets repo info
# NOTE: the repo id is read from the env var HF_REPO_ID (not HF_DS_REPO_ID).
HF_DS_REPO_ID = os.getenv("HF_REPO_ID", "m97j/pls-datasets")
HF_INDEX_FILE = os.getenv("HF_INDEX_FILE", "faiss/faiss_index_flat.faiss")
HF_IDS_FILE = os.getenv("HF_IDS_FILE", "faiss/vector_ids.npy")

# Corpus dataset info
HF_CORPUS_REPO = os.getenv("HF_CORPUS_REPO", "HuggingFaceFW/finewiki")
HF_CORPUS_SUBSET = os.getenv("HF_CORPUS_SUBSET", "ko")
HF_CORPUS_SPLIT = os.getenv("HF_CORPUS_SPLIT", "train")

# Local paths
MARKER_DIR = os.getenv("MARKER_DIR", "rag/state")
# Marker file path inside MARKER_DIR.
CORPUS_READY_MARK = os.path.join(MARKER_DIR, ".corpus_ready")

# Embedding model
HF_MODEL_REPO_ID = os.getenv("HF_MODEL_REPO_ID", "m97j/pragmatic-search")
EMBED_MODEL = os.getenv("EMBED_MODEL", "model_quantized.onnx")
EMBED_DIR = os.getenv("EMBED_DIR", "embedder")

# Reranking model (same default file name as the embedder; the directory differs)
RERANK_MODEL = os.getenv("RERANK_MODEL", "model_quantized.onnx")
RERANK_DIR = os.getenv("RERANK_DIR", "reranker")

# Retrieval settings
# A blank TOP_K falls back to 5; a non-integer TOP_K raises ValueError.
TOP_K = _int_from_env("TOP_K", 5)