# Semantic_File / config.yaml
# JackSparrow89's picture
# Upload 65 files
# bb04c5f verified
# raw
# history blame contribute delete
# 887 Bytes
# ---------------------------------------------------------------------------
# What gets indexed
# ---------------------------------------------------------------------------

# Directories to index.
watch_paths:
  - ./data/scifact
  - ./data/nfcorpus  # modify this path as needed

# Only files with one of these extensions are included.
include_extensions:
  - '.pdf'
  - '.docx'
  - '.txt'
  - '.md'
  - '.pptx'
  - '.xlsx'
  - '.py'
  - '.js'
  - '.ipynb'

# Directories to skip.
skip_directories:
  - '.git'
  - 'node_modules'
  - '__pycache__'
  - '.venv'

# ---------------------------------------------------------------------------
# Index storage and embedding
# ---------------------------------------------------------------------------

# Where to store index data.
data_dir: './data'

# Name of the embedding model.
# Alternative kept from the original file:
#   embedding_model: BAAI/bge-small-en-v1.5
embedding_model: 'all-MiniLM-L6-v2'

# NOTE(review): undocumented in the original. Presumably the number of
# seconds to wait after a change under a watched path before re-indexing;
# confirm against the code that reads this key.
debounce_seconds: 5

# ---------------------------------------------------------------------------
# Query expansion and result ranking
# ---------------------------------------------------------------------------

# Number of final results returned to the user.
top_k: 5

# Number of candidates fetched before reranking.
candidate_k: 20

# Toggle for WordNet synonym expansion of the query.
query_expansion: true

# Maximum number of synonyms to append.
max_synonyms: 5

# Toggle for cross-encoder reranking.
reranking_enabled: true

# Model used for the cross-encoder reranking step.
reranker_model: 'cross-encoder/ms-marco-MiniLM-L-6-v2'