---
# Configuration for the file indexer and retrieval pipeline.
#
# NOTE(review): the original file began with the lines "Spaces:", "Sleeping",
# "Sleeping", and wrapped every setting in "| ... | |" table cells. Both look
# like residue from a scraped web page rather than configuration, so the
# wrappers were stripped and the three lines are preserved only in this
# comment. Confirm and delete this note.

# Directories to index
watch_paths:
  - ./data/scifact
  - ./data/nfcorpus  # modify this path as needed

# File extensions to include
include_extensions:
  - ".pdf"
  - ".docx"
  - ".txt"
  - ".md"
  - ".pptx"
  - ".xlsx"
  - ".py"
  - ".js"
  - ".ipynb"

# Retrieval, query-expansion and reranking settings
top_k: 5  # final results returned to user
candidate_k: 20  # candidates fetched before reranking
query_expansion: true  # WordNet synonym expansion
max_synonyms: 5  # max synonyms to append
reranking_enabled: true  # cross-encoder reranking
reranker_model: "cross-encoder/ms-marco-MiniLM-L-6-v2"

# Directories to skip
skip_directories:
  - ".git"
  - "node_modules"
  - "__pycache__"
  - ".venv"

# Where to store index data
data_dir: "./data"

# Embedding model in use; the commented-out line below is an alternative.
embedding_model: "all-MiniLM-L6-v2"
# embedding_model: BAAI/bge-small-en-v1.5

# NOTE(review): presumably the quiet period, in seconds, before file changes
# trigger re-indexing — confirm against the watcher implementation.
debounce_seconds: 5