# ------------------------------------------------------------------
# RAG CBT QUESTION-ANSWERING SYSTEM CONFIGURATION
# ------------------------------------------------------------------
# Project identification and how much of the source book to load.
project:
  name: "cbt-rag-system"
  category: "psychology"
  doc_limit: null  # Load all pages from the book
# Embedding and chunking settings.
processing:
  # Embedding model used for both vector db and evaluator similarity
  embedding_model: "jinaai/jina-embeddings-v2-small-en"
  # Options: sentence, recursive, semantic, fixed
  technique: "recursive"
  # Jina supports 8192 tokens (~32k chars); 1000-char chunks are used
  # for better context
  chunk_size: 1000
  # NOTE(review): presumably measured in characters, like chunk_size — confirm
  chunk_overlap: 100
# Vector index settings.
vector_db:
  base_index_name: "cbt-book"
  dimension: 512  # Jina outputs 512 dimensions
  metric: "cosine"
  batch_size: 50  # Reduced batch size for CPU processing
# Retrieval and reranking settings.
retrieval:
  # Options: hybrid, semantic, bm25
  mode: "hybrid"
  # Options: cross-encoder, rrf
  rerank_strategy: "cross-encoder"
  use_mmr: false
  # NOTE(review): presumably top_k candidates are fetched and final_k are kept
  # after reranking — confirm against the retriever code.
  top_k: 50
  final_k: 5
# Text-generation settings.
generation:
  temperature: 0.0
  max_new_tokens: 512
  # The model used to judge the others (OpenRouter)
  # NOTE(review): placement of judge_model under `generation` was reconstructed
  # after indentation was lost — confirm against the config loader.
  judge_model: "deepseek/deepseek-v3.2"
# List of contestants in the tournament
# NOTE(review): kept as a top-level key; the original nesting could not be
# recovered from the source — confirm against the config loader.
models:
  - "Llama-3-8B"
  - "Mistral-7B"
  - "TinyAya"