File size: 1,211 Bytes
c7256ee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c27a4e3
 
c7256ee
 
 
 
 
 
c27a4e3
c7256ee
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# ------------------------------------------------------------------
# RAG CBT QUESTION-ANSWERING SYSTEM CONFIGURATION
# ------------------------------------------------------------------

# Project-level identity and corpus-loading limit.
project:
  name: "cbt-rag-system"
  # NOTE(review): how `category` is consumed is not visible in this file —
  # confirm against the loader before changing it.
  category: "psychology"
  # null = no limit: load all pages from the book.
  doc_limit: null

# Embedding and text-splitting settings applied to the source document.
processing:
  # Embedding model used for both vector db and evaluator similarity
  embedding_model: "jinaai/jina-embeddings-v2-small-en"
  # Options: sentence, recursive, semantic, fixed
  # (presumably the chunking strategy — confirm against the chunker)
  technique: "recursive"
  # Jina supports 8192 tokens (~32k chars), using 1000 chars for better context
  chunk_size: 1000
  # NOTE(review): presumably measured in characters, like chunk_size — confirm.
  chunk_overlap: 100

# Vector index settings.
vector_db:
  base_index_name: "cbt-book"
  # Jina outputs 512 dimensions. This must match the output size of
  # processing.embedding_model; update both together.
  dimension: 512
  metric: "cosine"
  # Reduced batch size for CPU processing
  batch_size: 50

# Retrieval and reranking settings.
retrieval:
  # Options: hybrid, semantic, bm25
  mode: "hybrid"
  # Options: cross-encoder, rrf
  rerank_strategy: "cross-encoder"
  # Canonical lowercase boolean (avoids YAML 1.1/1.2 truthy ambiguity).
  # NOTE(review): MMR presumably means maximal marginal relevance — confirm.
  use_mmr: false
  # NOTE(review): presumably top_k candidates are fetched and then narrowed
  # to final_k after reranking — confirm against the retriever.
  top_k: 50
  final_k: 5

# Answer-generation and judging settings.
generation:
  # Written with an explicit fractional digit so the value cannot be
  # mistaken for a truncated number; parses to the float 0.0.
  # NOTE(review): 0.0 presumably selects deterministic decoding — confirm
  # with the generation backend.
  temperature: 0.0
  max_new_tokens: 512
  # Model used to judge the other models (OpenRouter)
  judge_model: "deepseek/deepseek-v3.2"

# List of contestants in the tournament.
# Entries are plain quoted strings.
# NOTE(review): presumably these are the models evaluated by
# generation.judge_model, and each name must match an identifier the
# model loader recognises — confirm.
models:
  - "Llama-3-8B"
  - "Mistral-7B"
  - "TinyAya"