# Source: SPARKNET / configs/rag.yaml
# Initial commit: SPARKNET framework (MHamdan, commit d520909)
# RAG (Retrieval-Augmented Generation) Configuration
# SPARKNET Document Intelligence Integration

# =============================================================================
# Vector Store Settings
# =============================================================================
vector_store:
  # Store type: "chroma" (default) or "memory" (for testing)
  type: chroma

  # ChromaDB settings
  chroma:
    # Persistence directory for vector store
    persist_directory: "./.sparknet/chroma_db"
    # Collection name for document chunks
    collection_name: "sparknet_documents"
    # Distance metric: "cosine" (default), "l2", or "ip"
    distance_metric: cosine
    # Anonymized telemetry (set to false to disable)
    anonymized_telemetry: false
# =============================================================================
# Embedding Settings
# =============================================================================
embeddings:
  # Provider: "ollama" (default, local) or "openai" (cloud, requires API key)
  provider: ollama

  # Ollama settings (local, privacy-preserving)
  ollama:
    # Model name for embeddings
    # Recommended: nomic-embed-text (768 dims) or mxbai-embed-large (1024 dims)
    model: nomic-embed-text
    # Ollama server URL
    base_url: "http://localhost:11434"
    # Request timeout in seconds
    timeout: 30

  # OpenAI settings (cloud, disabled by default)
  openai:
    # IMPORTANT: OpenAI is disabled by default for privacy
    # Set to true only if you explicitly need cloud embeddings
    enabled: false
    # Model name (if enabled)
    model: text-embedding-3-small
    # API key (from environment variable OPENAI_API_KEY)
    # Never store API keys in config files
    api_key_env: OPENAI_API_KEY

  # Caching settings
  cache:
    # Enable embedding cache for faster re-processing
    enabled: true
    # Maximum cache entries
    max_entries: 10000
# =============================================================================
# Indexer Settings
# =============================================================================
indexer:
  # Batch size for embedding generation
  batch_size: 32
  # Include bounding box metadata
  include_bbox: true
  # Include page numbers
  include_page: true
  # Include chunk type labels
  include_chunk_type: true
  # Skip empty chunks
  skip_empty_chunks: true
  # Minimum chunk text length (characters)
  min_chunk_length: 10
# =============================================================================
# Retriever Settings
# =============================================================================
retriever:
  # Default number of results to return
  default_top_k: 5
  # Maximum results to return
  max_results: 20
  # Minimum similarity score (0.0 - 1.0)
  # Chunks below this threshold are filtered out
  similarity_threshold: 0.5
  # Enable result reranking (experimental)
  enable_reranking: false
  # Number of results to rerank
  rerank_top_k: 10
  # Include evidence references in results
  include_evidence: true
  # Maximum snippet length in evidence
  evidence_snippet_length: 200
# =============================================================================
# Generator Settings (Answer Generation)
# =============================================================================
generator:
  # LLM provider for answer generation: "ollama" (default) or "openai"
  provider: ollama

  # Ollama settings (local)
  ollama:
    # Model for answer generation
    # Recommended: llama3.2, mistral, or phi3
    model: llama3.2
    # Ollama server URL
    base_url: "http://localhost:11434"
    # Request timeout in seconds
    timeout: 60
    # Generation parameters
    temperature: 0.1
    max_tokens: 1024

  # OpenAI settings (cloud, disabled by default)
  openai:
    enabled: false
    model: gpt-4o-mini
    api_key_env: OPENAI_API_KEY
    temperature: 0.1
    max_tokens: 1024

  # Confidence settings
  min_confidence: 0.5
  # Abstention policy
  # When true, the system will refuse to answer if confidence is too low
  abstain_on_low_confidence: true
  abstain_threshold: 0.3
  # Maximum context length for LLM
  max_context_length: 8000
  # Require citations in answers
  require_citations: true
# =============================================================================
# Document Intelligence Integration
# =============================================================================
document_intelligence:
  # Parser settings
  parser:
    render_dpi: 200
    max_pages: null  # null = no limit

  # Extraction settings
  extraction:
    min_field_confidence: 0.5
    abstain_on_low_confidence: true

  # Grounding settings
  grounding:
    enable_crops: true
    crop_output_dir: "./.sparknet/crops"
# =============================================================================
# Performance Settings
# =============================================================================
performance:
  # Number of parallel workers for batch processing
  num_workers: 4
  # Chunk processing batch size
  chunk_batch_size: 100
  # Enable async processing where supported
  async_enabled: true
# =============================================================================
# Logging Settings
# =============================================================================
logging:
  # Log level: DEBUG, INFO, WARNING, ERROR
  level: INFO
  # Log RAG queries and results
  log_queries: false
  # Log embedding operations
  log_embeddings: false