# Source: SPARKNET / configs/rag.yaml
# Initial commit: SPARKNET framework (MHamdan, commit d520909)
# RAG (Retrieval-Augmented Generation) Configuration
# SPARKNET Document Intelligence Integration

# =============================================================================
# Vector Store Settings
# =============================================================================
vector_store:
  # Store type: "chroma" (default) or "memory" (for testing)
  type: chroma

  # ChromaDB settings
  chroma:
    # Persistence directory for vector store
    persist_directory: "./.sparknet/chroma_db"
    # Collection name for document chunks
    collection_name: "sparknet_documents"
    # Distance metric: "cosine" (default), "l2", or "ip"
    distance_metric: cosine
    # Anonymized telemetry (set to false to disable)
    anonymized_telemetry: false
# =============================================================================
# Embedding Settings
# =============================================================================
embeddings:
  # Provider: "ollama" (default, local) or "openai" (cloud, requires API key)
  provider: ollama

  # Ollama settings (local, privacy-preserving)
  ollama:
    # Model name for embeddings
    # Recommended: nomic-embed-text (768 dims) or mxbai-embed-large (1024 dims)
    model: nomic-embed-text
    # Ollama server URL
    base_url: "http://localhost:11434"
    # Request timeout in seconds
    timeout: 30

  # OpenAI settings (cloud, disabled by default)
  openai:
    # IMPORTANT: OpenAI is disabled by default for privacy
    # Set to true only if you explicitly need cloud embeddings
    enabled: false
    # Model name (if enabled)
    model: text-embedding-3-small
    # API key (from environment variable OPENAI_API_KEY)
    # Never store API keys in config files
    api_key_env: OPENAI_API_KEY

  # Caching settings
  cache:
    # Enable embedding cache for faster re-processing
    enabled: true
    # Maximum cache entries
    max_entries: 10000
# =============================================================================
# Indexer Settings
# =============================================================================
indexer:
  # Batch size for embedding generation
  batch_size: 32
  # Include bounding box metadata
  include_bbox: true
  # Include page numbers
  include_page: true
  # Include chunk type labels
  include_chunk_type: true
  # Skip empty chunks
  skip_empty_chunks: true
  # Minimum chunk text length (characters)
  min_chunk_length: 10
# =============================================================================
# Retriever Settings
# =============================================================================
retriever:
  # Default number of results to return
  default_top_k: 5
  # Maximum results to return
  max_results: 20
  # Minimum similarity score (0.0 - 1.0)
  # Chunks below this threshold are filtered out
  similarity_threshold: 0.5
  # Enable result reranking (experimental)
  enable_reranking: false
  # Number of results to rerank
  rerank_top_k: 10
  # Include evidence references in results
  include_evidence: true
  # Maximum snippet length in evidence
  evidence_snippet_length: 200
# =============================================================================
# Generator Settings (Answer Generation)
# =============================================================================
generator:
  # LLM provider for answer generation: "ollama" (default) or "openai"
  provider: ollama

  # Ollama settings (local)
  ollama:
    # Model for answer generation
    # Recommended: llama3.2, mistral, or phi3
    model: llama3.2
    # Ollama server URL
    base_url: "http://localhost:11434"
    # Request timeout in seconds
    timeout: 60
    # Generation parameters
    temperature: 0.1
    max_tokens: 1024

  # OpenAI settings (cloud, disabled by default)
  openai:
    enabled: false
    model: gpt-4o-mini
    api_key_env: OPENAI_API_KEY
    temperature: 0.1
    max_tokens: 1024

  # Confidence settings
  min_confidence: 0.5
  # Abstention policy
  # When true, the system will refuse to answer if confidence is too low
  abstain_on_low_confidence: true
  abstain_threshold: 0.3
  # Maximum context length for LLM
  max_context_length: 8000
  # Require citations in answers
  require_citations: true
# =============================================================================
# Document Intelligence Integration
# =============================================================================
document_intelligence:
  # Parser settings
  parser:
    render_dpi: 200
    max_pages: null  # null = no limit

  # Extraction settings
  extraction:
    min_field_confidence: 0.5
    abstain_on_low_confidence: true

  # Grounding settings
  grounding:
    enable_crops: true
    crop_output_dir: "./.sparknet/crops"
# =============================================================================
# Performance Settings
# =============================================================================
performance:
  # Number of parallel workers for batch processing
  num_workers: 4
  # Chunk processing batch size
  chunk_batch_size: 100
  # Enable async processing where supported
  async_enabled: true
# =============================================================================
# Logging Settings
# =============================================================================
logging:
  # Log level: DEBUG, INFO, WARNING, ERROR
  level: INFO
  # Log RAG queries and results
  log_queries: false
  # Log embedding operations
  log_embeddings: false