---
# RAG (Retrieval-Augmented Generation) Configuration
# SPARKNET Document Intelligence Integration

# =============================================================================
# Vector Store Settings
# =============================================================================
vector_store:
  # Store type: "chroma" (default) or "memory" (for testing)
  type: chroma

  # ChromaDB settings
  chroma:
    # Persistence directory for vector store
    persist_directory: "./.sparknet/chroma_db"
    # Collection name for document chunks
    collection_name: "sparknet_documents"
    # Distance metric: "cosine" (default), "l2", or "ip"
    distance_metric: cosine
    # Anonymized telemetry (set to false to disable)
    anonymized_telemetry: false

# =============================================================================
# Embedding Settings
# =============================================================================
embeddings:
  # Provider: "ollama" (default, local) or "openai" (cloud, requires API key)
  provider: ollama

  # Ollama settings (local, privacy-preserving)
  ollama:
    # Model name for embeddings
    # Recommended: nomic-embed-text (768 dims) or mxbai-embed-large (1024 dims)
    model: nomic-embed-text
    # Ollama server URL
    base_url: "http://localhost:11434"
    # Request timeout in seconds
    timeout: 30

  # OpenAI settings (cloud, disabled by default)
  openai:
    # IMPORTANT: OpenAI is disabled by default for privacy
    # Set to true only if you explicitly need cloud embeddings
    enabled: false
    # Model name (if enabled)
    model: text-embedding-3-small
    # API key (from environment variable OPENAI_API_KEY)
    # Never store API keys in config files
    api_key_env: OPENAI_API_KEY

  # Caching settings
  cache:
    # Enable embedding cache for faster re-processing
    enabled: true
    # Maximum cache entries
    max_entries: 10000

# =============================================================================
# Indexer Settings
# =============================================================================
indexer:
  # Batch size for embedding generation
  batch_size: 32
  # Include bounding box metadata
  include_bbox: true
  # Include page numbers
  include_page: true
  # Include chunk type labels
  include_chunk_type: true
  # Skip empty chunks
  skip_empty_chunks: true
  # Minimum chunk text length (characters)
  min_chunk_length: 10

# =============================================================================
# Retriever Settings
# =============================================================================
retriever:
  # Default number of results to return
  default_top_k: 5
  # Maximum results to return
  max_results: 20
  # Minimum similarity score (0.0 - 1.0)
  # Chunks below this threshold are filtered out
  similarity_threshold: 0.5
  # Enable result reranking (experimental)
  enable_reranking: false
  # Number of results to rerank
  rerank_top_k: 10
  # Include evidence references in results
  include_evidence: true
  # Maximum snippet length in evidence
  evidence_snippet_length: 200

# =============================================================================
# Generator Settings (Answer Generation)
# =============================================================================
generator:
  # LLM provider for answer generation: "ollama" (default) or "openai"
  provider: ollama

  # Ollama settings (local)
  ollama:
    # Model for answer generation
    # Recommended: llama3.2, mistral, or phi3
    model: llama3.2
    # Ollama server URL
    base_url: "http://localhost:11434"
    # Request timeout in seconds
    timeout: 60
    # Generation parameters
    temperature: 0.1
    max_tokens: 1024

  # OpenAI settings (cloud, disabled by default)
  openai:
    enabled: false
    model: gpt-4o-mini
    api_key_env: OPENAI_API_KEY
    temperature: 0.1
    max_tokens: 1024

  # Confidence settings
  min_confidence: 0.5

  # Abstention policy
  # When true, the system will refuse to answer if confidence is too low
  abstain_on_low_confidence: true
  abstain_threshold: 0.3

  # Maximum context length for LLM
  max_context_length: 8000

  # Require citations in answers
  require_citations: true

# =============================================================================
# Document Intelligence Integration
# =============================================================================
document_intelligence:
  # Parser settings
  parser:
    render_dpi: 200
    max_pages: null  # null = no limit

  # Extraction settings
  extraction:
    min_field_confidence: 0.5
    abstain_on_low_confidence: true

  # Grounding settings
  grounding:
    enable_crops: true
    crop_output_dir: "./.sparknet/crops"

# =============================================================================
# Performance Settings
# =============================================================================
performance:
  # Number of parallel workers for batch processing
  num_workers: 4
  # Chunk processing batch size
  chunk_batch_size: 100
  # Enable async processing where supported
  async_enabled: true

# =============================================================================
# Logging Settings
# =============================================================================
logging:
  # Log level: DEBUG, INFO, WARNING, ERROR
  level: INFO
  # Log RAG queries and results
  log_queries: false
  # Log embedding operations
  log_embeddings: false