# Spaces: MHamdan / SPARKNET (status: Sleeping)
# File size: 17,955 Bytes
# d520909

"""
Agentic RAG Orchestrator

Coordinates the multi-agent RAG pipeline with self-correction loop.
Follows FAANG best practices for production RAG systems.

Pipeline:
    Query -> Plan -> Retrieve -> Rerank -> Synthesize -> Validate -> (Revise?) -> Response

Key Features:
- LangGraph-style state machine
- Self-correction loop (up to `max_revision_attempts` revisions)
- Stage-by-stage streaming of progress via `query_stream`
- Comprehensive logging and metrics
- Graceful degradation
"""

from typing import List, Optional, Dict, Any, Generator, Tuple
from pydantic import BaseModel, Field
from loguru import logger
from dataclasses import dataclass, field
from enum import Enum
import time

from ..store import VectorStore, get_vector_store, VectorStoreConfig
from ..embeddings import EmbeddingAdapter, get_embedding_adapter, EmbeddingConfig

from .query_planner import QueryPlannerAgent, QueryPlan, SubQuery
from .retriever import RetrieverAgent, RetrievalResult, HybridSearchConfig
from .reranker import RerankerAgent, RankedResult, RerankerConfig
from .synthesizer import SynthesizerAgent, SynthesisResult, Citation, SynthesizerConfig
from .critic import CriticAgent, CriticResult, ValidationIssue, CriticConfig


class PipelineStage(str, Enum):
    """Named stages of the RAG pipeline.

    Mixes in ``str`` so every member compares equal to its plain string
    value (e.g. ``PipelineStage.PLANNING == "planning"``).
    """
    PLANNING = "planning"      # set by AgenticRAG._plan; also the initial RAGState stage
    RETRIEVAL = "retrieval"    # set by AgenticRAG._retrieve
    RERANKING = "reranking"    # set by AgenticRAG._rerank
    SYNTHESIS = "synthesis"    # set by AgenticRAG._synthesize
    VALIDATION = "validation"  # set at the top of each validation pass
    REVISION = "revision"      # set just before an answer is re-synthesized
    COMPLETE = "complete"      # yielded by query_stream together with the final response


class RAGConfig(BaseModel):
    """Configuration for the agentic RAG system.

    A single instance is read by AgenticRAG when it builds its agents and
    while it runs the pipeline.
    """
    # LLM settings (shared across agents): forwarded unchanged to the
    # planner, reranker, synthesizer and critic.
    model: str = Field(default="llama3.2:3b")
    # NOTE(review): port 11434 looks like a local Ollama server - confirm.
    base_url: str = Field(default="http://localhost:11434")

    # Pipeline settings
    # Upper bound on re-synthesis attempts after a failed validation (0-5).
    max_revision_attempts: int = Field(default=2, ge=0, le=5)
    # Forwarded to QueryPlannerAgent as `use_llm`.
    enable_query_planning: bool = Field(default=True)
    # Forwarded to RerankerConfig as `use_llm_rerank`.
    enable_reranking: bool = Field(default=True)
    # When False, the validation/revision loop is skipped entirely.
    enable_validation: bool = Field(default=True)

    # Retrieval settings
    # Used for the retriever's dense, sparse and final top-k alike.
    retrieval_top_k: int = Field(default=10, ge=1)
    # Number of chunks requested from the reranker.
    final_top_k: int = Field(default=5, ge=1)

    # Confidence thresholds
    # Forwarded to SynthesizerConfig as `confidence_threshold`.
    min_confidence: float = Field(default=0.5, ge=0.0, le=1.0)

    # Logging
    # Enables per-stage progress messages in the orchestrator.
    verbose: bool = Field(default=False)


@dataclass
class RAGState:
    """Mutable state threaded through the pipeline for a single query.

    AgenticRAG creates one instance per query; each stage method updates
    it in place and returns it.
    """
    query: str  # the user's question
    stage: PipelineStage = PipelineStage.PLANNING  # stage most recently entered

    # Intermediate results
    query_plan: Optional[QueryPlan] = None  # written by the planning stage
    retrieved_chunks: List[RetrievalResult] = field(default_factory=list)  # written by retrieval
    ranked_chunks: List[RankedResult] = field(default_factory=list)  # written by reranking
    synthesis_result: Optional[SynthesisResult] = None  # current answer; replaced on revision
    critic_result: Optional[CriticResult] = None  # outcome of the latest validation pass

    # Revision tracking
    revision_attempt: int = 0  # number of revisions performed so far
    revision_history: List[SynthesisResult] = field(default_factory=list)  # superseded answers, oldest first

    # Metrics
    start_time: float = field(default_factory=time.time)  # time.time() at creation; basis for latency_ms
    stage_times: Dict[str, float] = field(default_factory=dict)  # stage name -> elapsed seconds

    # Errors
    errors: List[str] = field(default_factory=list)  # messages of exceptions caught by the pipeline


class RAGResponse(BaseModel):
    """Final response from the RAG system.

    Returned both for successful runs and for failures; in the failure case
    `answer` carries the error message and `confidence` is 0.0.
    """
    answer: str  # synthesized answer text, or an error message
    citations: List[Citation]  # citations from the synthesis result; empty on error
    confidence: float  # synthesizer confidence; 0.0 on error

    # Metadata
    query: str  # the original question
    num_sources: int  # `num_sources_used` of the synthesis result; 0 on error
    validated: bool  # True only when the critic ran and accepted the final answer
    revision_attempts: int  # number of revisions performed

    # Detailed info (optional)
    # Summary with keys "intent", "sub_queries" (a count) and "expanded_terms" (first five).
    query_plan: Optional[Dict[str, Any]] = None
    # Summary with keys "is_valid", "confidence", "hallucination_score",
    # "citation_accuracy" and "issues" (a count).
    validation_details: Optional[Dict[str, Any]] = None
    latency_ms: float = 0.0  # milliseconds between state creation and response construction


class AgenticRAG:
    """
    Production-grade Multi-Agent RAG System.

    Orchestrates:
    - QueryPlannerAgent: Query decomposition and planning
    - RetrieverAgent: Hybrid retrieval
    - RerankerAgent: Cross-encoder reranking
    - SynthesizerAgent: Answer generation
    - CriticAgent: Validation and hallucination detection

    Features:
    - Self-correction loop
    - Graceful degradation
    - Comprehensive metrics
    """

    def __init__(
        self,
        config: Optional[RAGConfig] = None,
        vector_store: Optional[VectorStore] = None,
        embedding_adapter: Optional[EmbeddingAdapter] = None,
    ):
        """
        Set up the orchestrator and construct every pipeline agent.

        Args:
            config: Pipeline settings; a default RAGConfig is used when omitted
            vector_store: Store handed to the retriever (may be None)
            embedding_adapter: Embedder handed to the retriever (may be None)
        """
        self.config = config or RAGConfig()

        # Kept exactly as supplied; the `store` / `embedder` properties
        # create defaults on first access when these are still None.
        self._embedder = embedding_adapter
        self._store = vector_store

        # Agents read both the config and the shared components above,
        # so they are built last.
        self._init_agents()

        cfg = self.config
        logger.info(
            f"AgenticRAG initialized (model={cfg.model}, "
            f"revision_attempts={cfg.max_revision_attempts})"
        )

    def _init_agents(self):
        """Build the planner, retriever, reranker, synthesizer and critic.

        All LLM-backed agents receive the same `model` / `base_url` pair
        from the shared RAGConfig.
        """
        cfg = self.config
        llm_endpoint = {"model": cfg.model, "base_url": cfg.base_url}

        # Query planner: `enable_query_planning` toggles its LLM usage.
        self.planner = QueryPlannerAgent(
            use_llm=cfg.enable_query_planning,
            **llm_endpoint,
        )

        # Retriever: one top-k value drives the dense, sparse and fused limits.
        candidate_k = cfg.retrieval_top_k
        self.retriever = RetrieverAgent(
            config=HybridSearchConfig(
                dense_top_k=candidate_k,
                sparse_top_k=candidate_k,
                final_top_k=candidate_k,
            ),
            vector_store=self._store,
            embedding_adapter=self._embedder,
        )

        # Reranker: trims the candidates down to `final_top_k`.
        self.reranker = RerankerAgent(
            config=RerankerConfig(
                top_k=cfg.final_top_k,
                use_llm_rerank=cfg.enable_reranking,
                min_relevance_score=0.1,  # Lower threshold to allow more results
                **llm_endpoint,
            )
        )

        # Synthesizer: `min_confidence` becomes its confidence threshold.
        self.synthesizer = SynthesizerAgent(
            config=SynthesizerConfig(
                confidence_threshold=cfg.min_confidence,
                **llm_endpoint,
            )
        )

        # Critic: only needs the LLM endpoint.
        self.critic = CriticAgent(config=CriticConfig(**llm_endpoint))

    @property
    def store(self) -> VectorStore:
        """Return the vector store, creating the default one on first use."""
        existing = self._store
        if existing is not None:
            return existing
        # Nothing was injected: fall back to the factory and remember the result.
        self._store = get_vector_store()
        return self._store

    @property
    def embedder(self) -> EmbeddingAdapter:
        """Return the embedding adapter, creating the default one on first use."""
        existing = self._embedder
        if existing is not None:
            return existing
        # Nothing was injected: fall back to the factory and remember the result.
        self._embedder = get_embedding_adapter()
        return self._embedder

    def query(
        self,
        question: str,
        filters: Optional[Dict[str, Any]] = None,
    ) -> RAGResponse:
        """
        Answer a question by running every pipeline stage in order.

        Stages run as plan -> retrieve -> rerank -> synthesize, followed by
        the validation/revision loop when `enable_validation` is set. Any
        exception raised along the way is logged, recorded on the state and
        turned into an error response instead of propagating.

        Args:
            question: User's question
            filters: Optional metadata filters forwarded to retrieval

        Returns:
            RAGResponse with the answer and metadata, or an error response
        """
        state = RAGState(query=question)

        try:
            # The four mandatory stages, each taking and returning the state.
            for run_stage in (
                self._plan,
                lambda current: self._retrieve(current, filters),
                self._rerank,
                self._synthesize,
            ):
                state = run_stage(state)

            # Validation (with its revision loop) is optional.
            if self.config.enable_validation:
                state = self._validate_and_revise(state)

            return self._build_response(state)

        except Exception as e:
            reason = str(e)
            logger.error(f"RAG pipeline error: {e}")
            state.errors.append(reason)
            return self._build_error_response(state, reason)

    def query_stream(
        self,
        question: str,
        filters: Optional[Dict[str, Any]] = None,
    ) -> Generator[Tuple[PipelineStage, Any], None, None]:
        """
        Run the pipeline, yielding a progress item after each stage.

        Args:
            question: User's question
            filters: Optional metadata filters forwarded to retrieval

        Yields:
            Tuple of (stage, stage_result). Planning and synthesis report
            their result objects, retrieval and reranking report a chunk
            count, validation reports the critic result, and the last item
            is always (COMPLETE, RAGResponse) - an error response if any
            stage raised.
        """
        state = RAGState(query=question)

        try:
            # (stage label, stage runner, what to report once it has run)
            steps = (
                (
                    PipelineStage.PLANNING,
                    self._plan,
                    lambda s: s.query_plan,
                ),
                (
                    PipelineStage.RETRIEVAL,
                    lambda s: self._retrieve(s, filters),
                    lambda s: len(s.retrieved_chunks),
                ),
                (
                    PipelineStage.RERANKING,
                    self._rerank,
                    lambda s: len(s.ranked_chunks),
                ),
                (
                    PipelineStage.SYNTHESIS,
                    self._synthesize,
                    lambda s: s.synthesis_result,
                ),
            )
            for stage, run_stage, report in steps:
                state = run_stage(state)
                yield stage, report(state)

            # Validation is optional; the whole revision loop is reported once.
            if self.config.enable_validation:
                state = self._validate_and_revise(state)
                yield PipelineStage.VALIDATION, state.critic_result

            yield PipelineStage.COMPLETE, self._build_response(state)

        except Exception as e:
            logger.error(f"Streaming error: {e}")
            yield PipelineStage.COMPLETE, self._build_error_response(state, str(e))

    def _plan(self, state: RAGState) -> RAGState:
        """Run the query planner and store its plan on the state.

        Also records the elapsed seconds under ``stage_times["planning"]``.
        """
        began = time.time()
        verbose = self.config.verbose
        state.stage = PipelineStage.PLANNING

        if verbose:
            logger.info(f"Planning query: {state.query}")

        plan = self.planner.plan(state.query)
        state.query_plan = plan
        state.stage_times["planning"] = time.time() - began

        if verbose:
            logger.info(
                f"Query plan: intent={plan.intent}, "
                f"sub_queries={len(plan.sub_queries)}"
            )

        return state

    def _retrieve(
        self,
        state: RAGState,
        filters: Optional[Dict[str, Any]],
    ) -> RAGState:
        """Fetch candidate chunks for the query and store them on the state.

        The retriever receives the raw query, the query plan, the configured
        `retrieval_top_k` and the caller's filters. Elapsed seconds are
        recorded under ``stage_times["retrieval"]``.
        """
        began = time.time()
        verbose = self.config.verbose
        state.stage = PipelineStage.RETRIEVAL

        if verbose:
            logger.info("Retrieving relevant chunks...")

        found = self.retriever.retrieve(
            query=state.query,
            plan=state.query_plan,
            top_k=self.config.retrieval_top_k,
            filters=filters,
        )
        state.retrieved_chunks = found
        state.stage_times["retrieval"] = time.time() - began

        if verbose:
            logger.info(f"Retrieved {len(state.retrieved_chunks)} chunks")

        return state

    def _rerank(self, state: RAGState) -> RAGState:
        """
        Execute the reranking stage.

        Passes the retrieved chunks to the reranker and keeps the configured
        `final_top_k`. When retrieval produced nothing, the reranker is not
        called and `ranked_chunks` is set to an empty list.

        Args:
            state: Pipeline state holding `retrieved_chunks`

        Returns:
            The same state with `ranked_chunks` and
            ``stage_times["reranking"]`` populated
        """
        start = time.time()
        state.stage = PipelineStage.RERANKING

        if not state.retrieved_chunks:
            state.ranked_chunks = []
            # Record the timing on this path too, so stage_times has a
            # "reranking" entry for every run that reaches this stage.
            state.stage_times["reranking"] = time.time() - start
            return state

        if self.config.verbose:
            logger.info("Reranking results...")

        state.ranked_chunks = self.reranker.rerank(
            query=state.query,
            results=state.retrieved_chunks,
            top_k=self.config.final_top_k,
        )

        state.stage_times["reranking"] = time.time() - start

        if self.config.verbose:
            logger.info(f"Reranked to {len(state.ranked_chunks)} chunks")

        return state

    def _synthesize(self, state: RAGState) -> RAGState:
        """Generate an answer from the ranked chunks and store it on the state.

        Elapsed seconds are recorded under ``stage_times["synthesis"]``.
        """
        began = time.time()
        verbose = self.config.verbose
        state.stage = PipelineStage.SYNTHESIS

        if verbose:
            logger.info("Synthesizing answer...")

        answer = self.synthesizer.synthesize(
            query=state.query,
            results=state.ranked_chunks,
            plan=state.query_plan,
        )
        state.synthesis_result = answer
        state.stage_times["synthesis"] = time.time() - began

        if verbose:
            logger.info(
                f"Synthesized answer (confidence={answer.confidence:.2f})"
            )

        return state

    def _validate_and_revise(self, state: RAGState) -> RAGState:
        """Validate the current answer, re-synthesizing while it fails.

        Each pass asks the critic to check the current synthesis against the
        ranked chunks. The loop ends when the critic accepts the answer or
        when `max_revision_attempts` revisions have been made; a revised
        answer is always validated before the loop exits. Total elapsed
        seconds are recorded under ``stage_times["validation"]``.
        """
        began = time.time()
        cfg = self.config

        while state.revision_attempt <= cfg.max_revision_attempts:
            state.stage = PipelineStage.VALIDATION

            if cfg.verbose:
                logger.info(f"Validating (attempt {state.revision_attempt + 1})...")

            verdict = self.critic.validate(
                synthesis_result=state.synthesis_result,
                sources=state.ranked_chunks,
            )
            state.critic_result = verdict

            # Accepted: nothing more to do.
            if verdict.is_valid:
                if cfg.verbose:
                    logger.info("Validation passed!")
                break

            # Rejected, but the revision budget is spent: keep the last answer.
            budget_spent = state.revision_attempt >= cfg.max_revision_attempts
            if budget_spent:
                if cfg.verbose:
                    logger.warning("Max revision attempts reached")
                break

            # Rejected with budget left: archive the answer and try again.
            state.stage = PipelineStage.REVISION
            state.revision_history.append(state.synthesis_result)
            state.revision_attempt += 1

            if cfg.verbose:
                logger.info(f"Revising answer (attempt {state.revision_attempt})...")

            state.synthesis_result = self._revise_synthesis(state)

        state.stage_times["validation"] = time.time() - began
        return state

    def _revise_synthesis(self, state: RAGState) -> SynthesisResult:
        """Produce a replacement for an answer that failed validation.

        This is currently a plain re-run of the synthesizer with the same
        query, ranked chunks and plan; the critic's feedback on the state is
        not consulted yet.
        """
        synthesis_inputs = {
            "query": state.query,
            "results": state.ranked_chunks,
            "plan": state.query_plan,
        }
        return self.synthesizer.synthesize(**synthesis_inputs)

    def _build_response(self, state: RAGState) -> RAGResponse:
        """Assemble the final RAGResponse from a finished pipeline state.

        Falls back to an error response when the state carries no synthesis
        result. The plan and validation outcome are condensed into small
        summary dicts; `validated` is False when no critic result exists.
        """
        elapsed_ms = (time.time() - state.start_time) * 1000  # ms

        result = state.synthesis_result
        if result is None:
            return self._build_error_response(state, "No synthesis result")

        # Condensed view of the query plan (only the first five expanded terms).
        plan = state.query_plan
        plan_summary = (
            {
                "intent": plan.intent.value,
                "sub_queries": len(plan.sub_queries),
                "expanded_terms": plan.expanded_terms[:5],
            }
            if plan
            else None
        )

        # Condensed view of the critic's verdict (issues reported as a count).
        verdict = state.critic_result
        validation_summary = (
            {
                "is_valid": verdict.is_valid,
                "confidence": verdict.confidence,
                "hallucination_score": verdict.hallucination_score,
                "citation_accuracy": verdict.citation_accuracy,
                "issues": len(verdict.issues),
            }
            if verdict
            else None
        )

        was_validated = verdict.is_valid if verdict else False

        return RAGResponse(
            answer=result.answer,
            citations=result.citations,
            confidence=result.confidence,
            query=state.query,
            num_sources=result.num_sources_used,
            validated=was_validated,
            revision_attempts=state.revision_attempt,
            query_plan=plan_summary,
            validation_details=validation_summary,
            latency_ms=elapsed_ms,
        )

    def _build_error_response(
        self,
        state: RAGState,
        error: str,
    ) -> RAGResponse:
        """Wrap an error message in a zero-confidence, unvalidated RAGResponse."""
        message = f"I encountered an error processing your query: {error}"
        elapsed_ms = (time.time() - state.start_time) * 1000
        return RAGResponse(
            answer=message,
            citations=[],
            confidence=0.0,
            query=state.query,
            num_sources=0,
            validated=False,
            revision_attempts=state.revision_attempt,
            latency_ms=elapsed_ms,
        )

    def index_text(
        self,
        text: str,
        document_id: str,
        metadata: Optional[Dict[str, Any]] = None,
        *,
        chunk_size: int = 500,
        overlap: int = 50,
        min_chunk_chars: int = 50,
    ) -> int:
        """
        Index text content into the vector store.

        The text is cut into fixed-size character windows that overlap by
        `overlap` characters. Windows whose stripped length is below
        `min_chunk_chars` are skipped, so with the defaults a document
        shorter than 50 characters yields no chunks. Each kept chunk is
        embedded and all chunks are added to the store in a single call.

        Args:
            text: Text content to index
            document_id: Unique document identifier; chunk ids are derived
                from it as ``"{document_id}_chunk_{n}"``
            metadata: Optional metadata; only its "filename" entry is used,
                as each chunk's `source_path`
            chunk_size: Window length in characters
            overlap: Characters shared between consecutive windows
            min_chunk_chars: Minimum stripped length for a window to be kept

        Returns:
            Number of chunks indexed (0 when nothing qualified)

        Raises:
            ValueError: If `chunk_size` is not positive, or `overlap` is
                negative or not smaller than `chunk_size`
        """
        # A non-positive stride would make range() fail or silently yield
        # nothing, so reject bad chunking parameters up front.
        if chunk_size <= 0:
            raise ValueError(f"chunk_size must be positive, got {chunk_size}")
        if not 0 <= overlap < chunk_size:
            raise ValueError(
                f"overlap must satisfy 0 <= overlap < chunk_size, got {overlap}"
            )

        if not text:
            return 0

        # Same for every chunk, so look it up once.
        source_path = metadata.get("filename", "") if metadata else ""
        stride = chunk_size - overlap

        chunks = []
        for offset in range(0, len(text), stride):
            chunk_text = text[offset:offset + chunk_size]
            if len(chunk_text.strip()) < min_chunk_chars:
                continue

            chunks.append({
                # Numbered by kept chunks, so ids stay contiguous after skips.
                "chunk_id": f"{document_id}_chunk_{len(chunks)}",
                "document_id": document_id,
                "text": chunk_text,
                "page": 0,
                "chunk_type": "text",
                "source_path": source_path,
            })

        if not chunks:
            return 0

        # One embedding per chunk, in chunk order.
        embeddings = [self.embedder.embed_text(chunk["text"]) for chunk in chunks]

        # Add to store
        self.store.add_chunks(chunks, embeddings)

        logger.info(f"Indexed {len(chunks)} chunks for document {document_id}")
        return len(chunks)

    def get_stats(self) -> Dict[str, Any]:
        """Report the stored chunk count plus the LLM and embedding model details."""
        chunk_total = self.store.count()
        llm_name = self.config.model
        embedder = self.embedder
        return {
            "total_chunks": chunk_total,
            "model": llm_name,
            "embedding_model": embedder.model_name,
            "embedding_dimension": embedder.embedding_dimension,
        }