"""RAG QA chain with streaming and multi-doc modes.""" from typing import Dict, Iterator, List, Literal, Optional, Union from db.faiss_client import FaissDB from models.embedder import MiniCPMEmbedder from models.llm import MiniCPMLLM from services.retrieval import RetrievalService from utils.formatters import build_context_block from utils.response_cleaner import clean_model_response SearchMode = Literal["all", "single"] class QAService: def __init__( self, embedder: MiniCPMEmbedder, llm: MiniCPMLLM, db: FaissDB, ): self.embedder = embedder self.llm = llm self.db = db self.retrieval = RetrievalService(embedder, db) def stream_answer( self, query: str, chat_history: Optional[List[Dict]] = None, document_ids: Optional[List[str]] = None, mode: SearchMode = "all", ) -> Iterator[Union[str, Dict]]: if mode == "single": filter_ids = [document_ids[0]] if document_ids else None elif document_ids: filter_ids = document_ids else: filter_ids = None results = self.retrieval.search(query, document_ids=filter_ids) yield {"type": "sources", "data": results} if not results: yield "I couldn't find relevant information in the loaded documents." return context = build_context_block(results) full_answer = [] for token in self.llm.stream_answer(query, context, chat_history): full_answer.append(token) yield token answer_text = clean_model_response("".join(full_answer)) confidence = self.llm.evaluate_confidence(query, context, answer_text) yield {"type": "confidence", "data": confidence}