"""Answer generation: rerank retrieved chunks, build a cited prompt, call the LLM."""

import logging
import time
from collections.abc import AsyncGenerator

from app.core.llm import GeminiService
from app.core.reranker import RerankerService
from app.models.schemas import GeneratedAnswer, RetrievedChunk

logger = logging.getLogger(__name__)

# Prompt used for every intent other than "summarize".
# Placeholders: {context} (numbered source blocks) and {query}.
SYSTEM_PROMPT = """You are a helpful assistant answering questions based on the provided context.

CONTEXT:
{context}

RULES:
- Answer based ONLY on the provided context.
- Cite sources using [1], [2], etc. inline after the relevant information.
- If the context doesn't contain enough information, say "I don't have enough information in the provided documents to answer this question."
- Be concise but thorough.
- Use markdown formatting for readability.

QUESTION: {query}

ANSWER:"""

# Prompt used when intent == "summarize". Same placeholders as SYSTEM_PROMPT.
SUMMARY_PROMPT = """You are a helpful assistant. Summarize the following context.

CONTEXT:
{context}

RULES:
- Provide a structured summary using markdown.
- Cite sources using [1], [2], etc.
- Cover the key points from all provided sources.

QUESTION: {query}

SUMMARY:"""

# Answer text returned when reranking leaves no chunks to build a prompt from.
_NO_RESULTS_ANSWER = "No relevant documents found to answer your question."


class AnswerGenerator:
    """Produce answers with numbered citations from retrieved chunks.

    The generator reranks the supplied chunks, formats the survivors into a
    numbered context block, fills one of the module-level prompt templates and
    sends the result to the LLM, either in one call or as a stream.
    """

    def __init__(self, llm: GeminiService, reranker: RerankerService):
        """Store the collaborators.

        Args:
            llm: Service used via ``generate``, ``generate_stream`` and
                ``model_name``.
            reranker: Service used via ``rerank(query, chunks, top_k=...)``.
        """
        self.llm = llm
        self.reranker = reranker

    def _build_context(self, chunks: list[RetrievedChunk]) -> str:
        """Render chunks as numbered source blocks separated by blank lines.

        Each block is ``[i] (Source: <source>)`` followed by the chunk text on
        the next line. Numbering starts at 1 so it lines up with the
        ``[1], [2]`` citation markers requested in the prompts. A falsy
        ``metadata.source`` is rendered as ``unknown``.
        """
        return "\n\n".join(
            f"[{index}] (Source: {chunk.metadata.source or 'unknown'})\n{chunk.text}"
            for index, chunk in enumerate(chunks, 1)
        )

    def _build_prompt(
        self,
        query: str,
        chunks: list[RetrievedChunk],
        intent: str = "factual",
    ) -> str:
        """Fill the prompt template that matches ``intent``.

        ``"summarize"`` selects ``SUMMARY_PROMPT``; every other value selects
        ``SYSTEM_PROMPT``.
        """
        template = SUMMARY_PROMPT if intent == "summarize" else SYSTEM_PROMPT
        return template.format(context=self._build_context(chunks), query=query)

    def _no_results_answer(self, query: str) -> GeneratedAnswer:
        """Build the fixed reply used when reranking yields no chunks.

        Shared by the blocking and streaming paths so both report the same
        message, an empty source list and a generation time of 0.
        """
        return GeneratedAnswer(
            query=query,
            answer=_NO_RESULTS_ANSWER,
            sources=[],
            generation_time_ms=0,
            model=self.llm.model_name,
        )

    def generate_answer(
        self,
        query: str,
        chunks: list[RetrievedChunk],
        rerank_top_k: int = 5,
        intent: str = "factual",
    ) -> GeneratedAnswer:
        """Rerank ``chunks`` and generate a complete answer in one LLM call.

        Args:
            query: The user's question.
            chunks: Candidate chunks to rerank.
            rerank_top_k: Passed to the reranker as ``top_k``.
            intent: ``"summarize"`` selects the summary prompt; anything else
                selects the question-answering prompt.

        Returns:
            A ``GeneratedAnswer`` whose ``sources`` are the reranked chunks.
            ``generation_time_ms`` covers reranking, prompt construction and
            the LLM call. If reranking returns nothing, the fixed no-results
            answer is returned without calling the LLM.
        """
        start = time.perf_counter()

        reranked = self.reranker.rerank(query, chunks, top_k=rerank_top_k)
        if not reranked:
            return self._no_results_answer(query)

        prompt = self._build_prompt(query, reranked, intent)
        answer = self.llm.generate(prompt)
        elapsed = (time.perf_counter() - start) * 1000

        # Lazy %-args: the message is only formatted if INFO is enabled.
        logger.info("Generated answer in %.0fms", elapsed)

        return GeneratedAnswer(
            query=query,
            answer=answer,
            sources=reranked,
            generation_time_ms=elapsed,
            model=self.llm.model_name,
        )

    async def generate_answer_stream(
        self,
        query: str,
        chunks: list[RetrievedChunk],
        rerank_top_k: int = 5,
        intent: str = "factual",
    ) -> AsyncGenerator[str | GeneratedAnswer, None]:
        """Rerank ``chunks`` and stream the answer text as it is produced.

        Yields:
            Each text piece from ``llm.generate_stream`` as a ``str``, then one
            final ``GeneratedAnswer`` carrying the sources and timing. The
            final message has an empty ``answer`` because the text was already
            streamed. If reranking returns nothing, the only item yielded is
            the fixed no-results answer.

        Note:
            ``generation_time_ms`` here measures only the streaming LLM call;
            the timer starts after reranking and prompt construction, whereas
            ``generate_answer`` starts it before reranking.
            NOTE(review): confirm whether the two paths should agree.
        """
        reranked = self.reranker.rerank(query, chunks, top_k=rerank_top_k)
        if not reranked:
            yield self._no_results_answer(query)
            return

        prompt = self._build_prompt(query, reranked, intent)

        start = time.perf_counter()
        async for text_chunk in self.llm.generate_stream(prompt):
            yield text_chunk
        elapsed = (time.perf_counter() - start) * 1000

        # Final message with sources.
        yield GeneratedAnswer(
            query=query,
            answer="",  # Full answer was streamed
            sources=reranked,
            generation_time_ms=elapsed,
            model=self.llm.model_name,
        )