Spaces:

NinjainPJs
/

Ragcore

Sleeping

File size: 3,967 Bytes

a34068e

import logging
import time
from collections.abc import AsyncGenerator

from app.core.llm import GeminiService
from app.core.reranker import RerankerService
from app.models.schemas import GeneratedAnswer, RetrievedChunk

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Prompt template used for question answering over retrieved context.
# It is filled with str.format and expects exactly two fields:
#   {context} - the numbered source passages
#   {query}   - the user's question
# Used for every intent other than "summarize".
SYSTEM_PROMPT = """You are a helpful assistant answering questions based on the provided context.

CONTEXT:
{context}

RULES:
- Answer based ONLY on the provided context.
- Cite sources using [1], [2], etc. inline after the relevant information.
- If the context doesn't contain enough information, say "I don't have enough information in the provided documents to answer this question."
- Be concise but thorough.
- Use markdown formatting for readability.

QUESTION: {query}

ANSWER:"""

# Prompt template used when the intent is "summarize".
# Takes the same two str.format fields as SYSTEM_PROMPT: {context} and {query}.
SUMMARY_PROMPT = """You are a helpful assistant. Summarize the following context.

CONTEXT:
{context}

RULES:
- Provide a structured summary using markdown.
- Cite sources using [1], [2], etc.
- Cover the key points from all provided sources.

QUESTION: {query}

SUMMARY:"""


class AnswerGenerator:
    """Turn retrieved chunks into a cited answer using a reranker and an LLM.

    Both entry points follow the same pipeline: rerank the candidate chunks,
    number the survivors so the model can cite them as ``[1]``, ``[2]``, ...,
    fill one of the module-level prompt templates, and send the prompt to the
    LLM service. ``generate_answer`` returns the whole answer at once;
    ``generate_answer_stream`` yields it piece by piece.
    """

    def __init__(self, llm: GeminiService, reranker: RerankerService):
        """Store the LLM and reranker services used by the generator."""
        self.llm = llm
        self.reranker = reranker

    def _build_context(self, chunks: list[RetrievedChunk]) -> str:
        """Render chunks as numbered, source-labelled passages.

        Each chunk becomes ``[i] (Source: <source>)`` followed by its text on
        the next line, with ``i`` starting at 1. A falsy ``metadata.source``
        is shown as ``unknown``. Passages are separated by a blank line; an
        empty ``chunks`` list yields an empty string.
        """
        return "\n\n".join(
            f"[{i}] (Source: {chunk.metadata.source or 'unknown'})\n{chunk.text}"
            for i, chunk in enumerate(chunks, 1)
        )

    def _build_prompt(self, query: str, chunks: list[RetrievedChunk], intent: str = "factual") -> str:
        """Fill the prompt template that matches ``intent``.

        ``"summarize"`` selects ``SUMMARY_PROMPT``; every other value
        (including the default ``"factual"``) selects ``SYSTEM_PROMPT``.
        """
        context = self._build_context(chunks)
        template = SUMMARY_PROMPT if intent == "summarize" else SYSTEM_PROMPT
        return template.format(context=context, query=query)

    def _no_results_answer(self, query: str) -> GeneratedAnswer:
        """Build the fixed response returned when reranking leaves no chunks."""
        return GeneratedAnswer(
            query=query,
            answer="No relevant documents found to answer your question.",
            sources=[],
            generation_time_ms=0,
            model=self.llm.model_name,
        )

    def generate_answer(
        self,
        query: str,
        chunks: list[RetrievedChunk],
        rerank_top_k: int = 5,
        intent: str = "factual",
    ) -> GeneratedAnswer:
        """Rerank ``chunks``, prompt the LLM once and return the full answer.

        Args:
            query: The user's question.
            chunks: Candidate chunks to rerank.
            rerank_top_k: Passed to the reranker as ``top_k``.
            intent: ``"summarize"`` uses the summary template; anything else
                uses the question-answering template.

        Returns:
            A ``GeneratedAnswer`` whose ``sources`` are the reranked chunks and
            whose ``generation_time_ms`` covers reranking, prompt building and
            the LLM call. If reranking yields nothing, a fixed "no relevant
            documents" answer with no sources is returned instead and the LLM
            is not called.
        """
        start = time.perf_counter()

        reranked = self.reranker.rerank(query, chunks, top_k=rerank_top_k)
        if not reranked:
            return self._no_results_answer(query)

        prompt = self._build_prompt(query, reranked, intent)
        answer = self.llm.generate(prompt)

        elapsed = (time.perf_counter() - start) * 1000
        logger.info("Generated answer in %.0fms", elapsed)

        return GeneratedAnswer(
            query=query,
            answer=answer,
            sources=reranked,
            generation_time_ms=elapsed,
            model=self.llm.model_name,
        )

    async def generate_answer_stream(
        self,
        query: str,
        chunks: list[RetrievedChunk],
        rerank_top_k: int = 5,
        intent: str = "factual",
    ) -> AsyncGenerator[str | GeneratedAnswer, None]:
        """Rerank ``chunks`` and stream the LLM answer piece by piece.

        Args:
            query: The user's question.
            chunks: Candidate chunks to rerank.
            rerank_top_k: Passed to the reranker as ``top_k``.
            intent: ``"summarize"`` uses the summary template; anything else
                uses the question-answering template.

        Yields:
            Each item produced by ``llm.generate_stream`` as it arrives,
            followed by one final ``GeneratedAnswer`` carrying the reranked
            sources and timing. The final object's ``answer`` is empty because
            the text was already streamed. If reranking yields nothing, the
            only item is the fixed "no relevant documents" answer.
        """
        # Start timing before reranking so generation_time_ms covers the same
        # stages as generate_answer.
        start = time.perf_counter()

        # NOTE(review): rerank is called synchronously inside this coroutine;
        # if it is slow it will hold up the event loop - confirm whether it
        # should be offloaded.
        reranked = self.reranker.rerank(query, chunks, top_k=rerank_top_k)
        if not reranked:
            yield self._no_results_answer(query)
            return

        prompt = self._build_prompt(query, reranked, intent)

        async for text_chunk in self.llm.generate_stream(prompt):
            yield text_chunk

        elapsed = (time.perf_counter() - start) * 1000
        logger.info("Streamed answer in %.0fms", elapsed)

        # Final message with sources
        yield GeneratedAnswer(
            query=query,
            answer="",  # Full answer was streamed
            sources=reranked,
            generation_time_ms=elapsed,
            model=self.llm.model_name,
        )