Spaces:

NinjainPJs
/

Ragcore

Sleeping

App Files Files Community

Ragcore / app /core /generator.py

NinjainPJs

Initial deploy: RagCore RAG system with hybrid search and Gradio UI

a34068e 3 months ago

raw

history blame contribute delete

3.97 kB

	import logging
	import time
	from collections.abc import AsyncGenerator

	from app.core.llm import GeminiService
	from app.core.reranker import RerankerService
	from app.models.schemas import GeneratedAnswer, RetrievedChunk

	# Module-level logger, named after this module.
	logger = logging.getLogger(__name__)

	# Default question-answering prompt template. The {context} and {query}
	# placeholders are filled with str.format() by AnswerGenerator._build_prompt.
	# The rules tell the model to answer only from the supplied context and to
	# cite sources inline as [1], [2], ... — the same numbering that
	# AnswerGenerator._build_context puts in each chunk header.
	SYSTEM_PROMPT = """You are a helpful assistant answering questions based on the provided context.

	CONTEXT:
	{context}

	RULES:
	- Answer based ONLY on the provided context.
	- Cite sources using [1], [2], etc. inline after the relevant information.
	- If the context doesn't contain enough information, say "I don't have enough information in the provided documents to answer this question."
	- Be concise but thorough.
	- Use markdown formatting for readability.

	QUESTION: {query}

	ANSWER:"""

	# Summarization prompt template, selected by AnswerGenerator._build_prompt
	# when intent == "summarize". It takes the same {context} and {query}
	# placeholders as SYSTEM_PROMPT.
	SUMMARY_PROMPT = """You are a helpful assistant. Summarize the following context.

	CONTEXT:
	{context}

	RULES:
	- Provide a structured summary using markdown.
	- Cite sources using [1], [2], etc.
	- Cover the key points from all provided sources.

	QUESTION: {query}

	SUMMARY:"""


	class AnswerGenerator:
	    """Rerank retrieved chunks, build a cited prompt, and query the LLM.

	    Two entry points are offered: ``generate_answer`` returns one complete
	    ``GeneratedAnswer``, while ``generate_answer_stream`` yields text pieces
	    as the LLM produces them, followed by a final ``GeneratedAnswer`` that
	    carries the sources and timing.
	    """

	    # Answer text used when reranking leaves no chunks to ground a response.
	    _NO_RESULTS_MESSAGE = "No relevant documents found to answer your question."

	    def __init__(self, llm: GeminiService, reranker: RerankerService):
	        """Store the LLM and reranker services used by the generate methods."""
	        self.llm = llm
	        self.reranker = reranker

	    def _build_context(self, chunks: list[RetrievedChunk]) -> str:
	        """Render chunks as numbered, source-labelled sections.

	        Each chunk becomes ``[i] (Source: <source>)`` followed by its text on
	        the next line, numbered from 1. Sections are separated by a blank
	        line. A falsy ``chunk.metadata.source`` is shown as ``unknown``.
	        An empty list yields an empty string.
	        """
	        sections = []
	        for position, chunk in enumerate(chunks, 1):
	            source = chunk.metadata.source or "unknown"
	            sections.append(f"[{position}] (Source: {source})\n{chunk.text}")
	        return "\n\n".join(sections)

	    def _build_prompt(
	        self,
	        query: str,
	        chunks: list[RetrievedChunk],
	        intent: str = "factual",
	    ) -> str:
	        """Fill the prompt template matching ``intent``.

	        ``intent == "summarize"`` selects ``SUMMARY_PROMPT``; every other
	        value falls back to ``SYSTEM_PROMPT``.
	        """
	        template = SUMMARY_PROMPT if intent == "summarize" else SYSTEM_PROMPT
	        return template.format(context=self._build_context(chunks), query=query)

	    def _empty_answer(self, query: str) -> GeneratedAnswer:
	        """Build the fixed response returned when reranking yields no chunks."""
	        return GeneratedAnswer(
	            query=query,
	            answer=self._NO_RESULTS_MESSAGE,
	            sources=[],
	            generation_time_ms=0,
	            model=self.llm.model_name,
	        )

	    def generate_answer(
	        self,
	        query: str,
	        chunks: list[RetrievedChunk],
	        rerank_top_k: int = 5,
	        intent: str = "factual",
	    ) -> GeneratedAnswer:
	        """Rerank ``chunks`` and return a complete answer for ``query``.

	        Args:
	            query: The user question; inserted into the prompt verbatim.
	            chunks: Candidate chunks handed to the reranker.
	            rerank_top_k: Passed to the reranker as ``top_k``.
	            intent: ``"summarize"`` selects the summary prompt; any other
	                value selects the question-answering prompt.

	        Returns:
	            A ``GeneratedAnswer`` whose ``sources`` are the reranked chunks.
	            ``generation_time_ms`` covers reranking plus generation. When the
	            reranker returns nothing, a fixed "no relevant documents" answer
	            with no sources and a time of 0 is returned and the LLM is not
	            called.
	        """
	        start = time.perf_counter()

	        reranked = self.reranker.rerank(query, chunks, top_k=rerank_top_k)
	        if not reranked:
	            return self._empty_answer(query)

	        prompt = self._build_prompt(query, reranked, intent)
	        answer = self.llm.generate(prompt)

	        elapsed = (time.perf_counter() - start) * 1000
	        # Lazy %-style arguments: formatting is skipped when INFO is disabled.
	        logger.info("Generated answer in %.0fms", elapsed)

	        return GeneratedAnswer(
	            query=query,
	            answer=answer,
	            sources=reranked,
	            generation_time_ms=elapsed,
	            model=self.llm.model_name,
	        )

	    async def generate_answer_stream(
	        self,
	        query: str,
	        chunks: list[RetrievedChunk],
	        rerank_top_k: int = 5,
	        intent: str = "factual",
	    ) -> AsyncGenerator[str | GeneratedAnswer, None]:
	        """Rerank ``chunks`` and stream the answer for ``query``.

	        Yields each text piece produced by ``self.llm.generate_stream`` and
	        then one final ``GeneratedAnswer``. That final item has an empty
	        ``answer`` because the text was already streamed; it carries the
	        reranked ``sources`` and the timing. When the reranker returns
	        nothing, the only item yielded is the fixed "no relevant documents"
	        answer.

	        Parameters have the same meaning as in ``generate_answer``.

	        NOTE(review): the timer here starts after reranking, so
	        ``generation_time_ms`` excludes rerank time, whereas
	        ``generate_answer`` includes it. Confirm which one is intended.
	        """
	        reranked = self.reranker.rerank(query, chunks, top_k=rerank_top_k)
	        if not reranked:
	            yield self._empty_answer(query)
	            return

	        prompt = self._build_prompt(query, reranked, intent)
	        start = time.perf_counter()

	        async for text_chunk in self.llm.generate_stream(prompt):
	            yield text_chunk

	        elapsed = (time.perf_counter() - start) * 1000

	        # Final message with sources; the full answer was already streamed.
	        yield GeneratedAnswer(
	            query=query,
	            answer="",
	            sources=reranked,
	            generation_time_ms=elapsed,
	            model=self.llm.model_name,
	        )