import os
from typing import Any, Dict, List, Tuple

from dotenv import load_dotenv

from llm import LLMProvider
from pinecone_client import PineconeClient

load_dotenv()


def _format_context_block(contexts: List[str]) -> str:
    """Join retrieved passages into a numbered block: "[Source 1]\\n<text>", ...

    Source numbers are 1-based so they line up with the inline citation
    markers ([1], [2], ...) requested by the citation prompt.
    """
    return "\n\n".join(f"[Source {i+1}]\n{c}" for i, c in enumerate(contexts))


def _build_prompt(query: str, contexts: List[str]) -> List[Dict[str, str]]:
    """Build a plain question-answering chat prompt (no citation markers).

    Returns an OpenAI-style message list: one system message with the
    answering instructions, one user message carrying the question plus
    the numbered context block.
    """
    system = (
        "You are a helpful assistant. Answer the user's question using the provided context. "
        "If the answer isn't in the context, say you don't know. Be concise."
    )
    user = f"Question: {query}\n\nContext:\n{_format_context_block(contexts)}"
    return [
        {"role": "system", "content": system},
        {"role": "user", "content": user},
    ]


def _build_citation_prompt(query: str, contexts: List[str]) -> List[Dict[str, str]]:
    """Build a QA chat prompt that instructs the model to cite sources inline.

    Same shape as :func:`_build_prompt`, but the system message demands
    [1]/[2]-style citations matching the numbered sources, and the user
    message ends with an explicit answer-with-citations cue.
    """
    system = (
        "You are a helpful assistant. Answer the user's question using the provided context. "
        "IMPORTANT: Use inline citations [1], [2], [3] etc. to reference specific sources. "
        "Each citation number should correspond to the source number from the context. "
        "If the answer isn't in the context, say you don't know. Be concise and accurate."
    )
    user = (
        f"Question: {query}\n\nContext:\n{_format_context_block(contexts)}"
        "\n\nAnswer with inline citations [1], [2], etc.:"
    )
    return [
        {"role": "system", "content": system},
        {"role": "user", "content": user},
    ]


class RAGCore:
    """Orchestrates the RAG pipeline: embed -> retrieve (Pinecone) -> generate (LLM)."""

    def __init__(self) -> None:
        self.llm = LLMProvider()
        self.pc = PineconeClient()

    def ensure_index(self, embedding_dim: int) -> None:
        """Create the Pinecone index if it does not exist, sized to *embedding_dim*."""
        self.pc.ensure_index(dimension=embedding_dim)

    def retrieve(self, query: str, top_k: int = 5, rerank: bool = True) -> Tuple[List[Dict[str, Any]], List[str]]:
        """Embed *query*, fetch the *top_k* nearest matches, optionally rerank.

        Returns ``(docs, contexts)`` where each doc carries ``id``, ``text``,
        ``score`` and the full ``metadata`` dict, and *contexts* is the bare
        text of each doc in final (possibly reranked) order.
        """
        q_vec = self.llm.embed_texts([query])[0]
        results = self.pc.query(vector=q_vec, top_k=top_k)

        docs: List[Dict[str, Any]] = []
        for match in results.get("matches", []):
            # Guard both lookups: Pinecone may return metadata/score as None,
            # not just omit them, so plain .get() defaults are not enough.
            meta = match.get("metadata") or {}
            docs.append(
                {
                    "id": match.get("id"),
                    "text": meta.get("text", ""),
                    "score": float(match.get("score") or 0.0),
                    "metadata": meta,
                }
            )

        if rerank:
            docs = self.llm.rerank(query, docs)
        return docs, [d["text"] for d in docs]

    def generate(self, query: str, contexts: List[str]) -> str:
        """Answer *query* from *contexts* without citation markers."""
        return self.llm.chat(_build_prompt(query, contexts))

    def generate_with_citations(self, query: str, contexts: List[str]) -> str:
        """Generate answer with inline citations [1], [2], etc."""
        if not contexts:
            return "No relevant context found to answer this question."
        return self.llm.chat(_build_citation_prompt(query, contexts))