from typing import Dict, List

from pydantic import BaseModel

from agent.adapters import EmbeddingsAdapter, LLMAdapter
from agent.prompts import BRIEF_PROMPT, SYSTEM_ANSWER
from vector.store import FaissStore


class Chunk(BaseModel):
    """A slice of a source document together with its provenance."""

    text: str    # the chunk's content
    source: str  # originating URL (stored under the "source" key in metadata)
    title: str   # document title


def chunk_text(
    text: str,
    source: str,
    title: str,
    size: int = 1200,
    overlap: int = 200,
) -> List[Chunk]:
    """Split *text* into chunks of at most *size* characters, each starting
    *size - overlap* characters after the previous one.

    Returns an empty list for empty input.

    Raises:
        ValueError: if ``size <= 0`` or ``overlap`` is negative or
            ``overlap >= size`` — with a non-positive step the original
            ``while`` loop would never terminate.
    """
    if size <= 0:
        raise ValueError("size must be positive")
    if not 0 <= overlap < size:
        raise ValueError("overlap must satisfy 0 <= overlap < size")
    step = size - overlap
    # Same windows as the original while-loop: starts at 0, advances by step.
    return [
        Chunk(text=text[i:i + size], source=source, title=title)
        for i in range(0, len(text), step)
    ]


class AgentGraph:
    """Minimal RAG pipeline: chunk + embed documents into a FAISS index,
    then answer questions (or produce a brief) grounded in retrieved context."""

    def __init__(self, index_dir: str):
        self.llm = LLMAdapter()
        self.emb = EmbeddingsAdapter()
        # dim must match the embedding model's output width:
        # 1536 here; 3072 for text-embedding-3-large.
        self.index = FaissStore(dim=1536, index_dir=index_dir)

    def build_index(self, docs: List[Dict]) -> None:
        """Chunk, embed, and persist *docs*.

        Each doc dict is expected to carry "text", "url", and "title" keys
        (KeyError otherwise). No-op when there is nothing to index.
        """
        chunks: List[Chunk] = []
        for d in docs:
            chunks += chunk_text(d["text"], d["url"], d["title"])
        if not chunks:
            return  # avoid embedding/adding empty batches
        vecs = self.emb.embed([c.text for c in chunks])
        metas = [c.model_dump() for c in chunks]
        self.index.add(vecs, metas)
        self.index.save()

    def answer(self, question: str) -> Dict:
        """Answer *question* from the top-6 retrieved chunks.

        Returns ``{"answer": <model reply>, "sources": {tag: {title, url,
        score}}}`` where tags are "S1".."Sn" in retrieval order.
        """
        qv = self.emb.embed([question])[0]
        hits = self.index.search(qv, k=6)
        ctx_blocks = []
        mapping: Dict[str, Dict] = {}
        for i, (score, meta) in enumerate(hits, start=1):
            tag = f"S{i}"
            mapping[tag] = {"title": meta["title"], "url": meta["source"], "score": score}
            ctx_blocks.append(f"[{tag}] {meta['title']} — {meta['source']}\n{meta['text']}\n")
        messages = [
            {"role": "system", "content": SYSTEM_ANSWER},
            {"role": "user", "content": f"Question: {question}\n\nContext:\n" + "\n\n".join(ctx_blocks)},
        ]
        reply = self.llm.chat(messages)
        return {"answer": reply, "sources": mapping}

    def brief(self) -> Dict:
        """Produce a company brief from a fixed seed retrieval query.

        Returns ``{"brief": <model reply>, "sources": {tag: {title, url,
        score}}}`` with the same S1..Sn tagging scheme as :meth:`answer`.
        """
        seed = "company overview latest results kpis risks guidance"
        qv = self.emb.embed([seed])[0]
        hits = self.index.search(qv, k=8)
        ctx = "\n\n".join(meta["text"] for _, meta in hits)
        messages = [
            {"role": "system", "content": SYSTEM_ANSWER},
            {"role": "user", "content": f"{BRIEF_PROMPT}\n\nContext:\n{ctx}"},
        ]
        reply = self.llm.chat(messages)
        # Same unpacking style as answer() for a consistent sources payload.
        src = {
            f"S{i}": {"title": meta["title"], "url": meta["source"], "score": score}
            for i, (score, meta) in enumerate(hits, start=1)
        }
        return {"brief": reply, "sources": src}