from typing import List, Dict
from pydantic import BaseModel
from agent.adapters import LLMAdapter, EmbeddingsAdapter
from agent.prompts import SYSTEM_ANSWER, BRIEF_PROMPT
from vector.store import FaissStore
class Chunk(BaseModel):
    """One slice of a source document, carrying provenance for citation."""

    # The chunk's character span taken from the original document text.
    text: str
    # URL of the document this chunk came from (stored under "url" by callers).
    source: str
    # Human-readable document title, used when rendering context blocks.
    title: str
def chunk_text(text: str, source: str, title: str, size=1200, overlap=200) -> List[Chunk]:
    """Split *text* into overlapping fixed-size chunks tagged with provenance.

    Args:
        text: Raw document text to split.
        source: Provenance URL stored on every chunk.
        title: Document title stored on every chunk.
        size: Maximum characters per chunk.
        overlap: Characters shared between consecutive chunks.

    Returns:
        List of Chunk objects covering all of *text*; empty for empty text.

    Raises:
        ValueError: If ``overlap >= size`` — the window step ``size - overlap``
            would be non-positive and the loop below would never terminate.
    """
    if overlap >= size:
        raise ValueError(f"overlap ({overlap}) must be smaller than size ({size})")
    out: List[Chunk] = []
    i = 0
    while i < len(text):
        out.append(Chunk(text=text[i:i + size], source=source, title=title))
        # Advance by a positive step so consecutive chunks share `overlap` chars.
        i += size - overlap
    return out
class AgentGraph:
    """Retrieval-augmented QA over a FAISS index of embedded document chunks."""

    def __init__(self, index_dir: str):
        self.llm = LLMAdapter()
        self.emb = EmbeddingsAdapter()
        # dim=1536 matches text-embedding-3-small; use 3072 for text-embedding-3-large.
        self.index = FaissStore(dim=1536, index_dir=index_dir)

    def build_index(self, docs: List[Dict]):
        """Chunk, embed, and persist *docs*.

        Each doc dict must provide "text", "url", and "title" keys.
        """
        chunks: List[Chunk] = []
        for d in docs:
            chunks += chunk_text(d["text"], d["url"], d["title"])
        if not chunks:
            # Nothing to index — avoid calling the embeddings API and the
            # FAISS store with an empty batch.
            return
        vecs = self.emb.embed([c.text for c in chunks])
        metas = [c.model_dump() for c in chunks]
        self.index.add(vecs, metas)
        self.index.save()

    @staticmethod
    def _source_map(hits) -> Dict:
        """Map citation tags S1..Sn to {title, url, score} for search hits."""
        return {
            f"S{i}": {"title": meta["title"], "url": meta["source"], "score": score}
            for i, (score, meta) in enumerate(hits, start=1)
        }

    def answer(self, question: str) -> Dict:
        """Answer *question* from the top-k retrieved chunks, with cited sources."""
        qv = self.emb.embed([question])[0]
        hits = self.index.search(qv, k=6)
        # Context blocks carry the same S-tags the source map uses, so the
        # model's citations can be resolved back to URLs.
        ctx_blocks = []
        for i, (score, meta) in enumerate(hits, start=1):
            ctx_blocks.append(f"[S{i}] {meta['title']} — {meta['source']}\n{meta['text']}\n")
        messages = [
            {"role": "system", "content": SYSTEM_ANSWER},
            {"role": "user", "content": f"Question: {question}\n\nContext:\n" + "\n\n".join(ctx_blocks)},
        ]
        reply = self.llm.chat(messages)
        return {"answer": reply, "sources": self._source_map(hits)}

    def brief(self) -> Dict:
        """Generate a standing brief from a fixed seed query over the index."""
        seed = "company overview latest results kpis risks guidance"
        qv = self.emb.embed([seed])[0]
        hits = self.index.search(qv, k=8)
        ctx = "\n\n".join(meta["text"] for _, meta in hits)
        messages = [
            {"role": "system", "content": SYSTEM_ANSWER},
            {"role": "user", "content": f"{BRIEF_PROMPT}\n\nContext:\n{ctx}"},
        ]
        reply = self.llm.chat(messages)
        return {"brief": reply, "sources": self._source_map(hits)}