Spaces:
Sleeping
Sleeping
File size: 2,412 Bytes
94b06be |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
from typing import List, Dict
from pydantic import BaseModel
from agent.adapters import LLMAdapter, EmbeddingsAdapter
from agent.prompts import SYSTEM_ANSWER, BRIEF_PROMPT
from vector.store import FaissStore
class Chunk(BaseModel):
    """A contiguous slice of a document plus provenance metadata.

    Produced by chunk_text(); model_dump() of a Chunk is stored as the
    per-vector metadata record in the FAISS index.
    """
    # Raw chunk text (a slice of up to `size` characters — see chunk_text).
    text: str
    # Identifier of the originating document; callers pass the doc URL here.
    source: str
    # Human-readable document title.
    title: str
def chunk_text(text: str, source: str, title: str, size=1200, overlap=200) -> List[Chunk]:
    """Split *text* into overlapping Chunk records tagged with provenance.

    Consecutive chunks share `overlap` characters so that sentences cut at a
    chunk boundary still appear whole in at least one chunk.

    Args:
        text: Raw document text to split. Empty text yields an empty list.
        source: URL (or other identifier) of the originating document.
        title: Human-readable document title.
        size: Maximum characters per chunk.
        overlap: Characters shared between consecutive chunks.

    Returns:
        List of Chunk models covering all of *text*.

    Raises:
        ValueError: If ``overlap >= size`` — the original stepping of
            ``size - overlap`` would be <= 0 and the loop would never
            advance (infinite loop), so reject it explicitly.
    """
    if overlap >= size:
        raise ValueError(f"overlap ({overlap}) must be smaller than size ({size})")
    step = size - overlap
    return [
        Chunk(text=text[i:i + size], source=source, title=title)
        for i in range(0, len(text), step)
    ]
class AgentGraph:
    """Retrieval-augmented agent: indexes documents and answers questions
    (or produces a standing brief) grounded in retrieved chunks."""

    def __init__(self, index_dir: str, dim: int = 1536):
        """Wire up the LLM, embedding model and vector store.

        Args:
            index_dir: Directory where the FAISS index is persisted.
            dim: Embedding dimensionality; defaults to 1536
                (text-embedding-3-small). Pass 3072 for
                text-embedding-3-large.
        """
        self.llm = LLMAdapter()
        self.emb = EmbeddingsAdapter()
        self.index = FaissStore(dim=dim, index_dir=index_dir)

    def build_index(self, docs: List[Dict]):
        """Chunk, embed and persist *docs* into the vector index.

        Args:
            docs: Dicts with "text", "url" and "title" keys.
        """
        chunks: List[Chunk] = []
        for d in docs:
            chunks += chunk_text(d["text"], d["url"], d["title"])
        vecs = self.emb.embed([c.text for c in chunks])
        metas = [c.model_dump() for c in chunks]
        self.index.add(vecs, metas)
        self.index.save()

    def _retrieve(self, query: str, k: int):
        """Embed *query* and return the top-k (score, meta) hits from the index."""
        qv = self.emb.embed([query])[0]
        return self.index.search(qv, k=k)

    @staticmethod
    def _sources(hits) -> Dict:
        """Map citation tags S1..Sn onto {"title", "url", "score"} records,
        one per (score, meta) hit, in retrieval order."""
        return {
            f"S{i}": {"title": meta["title"], "url": meta["source"], "score": score}
            for i, (score, meta) in enumerate(hits, start=1)
        }

    def answer(self, question: str) -> Dict:
        """Answer *question* using the top-6 retrieved chunks as context.

        Returns:
            {"answer": model reply, "sources": tag -> source record}.
        """
        hits = self._retrieve(question, k=6)
        # Context blocks are tagged [S1]..[S6] so the model can cite them;
        # tags line up with the keys returned in "sources".
        ctx_blocks = [
            f"[S{i}] {meta['title']} — {meta['source']}\n{meta['text']}\n"
            for i, (score, meta) in enumerate(hits, start=1)
        ]
        messages = [
            {"role": "system", "content": SYSTEM_ANSWER},
            {"role": "user", "content": f"Question: {question}\n\nContext:\n" + "\n\n".join(ctx_blocks)},
        ]
        reply = self.llm.chat(messages)
        return {"answer": reply, "sources": self._sources(hits)}

    def brief(self) -> Dict:
        """Produce a standing company brief from a broad seed query.

        Returns:
            {"brief": model reply, "sources": tag -> source record}.
        """
        seed = "company overview latest results kpis risks guidance"
        hits = self._retrieve(seed, k=8)
        ctx = "\n\n".join(meta["text"] for _, meta in hits)
        messages = [
            {"role": "system", "content": SYSTEM_ANSWER},
            {"role": "user", "content": f"{BRIEF_PROMPT}\n\nContext:\n{ctx}"},
        ]
        reply = self.llm.chat(messages)
        return {"brief": reply, "sources": self._sources(hits)}
|