# agent-project/core/summarizer.py
# ego
# update home
# 737b76d
from core.models import get_llm
from prompts import SUMMARY_MAP_PROMPT, SUMMARY_REDUCE_PROMPT
from langchain_core.output_parsers import StrOutputParser
from langchain_text_splitters import CharacterTextSplitter
class Summarizer:
    """Map-reduce summarizer for long texts.

    The input is split into overlapping chunks, every chunk is summarized
    with ``SUMMARY_MAP_PROMPT``, and the partial summaries are then merged
    into one final summary with ``SUMMARY_REDUCE_PROMPT``. Both steps run on
    the model object returned by ``get_llm()``.
    """

    # Inputs shorter than this many characters use the small fixed chunk size.
    _SHORT_TEXT_LIMIT = 25000
    _SHORT_TEXT_CHUNK_SIZE = 5000
    # Longer inputs are cut into roughly this many pieces ...
    _LONG_TEXT_TARGET_CHUNKS = 5
    # ... but a piece is never requested smaller than this.
    _LONG_TEXT_MIN_CHUNK_SIZE = 20000
    # Overlap between neighbouring chunks is chunk_size // this value.
    _OVERLAP_DIVISOR = 10

    def __init__(self):
        """Create a summarizer bound to the model returned by ``get_llm()``."""
        self.llm = get_llm()

    @classmethod
    def _chunk_size_for(cls, total_length):
        """Return the chunk size (in characters) for a text of ``total_length``."""
        if total_length < cls._SHORT_TEXT_LIMIT:
            return cls._SHORT_TEXT_CHUNK_SIZE
        # Scale the chunk size with the document, with a lower bound so that
        # inputs just over the limit are not cut into tiny pieces.
        return max(
            total_length // cls._LONG_TEXT_TARGET_CHUNKS,
            cls._LONG_TEXT_MIN_CHUNK_SIZE,
        )

    def generate_deep_summary(self, full_text):
        """Summarize ``full_text`` with a map step followed by a reduce step.

        Args:
            full_text: The text to summarize. ``None``, an empty string, or a
                whitespace-only string is treated as "nothing to summarize".

        Returns:
            The output of the reduce chain (the chain ends in
            ``StrOutputParser``, so presumably a ``str``), or ``""`` when
            there is nothing to summarize.
        """
        # Nothing to summarize: do not spend model calls on an empty prompt.
        if not full_text or not full_text.strip():
            return ""

        chunk_size = self._chunk_size_for(len(full_text))
        # NOTE(review): CharacterTextSplitter is used with its default
        # separator; confirm the inputs actually contain that separator,
        # otherwise chunks may come out larger than chunk_size.
        text_splitter = CharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_size // self._OVERLAP_DIVISOR,
        )
        chunks = text_splitter.split_text(full_text)
        if not chunks:
            # The splitter produced no usable pieces; skip both model steps.
            return ""

        # Map step: one partial summary per chunk, issued as a single batch.
        map_chain = SUMMARY_MAP_PROMPT | self.llm | StrOutputParser()
        summaries = map_chain.batch([{"text": chunk} for chunk in chunks])

        # Reduce step: merge the partial summaries into the final summary.
        all_summaries = "\n\n".join(summaries)
        reduce_chain = SUMMARY_REDUCE_PROMPT | self.llm | StrOutputParser()
        return reduce_chain.invoke({"text": all_summaries})