from langchain_core.output_parsers import StrOutputParser
from langchain_text_splitters import CharacterTextSplitter

from core.models import get_llm
from prompts import SUMMARY_MAP_PROMPT, SUMMARY_REDUCE_PROMPT


class Summarizer:
    """Summarize long text with a two-stage (map, then reduce) LLM pipeline.

    The text is split into chunks, each chunk is summarized with
    ``SUMMARY_MAP_PROMPT``, and the partial summaries are merged into one
    final summary with ``SUMMARY_REDUCE_PROMPT``.
    """

    # Texts shorter than this many characters use the fixed small chunk size.
    SHORT_TEXT_THRESHOLD = 25000
    # Chunk size (in characters) used for short texts.
    SHORT_TEXT_CHUNK_SIZE = 5000
    # Longer texts are divided by this factor to derive a chunk size ...
    TARGET_CHUNK_COUNT = 5
    # ... but the derived chunk size never drops below this floor.
    MIN_LONG_CHUNK_SIZE = 20000

    def __init__(self):
        """Create the summarizer and obtain the model via ``get_llm()``."""
        self.llm = get_llm()

    def _pick_chunk_size(self, total_length: int) -> int:
        """Return the splitter chunk size for a text of ``total_length`` chars."""
        if total_length < self.SHORT_TEXT_THRESHOLD:
            return self.SHORT_TEXT_CHUNK_SIZE
        return max(
            total_length // self.TARGET_CHUNK_COUNT,
            self.MIN_LONG_CHUNK_SIZE,
        )

    def generate_deep_summary(self, full_text: str) -> str:
        """Produce a single summary of ``full_text``.

        Args:
            full_text: The complete text to summarize.

        Returns:
            The output of the reduce chain, or an empty string when
            ``full_text`` is empty or yields no chunks (no LLM call is
            made in that case).
        """
        # Nothing to summarize: avoid sending an empty prompt to the model.
        if not full_text or not full_text.strip():
            return ""

        chunk_size = self._pick_chunk_size(len(full_text))

        # NOTE(review): CharacterTextSplitter splits on a single separator, so
        # text that lacks that separator may yield chunks larger than
        # chunk_size - confirm this is acceptable for the inputs seen here.
        text_splitter = CharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_size // 10,
        )
        chunks = text_splitter.split_text(full_text)
        if not chunks:
            return ""

        # Map step: summarize every chunk independently.
        map_chain = SUMMARY_MAP_PROMPT | self.llm | StrOutputParser()
        summaries = map_chain.batch([{"text": chunk} for chunk in chunks])

        # Reduce step: merge the partial summaries into the final summary.
        all_summaries = "\n\n".join(summaries)
        reduce_chain = SUMMARY_REDUCE_PROMPT | self.llm | StrOutputParser()
        return reduce_chain.invoke({"text": all_summaries})