Spaces:
Sleeping
Sleeping
| from core.models import get_llm | |
| from prompts import SUMMARY_MAP_PROMPT, SUMMARY_REDUCE_PROMPT | |
| from langchain_core.output_parsers import StrOutputParser | |
| from langchain_text_splitters import CharacterTextSplitter | |
class Summarizer:
    """Map-reduce summarizer driven by the LLM returned by ``get_llm()``.

    The input text is split into chunks, each chunk is summarized with
    ``SUMMARY_MAP_PROMPT``, and the partial summaries are then combined by
    a single ``SUMMARY_REDUCE_PROMPT`` call.
    """

    # Texts shorter than this many characters use the fixed small chunk size.
    SHORT_TEXT_THRESHOLD = 25000
    SHORT_TEXT_CHUNK_SIZE = 5000
    # Longer texts are divided by this factor to derive the chunk size ...
    LONG_TEXT_DIVISOR = 5
    # ... but the chunk size never drops below this many characters.
    LONG_TEXT_MIN_CHUNK_SIZE = 20000

    def __init__(self):
        self.llm = get_llm()

    @classmethod
    def _chunk_size_for(cls, total_length: int) -> int:
        """Return the splitter chunk size (in characters) for a text length."""
        if total_length < cls.SHORT_TEXT_THRESHOLD:
            return cls.SHORT_TEXT_CHUNK_SIZE
        return max(
            total_length // cls.LONG_TEXT_DIVISOR,
            cls.LONG_TEXT_MIN_CHUNK_SIZE,
        )

    def generate_deep_summary(self, full_text: str) -> str:
        """Summarize ``full_text`` with a map step followed by a reduce step.

        Args:
            full_text: The complete text to summarize.

        Returns:
            The output of the reduce chain, or an empty string when
            ``full_text`` is empty, ``None``, or whitespace-only.
        """
        # Nothing to summarize: skip the LLM entirely instead of sending an
        # empty prompt to the reduce chain.
        if not full_text or not full_text.strip():
            return ""

        chunk_size = self._chunk_size_for(len(full_text))

        # NOTE(review): CharacterTextSplitter appears to split only on its
        # separator (default "\n\n"), so text without blank lines may yield
        # chunks larger than chunk_size -- confirm whether
        # RecursiveCharacterTextSplitter is the better fit here.
        text_splitter = CharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_size // 10,
        )
        chunks = text_splitter.split_text(full_text)

        # Map: summarize every chunk independently in one batch call.
        map_chain = SUMMARY_MAP_PROMPT | self.llm | StrOutputParser()
        summaries = map_chain.batch([{"text": chunk} for chunk in chunks])

        # Reduce: merge the partial summaries, separated by blank lines.
        reduce_chain = SUMMARY_REDUCE_PROMPT | self.llm | StrOutputParser()
        return reduce_chain.invoke({"text": "\n\n".join(summaries)})