"""
DocMind — Quick Summary

Generates a concise one-paragraph summary (~100 words) from the
first 3 and last 2 chunks of a document.
"""

import logging
from typing import List

from pipeline.chunker import ChunkMetadata
from pipeline.llm import generate_summary

logger = logging.getLogger(__name__)

# Instruction text handed to generate_summary() together with the excerpt.
QUICK_SUMMARY_PROMPT = (
    "Summarize this document in one paragraph of approximately 100 words. "
    "Focus on the main topic, key claims, and conclusion. "
    "Be factual and concise. Do not add any information not present in the text."
)


def generate_quick_summary(chunks: List[ChunkMetadata]) -> str:
    """
    Produce a one-paragraph summary from a sample of the document's chunks.

    Documents with five or fewer chunks are used in full. Longer documents
    contribute only their first three and last two chunks, on the assumption
    that the opening and closing sections best convey the document's scope.
    Each selected chunk is prefixed with its page number, the pieces are
    joined with blank lines, and the result is passed to ``generate_summary``
    along with ``QUICK_SUMMARY_PROMPT``.

    Args:
        chunks: All chunks from the document, in order. Each chunk must
            expose ``page_num`` and ``text`` attributes.

    Returns:
        The value returned by ``generate_summary`` for the sampled text, or
        a fixed placeholder message when ``chunks`` is empty.
    """
    # Nothing to summarize: answer with the placeholder, skip the LLM call.
    if not chunks:
        return "No document content available for summarization."

    # The <= 5 threshold guarantees the head and tail slices never overlap,
    # so no chunk is ever sent twice.
    excerpt = chunks if len(chunks) <= 5 else chunks[:3] + chunks[-2:]

    # Tag every piece with its page so the excerpt keeps its page context.
    sections = [f"[Page {chunk.page_num}] {chunk.text}" for chunk in excerpt]
    combined_text = "\n\n".join(sections)

    logger.info("Generating quick summary from %d chunks", len(excerpt))
    return generate_summary(combined_text, QUICK_SUMMARY_PROMPT)