docmind / summarizer /quick_summary.py
AI Engineer
Initial commit for DocMind
6cca5b1
Raw
History Blame Contribute Delete
1.39 kB
"""
DocMind — Quick Summary
Generates a concise one-paragraph summary (~100 words) from the
first 3 and last 2 chunks of a document, or from every chunk when the
document has 5 or fewer.
"""
import logging
from typing import List
from pipeline.chunker import ChunkMetadata
from pipeline.llm import generate_summary
logger = logging.getLogger(__name__)
# Instruction text handed to generate_summary() together with the selected
# chunk text. Each sentence is listed separately and joined with one space.
QUICK_SUMMARY_PROMPT = " ".join(
    (
        "Summarize this document in one paragraph of approximately 100 words.",
        "Focus on the main topic, key claims, and conclusion.",
        "Be factual and concise.",
        "Do not add any information not present in the text.",
    )
)
def generate_quick_summary(chunks: List[ChunkMetadata]) -> str:
    """
    Produce a short single-paragraph summary of a document.

    Only an excerpt is summarized: the opening three chunks plus the
    closing two. Documents with five chunks or fewer are used in full.
    Each selected chunk is prefixed with its page number, the pieces are
    separated by blank lines, and the result is passed to
    ``generate_summary`` along with ``QUICK_SUMMARY_PROMPT``.

    Args:
        chunks: The document's chunks, in reading order.

    Returns:
        Whatever ``generate_summary`` returns for the excerpt, or a fixed
        placeholder message when ``chunks`` is empty.
    """
    if not chunks:
        return "No document content available for summarization."

    # Short documents are taken whole; longer ones are sampled at both ends.
    is_short = len(chunks) <= 5
    excerpt = chunks if is_short else chunks[:3] + chunks[-2:]

    page_blocks = [f"[Page {chunk.page_num}] {chunk.text}" for chunk in excerpt]
    prompt_input = "\n\n".join(page_blocks)

    logger.info("Generating quick summary from %d chunks", len(excerpt))
    return generate_summary(prompt_input, QUICK_SUMMARY_PROMPT)