NotebookLM / services /summary_service.py
internomega-terrablue
source selection
690fe5e
from __future__ import annotations
import logging
import os
import uuid
from datetime import datetime
import anthropic
from state import Artifact, Notebook
# Module-level logger; the functions below use it to report retrieval and
# summary-generation failures.
logger = logging.getLogger(__name__)
# Anthropic model identifier passed as `model` to `client.messages.create`
# in `_call_claude`.
MODEL = "claude-haiku-4-5-20251001"
# Value passed as `max_tokens` on every `_call_claude` request.
MAX_TOKENS = 1024
def _get_source_text(notebook: Notebook, max_chars: int = 8000, source_ids: list[str] | None = None) -> str:
    """Return source text for *notebook*, preferring chunks from the vector store.

    A fixed "overview"-style query is embedded and used to fetch up to 20
    matches from the namespace named by ``notebook.id``, optionally restricted
    to ``source_ids``. The non-empty ``text`` values of the matches are joined
    with blank lines and cut to ``max_chars`` characters.

    When retrieval raises, or no match carries any text, the function falls
    back to a one-line listing of the filenames of sources whose status is
    ``"ready"``.

    Args:
        notebook: Notebook whose ``id`` selects the namespace and whose
            ``sources`` feed the fallback listing.
        max_chars: Maximum length of the retrieved text. Not applied to the
            fallback string.
        source_ids: Optional ids to restrict retrieval to. A falsy value
            (``None`` or an empty list) means "no filter".

    Returns:
        The joined chunk text, or ``"Sources: <names>"``, or
        ``"No sources available."`` when no source is ready.
    """
    try:
        # Imported inside the function, so a missing/broken backend ends up in
        # the fallback below instead of failing at module import time.
        from persistence.vector_store import VectorStore
        from ingestion_engine.embedding_generator import generate_query

        overview_vector = generate_query("main ideas key concepts overview summary")
        # NOTE(review): the `$in` operator syntax is assumed to be what
        # VectorStore.query expects for metadata filters - confirm.
        source_filter = {"source_id": {"$in": source_ids}} if source_ids else None
        hits = VectorStore().query(
            query_vector=overview_vector,
            namespace=notebook.id,
            top_k=20,
            filter=source_filter,
        )

        texts = []
        for hit in hits:
            text = hit.get("text", "")
            if text:
                texts.append(text)
        if texts:
            return "\n\n".join(texts)[:max_chars]
    except Exception as err:
        # Best-effort retrieval: any failure is logged and we fall through.
        logger.warning("Could not retrieve chunks from vector store: %s", err)

    # Fallback: no chunk text was obtained, so describe the sources by name.
    ready_names = [src.filename for src in notebook.sources if src.status == "ready"]
    if not ready_names:
        return "No sources available."
    return "Sources: " + ", ".join(ready_names)
def _call_claude(system: str, prompt: str) -> str:
    """Send a single-turn request to the Anthropic Messages API and return its text.

    Args:
        system: System prompt for the request.
        prompt: Content of the single user message.

    Returns:
        The text of all text-bearing content blocks in the response,
        concatenated and stripped of surrounding whitespace. An empty string
        is returned when the response carries no text, so callers can treat
        that as an "empty response" instead of hitting an index error.

    Raises:
        Any exception raised by the ``anthropic`` client is propagated.
    """
    client = anthropic.Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))
    response = client.messages.create(
        model=MODEL,
        max_tokens=MAX_TOKENS,
        system=system,
        messages=[{"role": "user", "content": prompt}],
    )
    # `content` is a list of blocks. It can be empty, and a block need not
    # expose `.text`, so gather text defensively instead of indexing [0].
    pieces = [getattr(block, "text", None) or "" for block in (response.content or [])]
    return "".join(pieces).strip()
def generate_conversation_summary(notebook: Notebook, style: str) -> Artifact:
    """Build a summary artifact from the notebook's chat history.

    Args:
        notebook: Notebook whose ``messages`` (each with ``role`` and
            ``content``) make up the conversation.
        style: ``"brief"`` for a short bullet summary; any other value is
            treated as ``"detailed"``.

    Returns:
        An ``Artifact`` of type ``"conversation_summary"``. Its content is the
        model's markdown summary, a placeholder when there are no messages, or
        a failure notice when the model call raises or returns nothing.
    """
    # Only "brief" selects the short form; everything else means "detailed".
    is_brief = style == "brief"
    label = "Brief" if is_brief else "Detailed"

    if notebook.messages:
        transcript = "\n\n".join(
            f"{msg.role.capitalize()}: {msg.content}" for msg in notebook.messages
        )
        if is_brief:
            task = (
                "Write a BRIEF summary (3-5 bullet points) covering:\n"
                "- The main topics discussed\n"
                "- Key questions asked and answers given\n"
                "- Any unresolved questions\n"
                "Keep it under 150 words."
            )
        else:
            task = (
                "Write a DETAILED summary covering:\n"
                "- The flow of the conversation\n"
                "- Each major topic explored with key insights\n"
                "- Important answers and explanations given\n"
                "- Any unresolved questions or follow-ups\n"
                "Use markdown headers and bullet points. Aim for 200-400 words."
            )
        request = (
            "Summarize this study session conversation:\n\n"
            f"CONVERSATION:\n{transcript}\n\n"
            f"TASK:\n{task}\n\n"
            f"Begin with: ## Conversation Summary ({label})"
        )
        try:
            body = _call_claude(
                "You are an expert academic summarizer. Produce clear, well-structured summaries in markdown.",
                request,
            )
            if not body:
                # Route an empty reply through the same failure path as an error.
                raise ValueError("Empty response")
        except Exception as err:
            logger.error("Conversation summary failed: %s", err)
            body = f"_Summary generation failed: {err}. Please try again._"
    else:
        # Nothing to summarize, so skip the model call entirely.
        body = "_No conversation to summarize yet. Start chatting in the Chat tab first._"

    return Artifact(
        id=str(uuid.uuid4()),
        type="conversation_summary",
        title=f"Conversation Summary ({label})",
        content=body,
        audio_path=None,
        created_at=datetime.now().isoformat(),
    )
def generate_document_summary(notebook: Notebook, style: str, source_ids: list[str] | None = None) -> Artifact:
    """Build a summary artifact from the notebook's source material.

    Args:
        notebook: Notebook whose sources are summarized.
        style: ``"brief"`` for a short bullet summary; any other value is
            treated as ``"detailed"``.
        source_ids: Optional ids forwarded to ``_get_source_text`` to restrict
            which sources contribute text.

    Returns:
        An ``Artifact`` of type ``"document_summary"``. Its content is the
        model's markdown summary, a placeholder when the notebook has no
        sources, or a failure notice when text retrieval or the model call
        raises or the model returns nothing.
    """
    # Only "brief" selects the short form; everything else means "detailed".
    is_brief = style == "brief"
    label = "Brief" if is_brief else "Detailed"

    if not notebook.sources:
        # Mirror generate_conversation_summary: with nothing to summarize,
        # skip the model call instead of asking it to summarize a placeholder.
        content = "_No sources to summarize yet. Add a source first._"
    else:
        if is_brief:
            instructions = (
                "Write a BRIEF summary (3-5 bullet points) covering:\n"
                "- The core theme or subject matter\n"
                "- The most important concepts or findings\n"
                "- The key takeaway\n"
                "Keep it under 150 words."
            )
        else:
            instructions = (
                "Write a DETAILED summary covering:\n"
                "- The main theme and purpose of the material\n"
                "- Each major concept or section with explanations\n"
                "- Key definitions, methods, or frameworks\n"
                "- Conclusions and practical implications\n"
                "Use markdown headers and bullet points. Aim for 300-500 words."
            )
        try:
            source_text = _get_source_text(notebook, source_ids=source_ids)
            prompt = (
                f"Summarize this study material:\n\n"
                f"SOURCE CONTENT:\n{source_text}\n\n"
                f"TASK:\n{instructions}\n\n"
                f"Begin with: ## Document Summary ({label})"
            )
            content = _call_claude(
                "You are an expert academic summarizer. Produce clear, well-structured summaries in markdown.",
                prompt,
            )
            if not content:
                # Route an empty reply through the same failure path as an error.
                raise ValueError("Empty response")
        except Exception as e:
            logger.error("Document summary failed: %s", e)
            content = f"_Summary generation failed: {e}. Please try again._"

    return Artifact(
        id=str(uuid.uuid4()),
        type="document_summary",
        title=f"Document Summary ({label})",
        content=content,
        audio_path=None,
        created_at=datetime.now().isoformat(),
    )