Spaces:

Group-1-5010
/

NotebookLM

Sleeping

File size: 5,967 Bytes

f93bbcd
 
 
 
 
 
 
ee36be0
f93bbcd
 
 
 
 
ee36be0
 
f93bbcd
 
690fe5e
f93bbcd
 
 
 
 
 
690fe5e
 
 
f93bbcd
 
 
 
690fe5e
f93bbcd
 
 
 
 
 
 
 
 
 
 
ee36be0
 
 
 
 
 
 
f93bbcd
ee36be0
f93bbcd
 
 
ee36be0
f93bbcd
 
 
 
 
ee36be0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f93bbcd
ee36be0
 
 
 
f93bbcd
ee36be0
f93bbcd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
690fe5e
ee36be0
f93bbcd
 
ee36be0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f93bbcd
690fe5e
ee36be0
 
 
 
 
 
 
 
 
 
f93bbcd
ee36be0
f93bbcd

from __future__ import annotations

import logging
import os
import uuid
from datetime import datetime

import anthropic

from state import Artifact, Notebook

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Model identifier sent as `model` in every request made by _call_claude.
MODEL = "claude-haiku-4-5-20251001"
# Value sent as `max_tokens` in every request made by _call_claude.
MAX_TOKENS = 1024


def _get_source_text(notebook: Notebook, max_chars: int = 8000, source_ids: list[str] | None = None) -> str:
    """Return source text for ``notebook`` to embed in a summary prompt.

    Queries the vector store (namespace = ``notebook.id``, top 20 matches)
    with a generic "overview" query vector, optionally restricted to
    ``source_ids`` via a ``source_id`` ``$in`` filter. The non-empty ``text``
    values of the matches are joined with blank lines and cut to
    ``max_chars`` characters.

    If the lookup raises (the error is logged as a warning) or yields no
    text, the result is instead a line listing the filenames of sources whose
    status is "ready", or a fixed placeholder when there are none. The
    fallback is not truncated and does not apply ``source_ids``.
    """
    try:
        # Imported lazily, inside the try, so an import failure also triggers
        # the filename fallback below.
        from persistence.vector_store import VectorStore
        from ingestion_engine.embedding_generator import generate_query

        embedding = generate_query("main ideas key concepts overview summary")
        metadata_filter = {"source_id": {"$in": source_ids}} if source_ids else None
        hits = VectorStore().query(
            query_vector=embedding,
            namespace=notebook.id,
            top_k=20,
            filter=metadata_filter,
        )

        texts = []
        for hit in hits:
            text = hit.get("text", "")
            if text:
                texts.append(text)

        if texts:
            combined = "\n\n".join(texts)
            return combined[:max_chars]
    except Exception as exc:
        logger.warning("Could not retrieve chunks from vector store: %s", exc)

    # Fallback: describe the notebook by the filenames of its ready sources.
    ready_names = []
    for source in notebook.sources:
        if source.status == "ready":
            ready_names.append(source.filename)

    if not ready_names:
        return "No sources available."
    return "Sources: " + ", ".join(ready_names)


def _call_claude(system: str, prompt: str) -> str:
    """Send a single user message to Claude and return the reply text.

    Args:
        system: System prompt for the request.
        prompt: Content of the one user message.

    Returns:
        The text of every text-bearing content block in the response,
        concatenated and stripped of surrounding whitespace. Returns ""
        when the response carries no text, which the callers in this module
        treat as a failure ("Empty response").

    Raises:
        Any exception raised by the Anthropic client while creating the
        client or performing the request.
    """
    client = anthropic.Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))
    response = client.messages.create(
        model=MODEL,
        max_tokens=MAX_TOKENS,
        system=system,
        messages=[{"role": "user", "content": prompt}],
    )
    # `content` is a list of blocks. It can be empty, and a block is not
    # guaranteed to have a `.text` attribute, so indexing `[0].text` directly
    # could raise IndexError/AttributeError. Collect text defensively instead.
    blocks = response.content or []
    pieces = [getattr(block, "text", None) or "" for block in blocks]
    return "".join(pieces).strip()


def generate_conversation_summary(notebook: Notebook, style: str) -> Artifact:
    """Build a "conversation_summary" artifact from the notebook's chat messages.

    ``style`` is "brief" or "detailed"; any other value is treated as
    "detailed". When the notebook has no messages, the artifact content is a
    fixed placeholder and no model call is made. If the model call raises or
    returns empty text, the error is logged and the content becomes a failure
    notice that includes the exception message.
    """
    if style not in ("brief", "detailed"):
        style = "detailed"
    label = "Brief" if style == "brief" else "Detailed"

    instructions_by_style = {
        "brief": (
            "Write a BRIEF summary (3-5 bullet points) covering:\n"
            "- The main topics discussed\n"
            "- Key questions asked and answers given\n"
            "- Any unresolved questions\n"
            "Keep it under 150 words."
        ),
        "detailed": (
            "Write a DETAILED summary covering:\n"
            "- The flow of the conversation\n"
            "- Each major topic explored with key insights\n"
            "- Important answers and explanations given\n"
            "- Any unresolved questions or follow-ups\n"
            "Use markdown headers and bullet points. Aim for 200-400 words."
        ),
    }

    if notebook.messages:
        # Render the transcript as "Role: text" turns separated by blank lines.
        turns = []
        for message in notebook.messages:
            turns.append(f"{message.role.capitalize()}: {message.content}")
        history = "\n\n".join(turns)

        prompt = "\n\n".join(
            [
                "Summarize this study session conversation:",
                f"CONVERSATION:\n{history}",
                f"TASK:\n{instructions_by_style[style]}",
                f"Begin with: ## Conversation Summary ({label})",
            ]
        )
        system_prompt = (
            "You are an expert academic summarizer. "
            "Produce clear, well-structured summaries in markdown."
        )

        try:
            content = _call_claude(system_prompt, prompt)
            if not content:
                raise ValueError("Empty response")
        except Exception as exc:
            logger.error("Conversation summary failed: %s", exc)
            content = f"_Summary generation failed: {exc}. Please try again._"
    else:
        content = "_No conversation to summarize yet. Start chatting in the Chat tab first._"

    artifact_id = str(uuid.uuid4())
    timestamp = datetime.now().isoformat()
    return Artifact(
        id=artifact_id,
        type="conversation_summary",
        title=f"Conversation Summary ({label})",
        content=content,
        audio_path=None,
        created_at=timestamp,
    )


def generate_document_summary(notebook: Notebook, style: str, source_ids: list[str] | None = None) -> Artifact:
    """Build a "document_summary" artifact from the notebook's source material.

    ``style`` is "brief" or "detailed"; any other value is treated as
    "detailed". ``source_ids`` is forwarded to ``_get_source_text`` to select
    which sources feed the prompt. If fetching the text or the model call
    raises, or the model returns empty text, the error is logged and the
    artifact content becomes a failure notice that includes the exception
    message.
    """
    if style not in ("brief", "detailed"):
        style = "detailed"
    label = "Brief" if style == "brief" else "Detailed"

    instructions_by_style = {
        "brief": (
            "Write a BRIEF summary (3-5 bullet points) covering:\n"
            "- The core theme or subject matter\n"
            "- The most important concepts or findings\n"
            "- The key takeaway\n"
            "Keep it under 150 words."
        ),
        "detailed": (
            "Write a DETAILED summary covering:\n"
            "- The main theme and purpose of the material\n"
            "- Each major concept or section with explanations\n"
            "- Key definitions, methods, or frameworks\n"
            "- Conclusions and practical implications\n"
            "Use markdown headers and bullet points. Aim for 300-500 words."
        ),
    }
    instructions = instructions_by_style[style]
    system_prompt = (
        "You are an expert academic summarizer. "
        "Produce clear, well-structured summaries in markdown."
    )

    try:
        source_text = _get_source_text(notebook, source_ids=source_ids)
        prompt = "\n\n".join(
            [
                "Summarize this study material:",
                f"SOURCE CONTENT:\n{source_text}",
                f"TASK:\n{instructions}",
                f"Begin with: ## Document Summary ({label})",
            ]
        )
        content = _call_claude(system_prompt, prompt)
        if not content:
            raise ValueError("Empty response")
    except Exception as exc:
        logger.error("Document summary failed: %s", exc)
        content = f"_Summary generation failed: {exc}. Please try again._"

    artifact_id = str(uuid.uuid4())
    timestamp = datetime.now().isoformat()
    return Artifact(
        id=artifact_id,
        type="document_summary",
        title=f"Document Summary ({label})",
        content=content,
        audio_path=None,
        created_at=timestamp,
    )