File size: 5,967 Bytes
f93bbcd
 
 
 
 
 
 
ee36be0
f93bbcd
 
 
 
 
ee36be0
 
f93bbcd
 
690fe5e
f93bbcd
 
 
 
 
 
690fe5e
 
 
f93bbcd
 
 
 
690fe5e
f93bbcd
 
 
 
 
 
 
 
 
 
 
ee36be0
 
 
 
 
 
 
f93bbcd
ee36be0
f93bbcd
 
 
ee36be0
f93bbcd
 
 
 
 
ee36be0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f93bbcd
ee36be0
 
 
 
f93bbcd
ee36be0
f93bbcd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
690fe5e
ee36be0
f93bbcd
 
ee36be0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f93bbcd
690fe5e
ee36be0
 
 
 
 
 
 
 
 
 
f93bbcd
ee36be0
f93bbcd
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
from __future__ import annotations

import logging
import os
import uuid
from datetime import datetime

import anthropic

from state import Artifact, Notebook

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Model identifier sent as `model` by _call_claude.
MODEL = "claude-haiku-4-5-20251001"
# Sent as `max_tokens` by _call_claude.
MAX_TOKENS = 1024


def _get_source_text(notebook: Notebook, max_chars: int = 8000, source_ids: list[str] | None = None) -> str:
    """Return text standing in for the notebook's sources, for use in a prompt.

    Queries the vector store (namespace ``notebook.id``, ``top_k=20``) with a
    fixed overview-style query and joins the ``"text"`` of the returned
    matches with blank lines, truncated to ``max_chars`` characters. When
    ``source_ids`` is non-empty, a ``source_id`` ``$in`` filter is passed to
    the query.

    If retrieval raises, or no match carries text, the function falls back to
    a comma-separated list of filenames of sources whose status is
    ``"ready"``, or a fixed message when there are none. The fallback string
    is not truncated.
    """
    try:
        # Imported inside the try so that an import failure also lands in the
        # fallback path below instead of propagating.
        from persistence.vector_store import VectorStore
        from ingestion_engine.embedding_generator import generate_query

        source_filter = {"source_id": {"$in": source_ids}} if source_ids else None
        overview_vector = generate_query("main ideas key concepts overview summary")
        hits = VectorStore().query(
            query_vector=overview_vector,
            namespace=notebook.id,
            top_k=20,
            filter=source_filter,
        )
        texts = [hit.get("text", "") for hit in hits if hit.get("text")]
        if texts:
            combined = "\n\n".join(texts)
            return combined[:max_chars]
    except Exception as exc:
        # Best effort: any retrieval problem is logged and the filename
        # fallback is used instead.
        logger.warning("Could not retrieve chunks from vector store: %s", exc)

    ready_names = [src.filename for src in notebook.sources if src.status == "ready"]
    if not ready_names:
        return "No sources available."
    return "Sources: " + ", ".join(ready_names)


def _call_claude(system: str, prompt: str) -> str:
    """Send one user message to the Anthropic Messages API and return its text.

    Args:
        system: System prompt for the request.
        prompt: Content of the single user message.

    Returns:
        The text of all text-bearing content blocks in the response,
        concatenated and stripped. Returns ``""`` when the response carries
        no text at all, so callers can treat that as an empty response.

    Raises:
        Whatever the Anthropic client raises for the request; nothing is
        caught here.
    """
    client = anthropic.Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))
    response = client.messages.create(
        model=MODEL,
        max_tokens=MAX_TOKENS,
        system=system,
        messages=[{"role": "user", "content": prompt}],
    )
    # `content` is a list of blocks. It may be empty, and a block is not
    # guaranteed to have a `.text` attribute, so collect text from every
    # block that has one instead of indexing the first block blindly.
    pieces = [getattr(block, "text", None) or "" for block in (response.content or [])]
    return "".join(pieces).strip()


def generate_conversation_summary(notebook: Notebook, style: str) -> Artifact:
    """Build a ``conversation_summary`` artifact from the notebook's messages.

    Args:
        notebook: Notebook whose ``messages`` (each with ``role`` and
            ``content``) form the transcript to summarize.
        style: ``"brief"`` or ``"detailed"``; any other value is treated as
            ``"detailed"``.

    Returns:
        An ``Artifact`` with a fresh UUID, no audio path, and the current
        local time as ``created_at``. Its content is the model's summary, a
        placeholder when there are no messages, or an error note when the
        model call fails or returns nothing (the failure is logged, not
        raised).
    """
    if style not in ("brief", "detailed"):
        style = "detailed"
    label = "Brief" if style == "brief" else "Detailed"

    if notebook.messages:
        transcript = "\n\n".join(
            f"{msg.role.capitalize()}: {msg.content}" for msg in notebook.messages
        )

        if style == "brief":
            instruction_lines = [
                "Write a BRIEF summary (3-5 bullet points) covering:",
                "- The main topics discussed",
                "- Key questions asked and answers given",
                "- Any unresolved questions",
                "Keep it under 150 words.",
            ]
        else:
            instruction_lines = [
                "Write a DETAILED summary covering:",
                "- The flow of the conversation",
                "- Each major topic explored with key insights",
                "- Important answers and explanations given",
                "- Any unresolved questions or follow-ups",
                "Use markdown headers and bullet points. Aim for 200-400 words.",
            ]
        instructions = "\n".join(instruction_lines)

        # Prompt sections are separated by one blank line each.
        prompt = "\n\n".join(
            [
                "Summarize this study session conversation:",
                f"CONVERSATION:\n{transcript}",
                f"TASK:\n{instructions}",
                f"Begin with: ## Conversation Summary ({label})",
            ]
        )

        try:
            content = _call_claude(
                "You are an expert academic summarizer. Produce clear, well-structured summaries in markdown.",
                prompt,
            )
            if not content:
                # Route an empty reply through the same failure path.
                raise ValueError("Empty response")
        except Exception as exc:
            logger.error("Conversation summary failed: %s", exc)
            content = f"_Summary generation failed: {exc}. Please try again._"
    else:
        content = "_No conversation to summarize yet. Start chatting in the Chat tab first._"

    return Artifact(
        id=str(uuid.uuid4()),
        type="conversation_summary",
        title=f"Conversation Summary ({label})",
        content=content,
        audio_path=None,
        created_at=datetime.now().isoformat(),
    )


def generate_document_summary(notebook: Notebook, style: str, source_ids: list[str] | None = None) -> Artifact:
    """Build a ``document_summary`` artifact from the notebook's source material.

    Args:
        notebook: Notebook whose sources are summarized.
        style: ``"brief"`` or ``"detailed"``; any other value is treated as
            ``"detailed"``.
        source_ids: Optional list of source ids forwarded to
            ``_get_source_text`` to restrict which sources are used.

    Returns:
        An ``Artifact`` with a fresh UUID, no audio path, and the current
        local time as ``created_at``. Its content is the model's summary, a
        placeholder when there is no source content to summarize, or an
        error note when retrieval or the model call fails or returns nothing
        (the failure is logged, not raised).
    """
    style = style if style in ("brief", "detailed") else "detailed"
    label = "Brief" if style == "brief" else "Detailed"

    if style == "brief":
        instructions = (
            "Write a BRIEF summary (3-5 bullet points) covering:\n"
            "- The core theme or subject matter\n"
            "- The most important concepts or findings\n"
            "- The key takeaway\n"
            "Keep it under 150 words."
        )
    else:
        instructions = (
            "Write a DETAILED summary covering:\n"
            "- The main theme and purpose of the material\n"
            "- Each major concept or section with explanations\n"
            "- Key definitions, methods, or frameworks\n"
            "- Conclusions and practical implications\n"
            "Use markdown headers and bullet points. Aim for 300-500 words."
        )

    try:
        source_text = _get_source_text(notebook, source_ids=source_ids)
        if source_text == "No sources available.":
            # This is the fallback _get_source_text returns when it has
            # neither chunks nor ready sources. Sending it to the model would
            # spend an API call summarizing a placeholder, so short-circuit
            # the same way the conversation summary does for an empty chat.
            content = "_No source content to summarize yet. Add a source to this notebook first._"
        else:
            prompt = (
                f"Summarize this study material:\n\n"
                f"SOURCE CONTENT:\n{source_text}\n\n"
                f"TASK:\n{instructions}\n\n"
                f"Begin with: ## Document Summary ({label})"
            )
            content = _call_claude(
                "You are an expert academic summarizer. Produce clear, well-structured summaries in markdown.",
                prompt,
            )
            if not content:
                # Route an empty reply through the same failure path.
                raise ValueError("Empty response")
    except Exception as e:
        logger.error("Document summary failed: %s", e)
        content = f"_Summary generation failed: {e}. Please try again._"

    return Artifact(
        id=str(uuid.uuid4()),
        type="document_summary",
        title=f"Document Summary ({label})",
        content=content,
        audio_path=None,
        created_at=datetime.now().isoformat(),
    )