Spaces:
Sleeping
Sleeping
File size: 5,967 Bytes
f93bbcd ee36be0 f93bbcd ee36be0 f93bbcd 690fe5e f93bbcd 690fe5e f93bbcd 690fe5e f93bbcd ee36be0 f93bbcd ee36be0 f93bbcd ee36be0 f93bbcd ee36be0 f93bbcd ee36be0 f93bbcd ee36be0 f93bbcd 690fe5e ee36be0 f93bbcd ee36be0 f93bbcd 690fe5e ee36be0 f93bbcd ee36be0 f93bbcd | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 | from __future__ import annotations
import logging
import os
import uuid
from datetime import datetime
import anthropic
from state import Artifact, Notebook
# Module-level logger named after this module; used below to report retrieval
# and summary-generation failures.
logger = logging.getLogger(__name__)
# Model identifier sent as `model` on every Messages API request in _call_claude.
MODEL = "claude-haiku-4-5-20251001"
# Value sent as `max_tokens` on every Messages API request in _call_claude.
MAX_TOKENS = 1024
def _get_source_text(notebook: Notebook, max_chars: int = 8000, source_ids: list[str] | None = None) -> str:
    """Collect source text for *notebook* to embed in a summary prompt.

    Embeds a generic "overview" query, asks the vector store for the top 20
    matches in the notebook's namespace (optionally restricted to
    ``source_ids``), and joins the non-empty ``text`` fields with blank
    lines, truncated to ``max_chars`` characters.

    If retrieval raises, or yields no text, falls back to a comma-separated
    list of the filenames of sources whose status is ``"ready"``, or to a
    fixed placeholder when there are none. The fallback is not truncated.
    """
    try:
        # Imported inside the try so that an import failure is treated like
        # any other retrieval failure and lands in the fallback below.
        from persistence.vector_store import VectorStore
        from ingestion_engine.embedding_generator import generate_query

        embedding = generate_query("main ideas key concepts overview summary")
        metadata_filter = {"source_id": {"$in": source_ids}} if source_ids else None
        results = VectorStore().query(
            query_vector=embedding,
            namespace=notebook.id,
            top_k=20,
            filter=metadata_filter,
        )

        texts = []
        for match in results:
            text = match.get("text", "")
            if text:
                texts.append(text)

        if texts:
            joined = "\n\n".join(texts)
            return joined[:max_chars]
    except Exception as exc:
        # Best-effort: retrieval problems are logged, never propagated.
        logger.warning("Could not retrieve chunks from vector store: %s", exc)

    ready_names = [src.filename for src in notebook.sources if src.status == "ready"]
    if not ready_names:
        return "No sources available."
    return "Sources: " + ", ".join(ready_names)
def _call_claude(system: str, prompt: str) -> str:
    """Send a single user prompt to the Anthropic Messages API and return its text.

    Args:
        system: System prompt for the request.
        prompt: Content of the single user message.

    Returns:
        The text of every text-bearing content block in the response,
        concatenated in order and stripped of surrounding whitespace.
        Returns ``""`` when the response carries no text at all, so callers
        can treat it as an empty response.

    Raises:
        Any exception raised by the ``anthropic`` client (network, auth,
        rate-limit, ...) propagates unchanged.
    """
    client = anthropic.Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))
    response = client.messages.create(
        model=MODEL,
        max_tokens=MAX_TOKENS,
        system=system,
        messages=[{"role": "user", "content": prompt}],
    )
    # `content` is a list of blocks. Indexing [0].text would raise IndexError
    # on an empty list and AttributeError on a block without `.text`, and it
    # would drop any text blocks after the first. Collect all text instead.
    pieces = [getattr(block, "text", None) or "" for block in (response.content or [])]
    return "".join(pieces).strip()
def generate_conversation_summary(notebook: Notebook, style: str) -> Artifact:
    """Build a ``conversation_summary`` artifact from the notebook's chat history.

    ``style`` selects ``"brief"`` or ``"detailed"`` instructions; any other
    value is treated as ``"detailed"``. When the notebook has no messages a
    fixed placeholder is used and the model is not called. If the model call
    raises or returns empty text, the failure is logged and the artifact
    content is an error notice instead of a summary.
    """
    if style not in ("brief", "detailed"):
        style = "detailed"
    label = "Brief" if style == "brief" else "Detailed"

    if notebook.messages:
        # Render the chat as "Role: content" paragraphs separated by blank lines.
        transcript = "\n\n".join(
            f"{msg.role.capitalize()}: {msg.content}" for msg in notebook.messages
        )

        instructions_by_style = {
            "brief": (
                "Write a BRIEF summary (3-5 bullet points) covering:\n"
                "- The main topics discussed\n"
                "- Key questions asked and answers given\n"
                "- Any unresolved questions\n"
                "Keep it under 150 words."
            ),
            "detailed": (
                "Write a DETAILED summary covering:\n"
                "- The flow of the conversation\n"
                "- Each major topic explored with key insights\n"
                "- Important answers and explanations given\n"
                "- Any unresolved questions or follow-ups\n"
                "Use markdown headers and bullet points. Aim for 200-400 words."
            ),
        }
        instructions = instructions_by_style[style]

        # Sections are separated by exactly one blank line.
        prompt = "\n\n".join(
            [
                "Summarize this study session conversation:",
                f"CONVERSATION:\n{transcript}",
                f"TASK:\n{instructions}",
                f"Begin with: ## Conversation Summary ({label})",
            ]
        )

        try:
            content = _call_claude(
                "You are an expert academic summarizer. Produce clear, well-structured summaries in markdown.",
                prompt,
            )
            if not content:
                # Route an empty reply through the same failure path as an API error.
                raise ValueError("Empty response")
        except Exception as exc:
            logger.error("Conversation summary failed: %s", exc)
            content = f"_Summary generation failed: {exc}. Please try again._"
    else:
        content = "_No conversation to summarize yet. Start chatting in the Chat tab first._"

    artifact_id = str(uuid.uuid4())
    timestamp = datetime.now().isoformat()
    return Artifact(
        id=artifact_id,
        type="conversation_summary",
        title=f"Conversation Summary ({label})",
        content=content,
        audio_path=None,
        created_at=timestamp,
    )
def generate_document_summary(notebook: Notebook, style: str, source_ids: list[str] | None = None) -> Artifact:
    """Build a ``document_summary`` artifact from the notebook's source material.

    ``style`` selects ``"brief"`` or ``"detailed"`` instructions; any other
    value is treated as ``"detailed"``. ``source_ids`` is forwarded to
    ``_get_source_text`` to restrict which sources are used. If fetching the
    text or calling the model raises, or the model returns empty text, the
    failure is logged and the artifact content is an error notice instead.
    """
    if style not in ("brief", "detailed"):
        style = "detailed"
    label = "Brief" if style == "brief" else "Detailed"

    instructions_by_style = {
        "brief": (
            "Write a BRIEF summary (3-5 bullet points) covering:\n"
            "- The core theme or subject matter\n"
            "- The most important concepts or findings\n"
            "- The key takeaway\n"
            "Keep it under 150 words."
        ),
        "detailed": (
            "Write a DETAILED summary covering:\n"
            "- The main theme and purpose of the material\n"
            "- Each major concept or section with explanations\n"
            "- Key definitions, methods, or frameworks\n"
            "- Conclusions and practical implications\n"
            "Use markdown headers and bullet points. Aim for 300-500 words."
        ),
    }
    instructions = instructions_by_style[style]

    try:
        material = _get_source_text(notebook, source_ids=source_ids)
        # Sections are separated by exactly one blank line.
        prompt = "\n\n".join(
            [
                "Summarize this study material:",
                f"SOURCE CONTENT:\n{material}",
                f"TASK:\n{instructions}",
                f"Begin with: ## Document Summary ({label})",
            ]
        )
        content = _call_claude(
            "You are an expert academic summarizer. Produce clear, well-structured summaries in markdown.",
            prompt,
        )
        if not content:
            # Route an empty reply through the same failure path as an API error.
            raise ValueError("Empty response")
    except Exception as exc:
        logger.error("Document summary failed: %s", exc)
        content = f"_Summary generation failed: {exc}. Please try again._"

    artifact_id = str(uuid.uuid4())
    timestamp = datetime.now().isoformat()
    return Artifact(
        id=artifact_id,
        type="document_summary",
        title=f"Document Summary ({label})",
        content=content,
        audio_path=None,
        created_at=timestamp,
    )