Spaces:
Sleeping
Sleeping
internomega-terrablue committed on
Commit ·
690fe5e
1
Parent(s): f2e32a6
source selection
Browse files
- app.py +10 -1
- pages/artifacts.py +3 -2
- persistence/vector_store.py +2 -1
- services/summary_service.py +7 -3
app.py
CHANGED
|
@@ -126,6 +126,8 @@ def refresh_all(state: UserData):
|
|
| 126 |
has_nb = nb is not None
|
| 127 |
has_src = has_nb and len(nb.sources) > 0
|
| 128 |
|
|
|
|
|
|
|
| 129 |
return (
|
| 130 |
state,
|
| 131 |
# Sidebar
|
|
@@ -145,6 +147,7 @@ def refresh_all(state: UserData):
|
|
| 145 |
gr.update(visible=has_src), # artifacts_content visible
|
| 146 |
# Artifact sub-sections
|
| 147 |
render_conv_summary_section(state),
|
|
|
|
| 148 |
render_doc_summary_section(state),
|
| 149 |
render_podcast_section(state),
|
| 150 |
render_quiz_section(state),
|
|
@@ -344,6 +347,11 @@ with gr.Blocks(css=CUSTOM_CSS, theme=dark_theme, title="NotebookLM", js=BEFOREUN
|
|
| 344 |
'<p style="font-size:0.82rem; color:#808098; margin:2px 0 0 0;">'
|
| 345 |
'Summarize content from your uploaded sources.</p></div></div>'
|
| 346 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 347 |
with gr.Row():
|
| 348 |
doc_style_radio = gr.Radio(
|
| 349 |
choices=["brief", "detailed"],
|
|
@@ -419,6 +427,7 @@ with gr.Blocks(css=CUSTOM_CSS, theme=dark_theme, title="NotebookLM", js=BEFOREUN
|
|
| 419 |
no_sources_msg,
|
| 420 |
artifacts_content,
|
| 421 |
conv_summary_html,
|
|
|
|
| 422 |
doc_summary_html,
|
| 423 |
podcast_html,
|
| 424 |
quiz_html,
|
|
@@ -595,7 +604,7 @@ with gr.Blocks(css=CUSTOM_CSS, theme=dark_theme, title="NotebookLM", js=BEFOREUN
|
|
| 595 |
# ── Artifacts: Document summary ──────────────────────────────────────────
|
| 596 |
gen_doc_sum_btn.click(
|
| 597 |
fn=handle_gen_doc_summary,
|
| 598 |
-
inputs=[doc_style_radio, user_state],
|
| 599 |
outputs=[user_state, doc_summary_html],
|
| 600 |
api_name=False,
|
| 601 |
)
|
|
|
|
| 126 |
has_nb = nb is not None
|
| 127 |
has_src = has_nb and len(nb.sources) > 0
|
| 128 |
|
| 129 |
+
ready_sources = [s.filename for s in nb.sources if s.status == "ready"] if has_nb else []
|
| 130 |
+
|
| 131 |
return (
|
| 132 |
state,
|
| 133 |
# Sidebar
|
|
|
|
| 147 |
gr.update(visible=has_src), # artifacts_content visible
|
| 148 |
# Artifact sub-sections
|
| 149 |
render_conv_summary_section(state),
|
| 150 |
+
gr.update(choices=ready_sources, value=ready_sources),
|
| 151 |
render_doc_summary_section(state),
|
| 152 |
render_podcast_section(state),
|
| 153 |
render_quiz_section(state),
|
|
|
|
| 347 |
'<p style="font-size:0.82rem; color:#808098; margin:2px 0 0 0;">'
|
| 348 |
'Summarize content from your uploaded sources.</p></div></div>'
|
| 349 |
)
|
| 350 |
+
doc_source_selector = gr.CheckboxGroup(
|
| 351 |
+
choices=[],
|
| 352 |
+
label="Select sources to summarize",
|
| 353 |
+
value=[],
|
| 354 |
+
)
|
| 355 |
with gr.Row():
|
| 356 |
doc_style_radio = gr.Radio(
|
| 357 |
choices=["brief", "detailed"],
|
|
|
|
| 427 |
no_sources_msg,
|
| 428 |
artifacts_content,
|
| 429 |
conv_summary_html,
|
| 430 |
+
doc_source_selector,
|
| 431 |
doc_summary_html,
|
| 432 |
podcast_html,
|
| 433 |
quiz_html,
|
|
|
|
| 604 |
# ── Artifacts: Document summary ──────────────────────────────────────────
|
| 605 |
gen_doc_sum_btn.click(
|
| 606 |
fn=handle_gen_doc_summary,
|
| 607 |
+
inputs=[doc_style_radio, doc_source_selector, user_state],
|
| 608 |
outputs=[user_state, doc_summary_html],
|
| 609 |
api_name=False,
|
| 610 |
)
|
pages/artifacts.py
CHANGED
|
@@ -159,12 +159,13 @@ def render_doc_summary_section(state: UserData) -> str:
|
|
| 159 |
return md
|
| 160 |
|
| 161 |
|
| 162 |
-
def handle_gen_doc_summary(style: str, state: UserData) -> tuple[UserData, str]:
|
| 163 |
nb = get_active_notebook(state)
|
| 164 |
if not nb:
|
| 165 |
return state, render_doc_summary_section(state)
|
| 166 |
from services.summary_service import generate_document_summary
|
| 167 |
-
|
|
|
|
| 168 |
nb.artifacts.append(artifact)
|
| 169 |
return state, render_doc_summary_section(state)
|
| 170 |
|
|
|
|
| 159 |
return md
|
| 160 |
|
| 161 |
|
| 162 |
+
def handle_gen_doc_summary(style: str, selected_sources: list[str] | None, state: UserData) -> tuple[UserData, str]:
|
| 163 |
nb = get_active_notebook(state)
|
| 164 |
if not nb:
|
| 165 |
return state, render_doc_summary_section(state)
|
| 166 |
from services.summary_service import generate_document_summary
|
| 167 |
+
source_ids = [s.id for s in nb.sources if s.filename in (selected_sources or [])]
|
| 168 |
+
artifact = generate_document_summary(nb, style or "detailed", source_ids=source_ids or None)
|
| 169 |
nb.artifacts.append(artifact)
|
| 170 |
return state, render_doc_summary_section(state)
|
| 171 |
|
persistence/vector_store.py
CHANGED
|
@@ -75,7 +75,7 @@ class VectorStore:
|
|
| 75 |
except Exception as e:
|
| 76 |
logger.error("Failed to delete namespace from Pinecone: %s", e)
|
| 77 |
|
| 78 |
-
def query(self, query_vector: list[float], namespace: str, top_k: int = 5) -> list[dict]:
|
| 79 |
"""
|
| 80 |
Query Pinecone for the most similar chunks.
|
| 81 |
|
|
@@ -88,6 +88,7 @@ class VectorStore:
|
|
| 88 |
namespace=namespace,
|
| 89 |
top_k=top_k,
|
| 90 |
include_metadata=True,
|
|
|
|
| 91 |
)
|
| 92 |
|
| 93 |
matches = []
|
|
|
|
| 75 |
except Exception as e:
|
| 76 |
logger.error("Failed to delete namespace from Pinecone: %s", e)
|
| 77 |
|
| 78 |
+
def query(self, query_vector: list[float], namespace: str, top_k: int = 5, filter: dict | None = None) -> list[dict]:
|
| 79 |
"""
|
| 80 |
Query Pinecone for the most similar chunks.
|
| 81 |
|
|
|
|
| 88 |
namespace=namespace,
|
| 89 |
top_k=top_k,
|
| 90 |
include_metadata=True,
|
| 91 |
+
filter=filter,
|
| 92 |
)
|
| 93 |
|
| 94 |
matches = []
|
services/summary_service.py
CHANGED
|
@@ -15,17 +15,21 @@ MODEL = "claude-haiku-4-5-20251001"
|
|
| 15 |
MAX_TOKENS = 1024
|
| 16 |
|
| 17 |
|
| 18 |
-
def _get_source_text(notebook: Notebook, max_chars: int = 8000) -> str:
|
| 19 |
"""Pull chunk text from vector store for this notebook."""
|
| 20 |
try:
|
| 21 |
from persistence.vector_store import VectorStore
|
| 22 |
from ingestion_engine.embedding_generator import generate_query
|
| 23 |
|
| 24 |
query_vector = generate_query("main ideas key concepts overview summary")
|
|
|
|
|
|
|
|
|
|
| 25 |
matches = VectorStore().query(
|
| 26 |
query_vector=query_vector,
|
| 27 |
namespace=notebook.id,
|
| 28 |
top_k=20,
|
|
|
|
| 29 |
)
|
| 30 |
chunks = [m.get("text", "") for m in matches if m.get("text")]
|
| 31 |
if chunks:
|
|
@@ -106,7 +110,7 @@ def generate_conversation_summary(notebook: Notebook, style: str) -> Artifact:
|
|
| 106 |
)
|
| 107 |
|
| 108 |
|
| 109 |
-
def generate_document_summary(notebook: Notebook, style: str) -> Artifact:
|
| 110 |
"""Generate a document summary from notebook sources."""
|
| 111 |
style = style if style in ("brief", "detailed") else "detailed"
|
| 112 |
|
|
@@ -129,7 +133,7 @@ def generate_document_summary(notebook: Notebook, style: str) -> Artifact:
|
|
| 129 |
)
|
| 130 |
|
| 131 |
try:
|
| 132 |
-
source_text = _get_source_text(notebook)
|
| 133 |
prompt = (
|
| 134 |
f"Summarize this study material:\n\n"
|
| 135 |
f"SOURCE CONTENT:\n{source_text}\n\n"
|
|
|
|
| 15 |
MAX_TOKENS = 1024
|
| 16 |
|
| 17 |
|
| 18 |
+
def _get_source_text(notebook: Notebook, max_chars: int = 8000, source_ids: list[str] | None = None) -> str:
|
| 19 |
"""Pull chunk text from vector store for this notebook."""
|
| 20 |
try:
|
| 21 |
from persistence.vector_store import VectorStore
|
| 22 |
from ingestion_engine.embedding_generator import generate_query
|
| 23 |
|
| 24 |
query_vector = generate_query("main ideas key concepts overview summary")
|
| 25 |
+
filter_dict = None
|
| 26 |
+
if source_ids:
|
| 27 |
+
filter_dict = {"source_id": {"$in": source_ids}}
|
| 28 |
matches = VectorStore().query(
|
| 29 |
query_vector=query_vector,
|
| 30 |
namespace=notebook.id,
|
| 31 |
top_k=20,
|
| 32 |
+
filter=filter_dict,
|
| 33 |
)
|
| 34 |
chunks = [m.get("text", "") for m in matches if m.get("text")]
|
| 35 |
if chunks:
|
|
|
|
| 110 |
)
|
| 111 |
|
| 112 |
|
| 113 |
+
def generate_document_summary(notebook: Notebook, style: str, source_ids: list[str] | None = None) -> Artifact:
|
| 114 |
"""Generate a document summary from notebook sources."""
|
| 115 |
style = style if style in ("brief", "detailed") else "detailed"
|
| 116 |
|
|
|
|
| 133 |
)
|
| 134 |
|
| 135 |
try:
|
| 136 |
+
source_text = _get_source_text(notebook, source_ids=source_ids)
|
| 137 |
prompt = (
|
| 138 |
f"Summarize this study material:\n\n"
|
| 139 |
f"SOURCE CONTENT:\n{source_text}\n\n"
|