internomega-terrablue committed on
Commit
690fe5e
·
1 Parent(s): f2e32a6

source selection

Browse files
app.py CHANGED
@@ -126,6 +126,8 @@ def refresh_all(state: UserData):
126
  has_nb = nb is not None
127
  has_src = has_nb and len(nb.sources) > 0
128
 
 
 
129
  return (
130
  state,
131
  # Sidebar
@@ -145,6 +147,7 @@ def refresh_all(state: UserData):
145
  gr.update(visible=has_src), # artifacts_content visible
146
  # Artifact sub-sections
147
  render_conv_summary_section(state),
 
148
  render_doc_summary_section(state),
149
  render_podcast_section(state),
150
  render_quiz_section(state),
@@ -344,6 +347,11 @@ with gr.Blocks(css=CUSTOM_CSS, theme=dark_theme, title="NotebookLM", js=BEFOREUN
344
  '<p style="font-size:0.82rem; color:#808098; margin:2px 0 0 0;">'
345
  'Summarize content from your uploaded sources.</p></div></div>'
346
  )
 
 
 
 
 
347
  with gr.Row():
348
  doc_style_radio = gr.Radio(
349
  choices=["brief", "detailed"],
@@ -419,6 +427,7 @@ with gr.Blocks(css=CUSTOM_CSS, theme=dark_theme, title="NotebookLM", js=BEFOREUN
419
  no_sources_msg,
420
  artifacts_content,
421
  conv_summary_html,
 
422
  doc_summary_html,
423
  podcast_html,
424
  quiz_html,
@@ -595,7 +604,7 @@ with gr.Blocks(css=CUSTOM_CSS, theme=dark_theme, title="NotebookLM", js=BEFOREUN
595
  # ── Artifacts: Document summary ──────────────────────────────────────────
596
  gen_doc_sum_btn.click(
597
  fn=handle_gen_doc_summary,
598
- inputs=[doc_style_radio, user_state],
599
  outputs=[user_state, doc_summary_html],
600
  api_name=False,
601
  )
 
126
  has_nb = nb is not None
127
  has_src = has_nb and len(nb.sources) > 0
128
 
129
+ ready_sources = [s.filename for s in nb.sources if s.status == "ready"] if has_nb else []
130
+
131
  return (
132
  state,
133
  # Sidebar
 
147
  gr.update(visible=has_src), # artifacts_content visible
148
  # Artifact sub-sections
149
  render_conv_summary_section(state),
150
+ gr.update(choices=ready_sources, value=ready_sources),
151
  render_doc_summary_section(state),
152
  render_podcast_section(state),
153
  render_quiz_section(state),
 
347
  '<p style="font-size:0.82rem; color:#808098; margin:2px 0 0 0;">'
348
  'Summarize content from your uploaded sources.</p></div></div>'
349
  )
350
+ doc_source_selector = gr.CheckboxGroup(
351
+ choices=[],
352
+ label="Select sources to summarize",
353
+ value=[],
354
+ )
355
  with gr.Row():
356
  doc_style_radio = gr.Radio(
357
  choices=["brief", "detailed"],
 
427
  no_sources_msg,
428
  artifacts_content,
429
  conv_summary_html,
430
+ doc_source_selector,
431
  doc_summary_html,
432
  podcast_html,
433
  quiz_html,
 
604
  # ── Artifacts: Document summary ──────────────────────────────────────────
605
  gen_doc_sum_btn.click(
606
  fn=handle_gen_doc_summary,
607
+ inputs=[doc_style_radio, doc_source_selector, user_state],
608
  outputs=[user_state, doc_summary_html],
609
  api_name=False,
610
  )
pages/artifacts.py CHANGED
@@ -159,12 +159,13 @@ def render_doc_summary_section(state: UserData) -> str:
159
  return md
160
 
161
 
162
- def handle_gen_doc_summary(style: str, state: UserData) -> tuple[UserData, str]:
163
  nb = get_active_notebook(state)
164
  if not nb:
165
  return state, render_doc_summary_section(state)
166
  from services.summary_service import generate_document_summary
167
- artifact = generate_document_summary(nb, style or "detailed")
 
168
  nb.artifacts.append(artifact)
169
  return state, render_doc_summary_section(state)
170
 
 
159
  return md
160
 
161
 
162
+ def handle_gen_doc_summary(style: str, selected_sources: list[str] | None, state: UserData) -> tuple[UserData, str]:
163
  nb = get_active_notebook(state)
164
  if not nb:
165
  return state, render_doc_summary_section(state)
166
  from services.summary_service import generate_document_summary
167
+ source_ids = [s.id for s in nb.sources if s.filename in (selected_sources or [])]
168
+ artifact = generate_document_summary(nb, style or "detailed", source_ids=source_ids or None)
169
  nb.artifacts.append(artifact)
170
  return state, render_doc_summary_section(state)
171
 
persistence/vector_store.py CHANGED
@@ -75,7 +75,7 @@ class VectorStore:
75
  except Exception as e:
76
  logger.error("Failed to delete namespace from Pinecone: %s", e)
77
 
78
- def query(self, query_vector: list[float], namespace: str, top_k: int = 5) -> list[dict]:
79
  """
80
  Query Pinecone for the most similar chunks.
81
 
@@ -88,6 +88,7 @@ class VectorStore:
88
  namespace=namespace,
89
  top_k=top_k,
90
  include_metadata=True,
 
91
  )
92
 
93
  matches = []
 
75
  except Exception as e:
76
  logger.error("Failed to delete namespace from Pinecone: %s", e)
77
 
78
+ def query(self, query_vector: list[float], namespace: str, top_k: int = 5, filter: dict | None = None) -> list[dict]:
79
  """
80
  Query Pinecone for the most similar chunks.
81
 
 
88
  namespace=namespace,
89
  top_k=top_k,
90
  include_metadata=True,
91
+ filter=filter,
92
  )
93
 
94
  matches = []
services/summary_service.py CHANGED
@@ -15,17 +15,21 @@ MODEL = "claude-haiku-4-5-20251001"
15
  MAX_TOKENS = 1024
16
 
17
 
18
- def _get_source_text(notebook: Notebook, max_chars: int = 8000) -> str:
19
  """Pull chunk text from vector store for this notebook."""
20
  try:
21
  from persistence.vector_store import VectorStore
22
  from ingestion_engine.embedding_generator import generate_query
23
 
24
  query_vector = generate_query("main ideas key concepts overview summary")
 
 
 
25
  matches = VectorStore().query(
26
  query_vector=query_vector,
27
  namespace=notebook.id,
28
  top_k=20,
 
29
  )
30
  chunks = [m.get("text", "") for m in matches if m.get("text")]
31
  if chunks:
@@ -106,7 +110,7 @@ def generate_conversation_summary(notebook: Notebook, style: str) -> Artifact:
106
  )
107
 
108
 
109
- def generate_document_summary(notebook: Notebook, style: str) -> Artifact:
110
  """Generate a document summary from notebook sources."""
111
  style = style if style in ("brief", "detailed") else "detailed"
112
 
@@ -129,7 +133,7 @@ def generate_document_summary(notebook: Notebook, style: str) -> Artifact:
129
  )
130
 
131
  try:
132
- source_text = _get_source_text(notebook)
133
  prompt = (
134
  f"Summarize this study material:\n\n"
135
  f"SOURCE CONTENT:\n{source_text}\n\n"
 
15
  MAX_TOKENS = 1024
16
 
17
 
18
+ def _get_source_text(notebook: Notebook, max_chars: int = 8000, source_ids: list[str] | None = None) -> str:
19
  """Pull chunk text from vector store for this notebook."""
20
  try:
21
  from persistence.vector_store import VectorStore
22
  from ingestion_engine.embedding_generator import generate_query
23
 
24
  query_vector = generate_query("main ideas key concepts overview summary")
25
+ filter_dict = None
26
+ if source_ids:
27
+ filter_dict = {"source_id": {"$in": source_ids}}
28
  matches = VectorStore().query(
29
  query_vector=query_vector,
30
  namespace=notebook.id,
31
  top_k=20,
32
+ filter=filter_dict,
33
  )
34
  chunks = [m.get("text", "") for m in matches if m.get("text")]
35
  if chunks:
 
110
  )
111
 
112
 
113
+ def generate_document_summary(notebook: Notebook, style: str, source_ids: list[str] | None = None) -> Artifact:
114
  """Generate a document summary from notebook sources."""
115
  style = style if style in ("brief", "detailed") else "detailed"
116
 
 
133
  )
134
 
135
  try:
136
+ source_text = _get_source_text(notebook, source_ids=source_ids)
137
  prompt = (
138
  f"Summarize this study material:\n\n"
139
  f"SOURCE CONTENT:\n{source_text}\n\n"