Spaces:

Group-1-5010
/

NotebookLM

Sleeping

App Files Files Community

internomega-terrablue committed on Mar 2

Commit

690fe5e

1 Parent(s): f2e32a6

source selection

Browse files

Files changed (4) hide show

app.py +10 -1
pages/artifacts.py +3 -2
persistence/vector_store.py +2 -1
services/summary_service.py +7 -3

app.py CHANGED Viewed

@@ -126,6 +126,8 @@ def refresh_all(state: UserData):
     has_nb = nb is not None
     has_src = has_nb and len(nb.sources) > 0
     return (
         state,
         # Sidebar
@@ -145,6 +147,7 @@ def refresh_all(state: UserData):
         gr.update(visible=has_src),   # artifacts_content visible
         # Artifact sub-sections
         render_conv_summary_section(state),
         render_doc_summary_section(state),
         render_podcast_section(state),
         render_quiz_section(state),
@@ -344,6 +347,11 @@ with gr.Blocks(css=CUSTOM_CSS, theme=dark_theme, title="NotebookLM", js=BEFOREUN
                                     '<p style="font-size:0.82rem; color:#808098; margin:2px 0 0 0;">'
                                     'Summarize content from your uploaded sources.</p></div></div>'
                                 )
                                 with gr.Row():
                                     doc_style_radio = gr.Radio(
                                         choices=["brief", "detailed"],
@@ -419,6 +427,7 @@ with gr.Blocks(css=CUSTOM_CSS, theme=dark_theme, title="NotebookLM", js=BEFOREUN
         no_sources_msg,
         artifacts_content,
         conv_summary_html,
         doc_summary_html,
         podcast_html,
         quiz_html,
@@ -595,7 +604,7 @@ with gr.Blocks(css=CUSTOM_CSS, theme=dark_theme, title="NotebookLM", js=BEFOREUN
     # ── Artifacts: Document summary ──────────────────────────────────────────
     gen_doc_sum_btn.click(
         fn=handle_gen_doc_summary,
-        inputs=[doc_style_radio, user_state],
         outputs=[user_state, doc_summary_html],
         api_name=False,
     )

     has_nb = nb is not None
     has_src = has_nb and len(nb.sources) > 0
+    ready_sources = [s.filename for s in nb.sources if s.status == "ready"] if has_nb else []
     return (
         state,
         # Sidebar
         gr.update(visible=has_src),   # artifacts_content visible
         # Artifact sub-sections
         render_conv_summary_section(state),
+        gr.update(choices=ready_sources, value=ready_sources),
         render_doc_summary_section(state),
         render_podcast_section(state),
         render_quiz_section(state),
                                     '<p style="font-size:0.82rem; color:#808098; margin:2px 0 0 0;">'
                                     'Summarize content from your uploaded sources.</p></div></div>'
                                 )
+                                doc_source_selector = gr.CheckboxGroup(
+                                    choices=[],
+                                    label="Select sources to summarize",
+                                    value=[],
+                                )
                                 with gr.Row():
                                     doc_style_radio = gr.Radio(
                                         choices=["brief", "detailed"],
         no_sources_msg,
         artifacts_content,
         conv_summary_html,
+        doc_source_selector,
         doc_summary_html,
         podcast_html,
         quiz_html,
     # ── Artifacts: Document summary ──────────────────────────────────────────
     gen_doc_sum_btn.click(
         fn=handle_gen_doc_summary,
+        inputs=[doc_style_radio, doc_source_selector, user_state],
         outputs=[user_state, doc_summary_html],
         api_name=False,
     )

pages/artifacts.py CHANGED Viewed

@@ -159,12 +159,13 @@ def render_doc_summary_section(state: UserData) -> str:
     return md
-def handle_gen_doc_summary(style: str, state: UserData) -> tuple[UserData, str]:
     nb = get_active_notebook(state)
     if not nb:
         return state, render_doc_summary_section(state)
     from services.summary_service import generate_document_summary
-    artifact = generate_document_summary(nb, style or "detailed")
     nb.artifacts.append(artifact)
     return state, render_doc_summary_section(state)

     return md
+def handle_gen_doc_summary(style: str, selected_sources: list[str] | None, state: UserData) -> tuple[UserData, str]:
     nb = get_active_notebook(state)
     if not nb:
         return state, render_doc_summary_section(state)
     from services.summary_service import generate_document_summary
+    source_ids = [s.id for s in nb.sources if s.filename in (selected_sources or [])]
+    artifact = generate_document_summary(nb, style or "detailed", source_ids=source_ids or None)
     nb.artifacts.append(artifact)
     return state, render_doc_summary_section(state)

persistence/vector_store.py CHANGED Viewed

@@ -75,7 +75,7 @@ class VectorStore:
         except Exception as e:
             logger.error("Failed to delete namespace from Pinecone: %s", e)
-    def query(self, query_vector: list[float], namespace: str, top_k: int = 5) -> list[dict]:
         """
         Query Pinecone for the most similar chunks.
@@ -88,6 +88,7 @@ class VectorStore:
                 namespace=namespace,
                 top_k=top_k,
                 include_metadata=True,
             )
             matches = []

         except Exception as e:
             logger.error("Failed to delete namespace from Pinecone: %s", e)
+    def query(self, query_vector: list[float], namespace: str, top_k: int = 5, filter: dict | None = None) -> list[dict]:
         """
         Query Pinecone for the most similar chunks.
                 namespace=namespace,
                 top_k=top_k,
                 include_metadata=True,
+                filter=filter,
             )
             matches = []

services/summary_service.py CHANGED Viewed

@@ -15,17 +15,21 @@ MODEL = "claude-haiku-4-5-20251001"
 MAX_TOKENS = 1024
-def _get_source_text(notebook: Notebook, max_chars: int = 8000) -> str:
     """Pull chunk text from vector store for this notebook."""
     try:
         from persistence.vector_store import VectorStore
         from ingestion_engine.embedding_generator import generate_query
         query_vector = generate_query("main ideas key concepts overview summary")
         matches = VectorStore().query(
             query_vector=query_vector,
             namespace=notebook.id,
             top_k=20,
         )
         chunks = [m.get("text", "") for m in matches if m.get("text")]
         if chunks:
@@ -106,7 +110,7 @@ def generate_conversation_summary(notebook: Notebook, style: str) -> Artifact:
     )
-def generate_document_summary(notebook: Notebook, style: str) -> Artifact:
     """Generate a document summary from notebook sources."""
     style = style if style in ("brief", "detailed") else "detailed"
@@ -129,7 +133,7 @@ def generate_document_summary(notebook: Notebook, style: str) -> Artifact:
         )
     try:
-        source_text = _get_source_text(notebook)
         prompt = (
             f"Summarize this study material:\n\n"
             f"SOURCE CONTENT:\n{source_text}\n\n"

 MAX_TOKENS = 1024
+def _get_source_text(notebook: Notebook, max_chars: int = 8000, source_ids: list[str] | None = None) -> str:
     """Pull chunk text from vector store for this notebook."""
     try:
         from persistence.vector_store import VectorStore
         from ingestion_engine.embedding_generator import generate_query
         query_vector = generate_query("main ideas key concepts overview summary")
+        filter_dict = None
+        if source_ids:
+            filter_dict = {"source_id": {"$in": source_ids}}
         matches = VectorStore().query(
             query_vector=query_vector,
             namespace=notebook.id,
             top_k=20,
+            filter=filter_dict,
         )
         chunks = [m.get("text", "") for m in matches if m.get("text")]
         if chunks:
     )
+def generate_document_summary(notebook: Notebook, style: str, source_ids: list[str] | None = None) -> Artifact:
     """Generate a document summary from notebook sources."""
     style = style if style in ("brief", "detailed") else "detailed"
         )
     try:
+        source_text = _get_source_text(notebook, source_ids=source_ids)
         prompt = (
             f"Summarize this study material:\n\n"
             f"SOURCE CONTENT:\n{source_text}\n\n"