NotebookLMClone

Runtime error

App Files Files Community

Hitakshi26 committed on Mar 3

Commit

8593064

1 Parent(s): ca39256

Testing Flow

Browse files

Files changed (20) hide show

README.md +32 -5
app.py +8 -3
src/backend/__pycache__/artifacts.cpython-310.pyc +0 -0
src/backend/__pycache__/auth.cpython-310.pyc +0 -0
src/backend/__pycache__/ingest.cpython-310.pyc +0 -0
src/backend/__pycache__/llm.cpython-310.pyc +0 -0
src/backend/__pycache__/notebooks.cpython-310.pyc +0 -0
src/backend/__pycache__/rag.cpython-310.pyc +0 -0
src/backend/auth.py +32 -13
src/backend/ingest.py +113 -54
src/frontend/__pycache__/callbacks.cpython-310.pyc +0 -0
src/frontend/__pycache__/ui.cpython-310.pyc +0 -0
src/frontend/ui.py +55 -197
src/storage/__pycache__/artifact_store.cpython-310.pyc +0 -0
src/storage/__pycache__/chat_store.cpython-310.pyc +0 -0
src/storage/__pycache__/chroma_store.cpython-310.pyc +0 -0
src/storage/__pycache__/index_store.cpython-310.pyc +0 -0
src/storage/__pycache__/paths.cpython-310.pyc +0 -0
src/storage/chroma_store.py +9 -1
src/utils/__pycache__/text.cpython-310.pyc +0 -0

README.md CHANGED Viewed

@@ -1,12 +1,39 @@
 ---
-title: GPP1 - NotebookLM Clone
 emoji: 📓
-colorFrom: indigo
-colorTo: purple
 sdk: gradio
 sdk_version: "4.44.1"
-python_version: "3.10"
 app_file: app.py
 pinned: false
 hf_oauth: true
----

 ---
+title: NotebookLM Clone (GPP1)
 emoji: 📓
+colorFrom: blue
+colorTo: pink
 sdk: gradio
 sdk_version: "4.44.1"
 app_file: app.py
 pinned: false
 hf_oauth: true
+---
+# NotebookLM Clone (HF OAuth + Chroma + RAG)
+## Overview
+This project is a simplified clone of Google NotebookLM. Users can create multiple notebooks, upload sources (PDF/PPTX/TXT/URL), chat with their sources using Retrieval-Augmented Generation (RAG) with citations, and generate study artifacts (report, quiz, podcast).
+## Features
+- HF OAuth login (per-user isolation)
+- Multi-notebook support: create/rename/delete
+- Ingestion: PDF / PPTX / TXT / URL
+- Chunking + Embedding (Sentence-Transformers all-MiniLM-L6-v2)
+- Vector search using ChromaDB (persistent per notebook)
+- Chat with citations
+- Artifact generation:
+  - report (.md)
+  - quiz with answer key (.md)
+  - podcast transcript (.md) + audio (.mp3)
+## Environment Variables
+### Hugging Face Space
+- DATA_ROOT=/data
+## Local Dev
+1. Create a virtual environment and install the dependencies:
+   - pip install -r requirements.txt
+2. Run:
+   - python app.py
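+Note: HF OAuth is best tested in a Space. When run locally (neither `SPACE_ID` nor `HF_SPACE_ID` is set), the app falls back to a fixed `localuser` account.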

app.py CHANGED Viewed

@@ -1,7 +1,12 @@
 import os
-# Disable Chroma telemetry noise
-os.environ["ANONYMIZED_TELEMETRY"] = "FALSE"
 from src.frontend.ui import build_app
@@ -12,4 +17,4 @@ if __name__ == "__main__":
         server_name="0.0.0.0",
         server_port=int(os.getenv("PORT", "7860")),
         show_api=False,
-    )

 import os
+# ----- Disable telemetry / analytics noise -----
+# Gradio analytics (UI usage pings)
+os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
+# HF hub telemetry (optional)
+os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
+# Chroma telemetry (we also disable via Settings in chroma_store.py)
+os.environ["ANONYMIZED_TELEMETRY"] = "False"
 from src.frontend.ui import build_app
         server_name="0.0.0.0",
         server_port=int(os.getenv("PORT", "7860")),
         show_api=False,
+    )

src/backend/__pycache__/artifacts.cpython-310.pyc CHANGED Viewed

Binary files a/src/backend/__pycache__/artifacts.cpython-310.pyc and b/src/backend/__pycache__/artifacts.cpython-310.pyc differ

src/backend/__pycache__/auth.cpython-310.pyc CHANGED Viewed

Binary files a/src/backend/__pycache__/auth.cpython-310.pyc and b/src/backend/__pycache__/auth.cpython-310.pyc differ

src/backend/__pycache__/ingest.cpython-310.pyc CHANGED Viewed

Binary files a/src/backend/__pycache__/ingest.cpython-310.pyc and b/src/backend/__pycache__/ingest.cpython-310.pyc differ

src/backend/__pycache__/llm.cpython-310.pyc CHANGED Viewed

Binary files a/src/backend/__pycache__/llm.cpython-310.pyc and b/src/backend/__pycache__/llm.cpython-310.pyc differ

src/backend/__pycache__/notebooks.cpython-310.pyc CHANGED Viewed

Binary files a/src/backend/__pycache__/notebooks.cpython-310.pyc and b/src/backend/__pycache__/notebooks.cpython-310.pyc differ

src/backend/__pycache__/rag.cpython-310.pyc CHANGED Viewed

Binary files a/src/backend/__pycache__/rag.cpython-310.pyc and b/src/backend/__pycache__/rag.cpython-310.pyc differ

src/backend/auth.py CHANGED Viewed

@@ -2,32 +2,51 @@ import os
 import gradio as gr
 def require_login(request: gr.Request) -> str:
     """
-    Hugging Face Spaces OAuth provides user info via request in some Gradio versions,
-    but not always. We use multiple fallbacks:
-    1) request.username (best case)
-    2) HF-proxy headers (x-forwarded-*)
-    3) local/dev fallback
     """
-    # 1) Best-case Gradio field
     username = getattr(request, "username", None)
     if username:
         return str(username)
-    # 2) Fallback: HF spaces headers (varies by proxy/version)
-    headers = getattr(request, "headers", {}) or {}
     for key in [
         "x-forwarded-user",
         "x-hf-user",
         "x-forwarded-preferred-username",
         "x-auth-request-preferred-username",
     ]:
-        if key in headers and headers[key]:
-            return str(headers[key])
-    # 3) Optional local fallback (so app doesn't hard-crash during dev)
-    if os.getenv("HF_SPACE_ID") is None:
         return "localuser"
-    raise gr.Error("Please log in using 'Sign in with Hugging Face' to use this app.")

 import gradio as gr
def _get_headers_dict(request: gr.Request) -> dict:
    """Return the request headers as a plain dict with lower-cased keys.

    Args:
        request: Object whose optional ``headers`` attribute is a mapping
            (or an iterable of key/value pairs).

    Returns:
        A new ``dict`` mapping ``str(key).lower()`` to the original value.
        An empty dict is returned when ``headers`` is missing, falsy, or
        cannot be converted with ``dict()``.
    """
    raw = getattr(request, "headers", None) or {}
    try:
        pairs = dict(raw)
    except (TypeError, ValueError):
        # ``dict()`` raises these for non-iterables and for iterables whose
        # elements are not key/value pairs; treat that as "no headers".
        # Only the conversion is guarded so unrelated errors stay visible.
        return {}
    # Lower-case the keys so lookups do not depend on header casing.
    return {str(key).lower(): value for key, value in pairs.items()}
def _first_value(v):
    """Collapse a possibly multi-valued header into a single value.

    Some frameworks store header values as lists or tuples.

    Args:
        v: A header value, or a list/tuple of header values.

    Returns:
        The first element when ``v`` is a non-empty list or tuple, ``None``
        when it is an empty list or tuple, and ``v`` itself otherwise.
    """
    if isinstance(v, (list, tuple)):
        # An empty container carries no value; report that as None rather
        # than handing the container itself back to the caller.
        return v[0] if v else None
    return v
 def require_login(request: gr.Request) -> str:
     """Resolve the name of the user behind ``request``.

     Lookup order:
       1. ``request.username`` when it is set and truthy.
       2. The first truthy value among a fixed list of proxy headers.
       3. ``"localuser"`` when neither ``SPACE_ID`` nor ``HF_SPACE_ID`` is
          present in the environment.

     Raises:
         gr.Error: when no user could be determined and one of the Space
             environment variables is set.
     """
     direct = getattr(request, "username", None)
     if direct:
         return str(direct)

     # NOTE(review): presumably these headers are set by a trusted proxy;
     # confirm, since any value found here is accepted as the identity.
     header_names = (
         "x-forwarded-user",
         "x-hf-user",
         "x-forwarded-preferred-username",
         "x-auth-request-preferred-username",
     )
     headers = _get_headers_dict(request)
     for name in header_names:
         candidate = _first_value(headers.get(name))
         if candidate:
             return str(candidate)

     # With no Space environment variable set, use a fixed account instead
     # of failing.
     on_space = os.getenv("SPACE_ID") is not None or os.getenv("HF_SPACE_ID") is not None
     if not on_space:
         return "localuser"
     raise gr.Error("Please log in using 'Sign in with Hugging Face' to use this app.")

src/backend/ingest.py CHANGED Viewed

@@ -1,4 +1,6 @@
-import os, pathlib
 import requests
 from bs4 import BeautifulSoup
 from pypdf import PdfReader
@@ -11,6 +13,17 @@ from src.utils.text import safe_name
 EMBED_MODEL = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
 def simple_chunk(text: str, max_chars=2200, overlap=250):
     text = "\n".join([ln.strip() for ln in (text or "").splitlines() if ln.strip()]).strip()
     if not text:
@@ -21,19 +34,25 @@ def simple_chunk(text: str, max_chars=2200, overlap=250):
     while start < len(text):
         end = min(len(text), start + max_chars)
         out.append(text[start:end])
-        if end == len(text): break
         start = max(0, end - overlap)
     return out
 def extract_pdf(path: str):
     reader = PdfReader(path)
     items = []
     for i, page in enumerate(reader.pages):
         txt = (page.extract_text() or "").strip()
         if txt:
-            items.append({"text": txt, "page": i+1})
     return items
 def extract_pptx(path: str):
     prs = Presentation(path)
     items = []
@@ -44,41 +63,67 @@ def extract_pptx(path: str):
                 texts.append(shape.text)
         txt = "\n".join(t.strip() for t in texts if t.strip()).strip()
         if txt:
-            items.append({"text": txt, "slide": i+1})
     return items
 def extract_txt(path: str):
     with open(path, "r", encoding="utf-8", errors="ignore") as f:
         txt = f.read().strip()
-    return [{"text": txt, "page": None}] if txt else []
 def extract_url(url: str):
     r = requests.get(url, timeout=15, headers={"User-Agent": "Mozilla/5.0"})
     r.raise_for_status()
     soup = BeautifulSoup(r.text, "html.parser")
-    for tag in soup(["script","style","noscript"]):
         tag.decompose()
     text = soup.get_text("\n")
     text = "\n".join([ln.strip() for ln in text.splitlines() if ln.strip()])
-    return [{"text": text[:200000], "page": None}]
-def upsert_extracted(username: str, notebook_id: str, source_title: str, source_id: str, extracted_items: list[dict]) -> int:
     col = get_collection(username, notebook_id)
     ids, docs, metas = [], [], []
-    for item in extracted_items:
-        page = item.get("page", None)
-        slide = item.get("slide", None)
-        for j, ch in enumerate(simple_chunk(item["text"])):
-            ids.append(f"{source_id}::chunk{j}")
             docs.append(ch)
             meta = {
                 "source_title": str(source_title),
                 "source_id": str(source_id),
             }
-            # IMPORTANT: Chroma metadata cannot include None
             if page is not None:
                 meta["page"] = int(page)
             if slide is not None:
@@ -94,36 +139,32 @@ def upsert_extracted(username: str, notebook_id: str, source_title: str, source_
     return len(docs)
-def ingest_files(username: str, notebook_id: str, filepaths) -> int:
-    """
-    filepaths may be:
-    - list[str]
-    - list[Gradio file objects]
-    """
     ensure_tree(username, notebook_id)
     raw_dir = os.path.join(nb_root(username, notebook_id), "files_raw")
     ex_dir = os.path.join(nb_root(username, notebook_id), "files_extracted")
     added = 0
-    # Normalize gradio file objects -> local paths
-    normalized_paths = []
-    if isinstance(filepaths, (list, tuple)):
-        for f in filepaths:
-            if f is None:
-                continue
-            # Gradio may pass objects with .name
-            if hasattr(f, "name") and isinstance(f.name, str):
-                normalized_paths.append(f.name)
-            elif isinstance(f, str):
-                normalized_paths.append(f)
-            elif isinstance(f, dict) and "name" in f:
-                normalized_paths.append(f["name"])
-    elif isinstance(filepaths, str):
-        normalized_paths = [filepaths]
-    for fp in normalized_paths:
-        dest = os.path.join(raw_dir, os.path.basename(fp))
-        pathlib.Path(dest).write_bytes(pathlib.Path(fp).read_bytes())
         ext = os.path.splitext(dest)[1].lower()
         if ext == ".pdf":
@@ -135,24 +176,27 @@ def ingest_files(username: str, notebook_id: str, filepaths) -> int:
         else:
             continue
-        # save extracted
-        ex_path = os.path.join(ex_dir, os.path.basename(dest) + ".txt")
-        with open(ex_path, "w", encoding="utf-8") as f:
             for item in extracted:
                 if item.get("page") is not None:
                     loc = f"page={item.get('page')}"
                 elif item.get("slide") is not None:
                     loc = f"slide={item.get('slide')}"
-                else:
-                    loc = ""
-                f.write(f"\n--- {loc} ---\n{item['text']}\n")
         added += upsert_extracted(
-            username,
-            notebook_id,
-            os.path.basename(dest),
-            f"file:{os.path.basename(dest)}",
-            extracted,
         )
     return added
@@ -160,9 +204,24 @@ def ingest_files(username: str, notebook_id: str, filepaths) -> int:
 def ingest_url(username: str, notebook_id: str, url: str) -> int:
     ensure_tree(username, notebook_id)
     extracted = extract_url(url)
     ex_dir = os.path.join(nb_root(username, notebook_id), "files_extracted")
-    fname = safe_name(url.replace("https://","").replace("http://","").replace("/","_")) + ".txt"
     with open(os.path.join(ex_dir, fname), "w", encoding="utf-8") as f:
-        f.write(extracted[0]["text"])
-    return upsert_extracted(username, notebook_id, url, f"url:{url}", extracted)

+import os
+import pathlib
+import hashlib
 import requests
 from bs4 import BeautifulSoup
 from pypdf import PdfReader
 EMBED_MODEL = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+# -------------------------
+# Helpers
+# -------------------------
def _sha10_bytes(b: bytes) -> str:
    """Return the first 10 hex characters of the SHA-256 digest of ``b``."""
    digest = hashlib.sha256(b)
    return digest.hexdigest()[:10]
def _sha10_text(s: str) -> str:
    """Return the first 10 hex characters of the SHA-256 digest of ``s``.

    A falsy ``s`` (``None`` or ``""``) is hashed as the empty string. The
    text is encoded as UTF-8, and characters that cannot be encoded are
    dropped.
    """
    text = s if s else ""
    encoded = text.encode("utf-8", errors="ignore")
    return hashlib.sha256(encoded).hexdigest()[:10]
 def simple_chunk(text: str, max_chars=2200, overlap=250):
     text = "\n".join([ln.strip() for ln in (text or "").splitlines() if ln.strip()]).strip()
     if not text:
     while start < len(text):
         end = min(len(text), start + max_chars)
         out.append(text[start:end])
+        if end == len(text):
+            break
         start = max(0, end - overlap)
     return out
+# -------------------------
+# Extractors
+# -------------------------
 def extract_pdf(path: str):
     """Extract the text of every non-empty page of the PDF at ``path``.

     Returns:
         A list of ``{"text": ..., "page": ...}`` dicts in page order, where
         ``page`` is the 1-based page number. Pages whose extracted text is
         empty after stripping are omitted.
     """
     pages = PdfReader(path).pages
     return [
         {"text": body, "page": number}
         for number, page in enumerate(pages, start=1)
         # extract_text() may yield nothing; treat that as an empty page.
         if (body := (page.extract_text() or "").strip())
     ]
 def extract_pptx(path: str):
     prs = Presentation(path)
     items = []
                 texts.append(shape.text)
         txt = "\n".join(t.strip() for t in texts if t.strip()).strip()
         if txt:
+            items.append({"text": txt, "slide": i + 1})
     return items
 def extract_txt(path: str):
     """Read a text file and return its stripped content as a single item.

     The file is decoded as UTF-8, and undecodable bytes are dropped.

     Returns:
         ``[{"text": content}]`` when the stripped content is non-empty,
         otherwise an empty list.
     """
     with open(path, "r", encoding="utf-8", errors="ignore") as handle:
         content = handle.read().strip()
     if not content:
         return []
     return [{"text": content}]
 def extract_url(url: str):
     """Download ``url`` and return its text content as a single item.

     The page is fetched with a 15-second timeout and a browser-like
     User-Agent. ``script``, ``style`` and ``noscript`` elements are removed
     before the text is collected, and blank lines are dropped.

     Returns:
         A one-element list ``[{"text": ...}]`` whose text is truncated to
         200000 characters.

     Raises:
         Whatever ``requests`` raises for connection problems, plus the HTTP
         error raised by ``raise_for_status()`` for error responses.
     """
     # NOTE(review): the URL is fetched exactly as given — presumably it is
     # user supplied; confirm whether internal addresses must be rejected.
     response = requests.get(url, timeout=15, headers={"User-Agent": "Mozilla/5.0"})
     response.raise_for_status()

     soup = BeautifulSoup(response.text, "html.parser")
     for node in soup(["script", "style", "noscript"]):
         node.decompose()

     kept = []
     for line in soup.get_text("\n").splitlines():
         stripped = line.strip()
         if stripped:
             kept.append(stripped)
     page_text = "\n".join(kept)

     # Hard cap so that very large pages are truncated before ingestion.
     return [{"text": page_text[:200000]}]
+# -------------------------
+# Chroma upsert
+# -------------------------
+def upsert_extracted(
+    username: str,
+    notebook_id: str,
+    source_title: str,
+    source_id: str,
+    extracted_items: list[dict],
+) -> int:
     col = get_collection(username, notebook_id)
     ids, docs, metas = [], [], []
+    for item_idx, item in enumerate(extracted_items):
+        page = item.get("page")
+        slide = item.get("slide")
+        # stable location string (never None)
+        if page is not None:
+            loc = f"p{int(page)}"
+        elif slide is not None:
+            loc = f"s{int(slide)}"
+        else:
+            loc = f"item{item_idx}"
+        chunks = simple_chunk(item.get("text", ""))
+        for chunk_idx, ch in enumerate(chunks):
+            # ✅ unique per (source + loc + chunk)
+            cid = f"{source_id}::{loc}::chunk{chunk_idx}"
+            ids.append(cid)
             docs.append(ch)
             meta = {
                 "source_title": str(source_title),
                 "source_id": str(source_id),
             }
+            # ✅ Chroma metadata cannot contain None → only set if present
             if page is not None:
                 meta["page"] = int(page)
             if slide is not None:
     return len(docs)
+# -------------------------
+# Public API used by callbacks.py
+# -------------------------
+def ingest_files(username: str, notebook_id: str, files) -> int:
     ensure_tree(username, notebook_id)
     raw_dir = os.path.join(nb_root(username, notebook_id), "files_raw")
     ex_dir = os.path.join(nb_root(username, notebook_id), "files_extracted")
+    os.makedirs(raw_dir, exist_ok=True)
+    os.makedirs(ex_dir, exist_ok=True)
     added = 0
+    for f in (files or []):
+        fp = getattr(f, "name", None)
+        if not fp:
+            continue
+        # copy uploaded file to raw_dir
+        src_path = pathlib.Path(fp)
+        file_bytes = src_path.read_bytes()
+        base = os.path.basename(fp)
+        dest = os.path.join(raw_dir, base)
+        pathlib.Path(dest).write_bytes(file_bytes)
         ext = os.path.splitext(dest)[1].lower()
         if ext == ".pdf":
         else:
             continue
+        # save extracted text
+        ex_path = os.path.join(ex_dir, base + ".txt")
+        with open(ex_path, "w", encoding="utf-8") as ftxt:
             for item in extracted:
+                loc = ""
                 if item.get("page") is not None:
                     loc = f"page={item.get('page')}"
                 elif item.get("slide") is not None:
                     loc = f"slide={item.get('slide')}"
+                ftxt.write(f"\n--- {loc} ---\n{item.get('text','')}\n")
+        # ✅ Add a hash so repeated ingest of same filename won't collide
+        file_hash = _sha10_bytes(file_bytes)
+        source_id = f"file:{base}:{file_hash}"
         added += upsert_extracted(
+            username=username,
+            notebook_id=notebook_id,
+            source_title=base,
+            source_id=source_id,
+            extracted_items=extracted,
         )
     return added
 def ingest_url(username: str, notebook_id: str, url: str) -> int:
     """Fetch ``url``, save its extracted text and upsert it for the notebook.

     The extracted text is written to a ``.txt`` file in the notebook's
     ``files_extracted`` directory; the file name is derived from the URL.
     The source id combines the URL with a short hash of the extracted
     text.

     Returns:
         The value returned by ``upsert_extracted``.
     """
     ensure_tree(username, notebook_id)
     extracted = extract_url(url)

     ex_dir = os.path.join(nb_root(username, notebook_id), "files_extracted")
     os.makedirs(ex_dir, exist_ok=True)

     # Build a file name from the URL without its scheme or slashes.
     stripped = url.replace("https://", "").replace("http://", "").replace("/", "_")
     out_path = os.path.join(ex_dir, safe_name(stripped) + ".txt")
     with open(out_path, "w", encoding="utf-8") as handle:
         page_text = extracted[0].get("text", "")
         handle.write(page_text)

     # The text hash is part of the id, so changed page content gets a new id.
     source_id = f"url:{url}:{_sha10_text(page_text)}"
     return upsert_extracted(
         username=username,
         notebook_id=notebook_id,
         source_title=url,
         source_id=source_id,
         extracted_items=extracted,
     )

src/frontend/__pycache__/callbacks.cpython-310.pyc CHANGED Viewed

Binary files a/src/frontend/__pycache__/callbacks.cpython-310.pyc and b/src/frontend/__pycache__/callbacks.cpython-310.pyc differ

src/frontend/__pycache__/ui.cpython-310.pyc CHANGED Viewed

Binary files a/src/frontend/__pycache__/ui.cpython-310.pyc and b/src/frontend/__pycache__/ui.cpython-310.pyc differ

src/frontend/ui.py CHANGED Viewed

@@ -11,15 +11,13 @@ from src.frontend.callbacks import (
     on_report,
     on_quiz,
     on_podcast,
-    on_download
 )
 from src.backend.auth import require_login
 def build_app():
     with gr.Blocks(title="NotebookLM Clone") as demo:
         gr.Markdown("# 📓 NotebookLM Clone (HF Auth + Chroma + RAG)")
         login = gr.LoginButton()
@@ -27,296 +25,156 @@ def build_app():
         username_state = gr.State("")
-        # ---------- LOAD ----------
-        def on_load(request: gr.Request):
-            username = require_login(request)
-            dd, chat, arts = ui_bootstrap(username)
-            return username, dd, chat, arts
         with gr.Row():
-            # ---------- LEFT PANEL ----------
             with gr.Column(scale=1):
-                user_box = gr.Textbox(
-                    label="User",
-                    interactive=False
-                )
-                notebook_dd = gr.Dropdown(
-                    label="Notebooks",
-                    choices=[],
-                    interactive=True
-                )
-                nb_new = gr.Textbox(
-                    label="Create notebook",
-                    placeholder="Name"
-                )
                 btn_create = gr.Button("Create")
-                nb_rename = gr.Textbox(
-                    label="Rename notebook",
-                    placeholder="New name"
-                )
                 btn_rename = gr.Button("Rename")
-                btn_delete = gr.Button(
-                    "Delete current",
-                    variant="stop"
-                )
-                # ---------- INGEST ----------
                 gr.Markdown("## Ingest")
-                file_up = gr.File(
-                    label="Upload PDF/PPTX/TXT",
-                    file_count="multiple"
-                )
                 btn_ingest_files = gr.Button("Ingest Files")
-                ingest_status = gr.Textbox(
-                    label="Status",
-                    interactive=False
-                )
-                url_in = gr.Textbox(
-                    label="URL",
-                    placeholder="https://..."
-                )
                 btn_ingest_url = gr.Button("Ingest URL")
-                url_status = gr.Textbox(
-                    label="Status",
-                    interactive=False
-                )
-                # ---------- ARTIFACTS ----------
                 gr.Markdown("## Artifacts")
-                topic = gr.Textbox(
-                    label="Topic / prompt"
-                )
-                extra = gr.Textbox(
-                    label="Extra prompt (optional)"
-                )
                 btn_report = gr.Button("Generate Report")
                 btn_quiz = gr.Button("Generate Quiz")
                 btn_podcast = gr.Button("Generate Podcast")
-                artifact_status = gr.Textbox(
-                    label="Artifact status",
-                    interactive=False
-                )
-                artifacts_list = gr.Dropdown(
-                    label="Artifacts",
-                    choices=[],
-                    interactive=True
-                )
                 download_btn = gr.Button("Download selected")
-                download_file = gr.File(
-                    label="Download",
-                    interactive=False
-                )
-                podcast_audio = gr.Audio(
-                    label="Podcast Audio",
-                    interactive=False
-                )
-            # ---------- RIGHT PANEL ----------
             with gr.Column(scale=2):
-                chatbot = gr.Chatbot(
-                    height=520,
-                    label="Chat (RAG + citations)"
-                )
                 msg = gr.Textbox(label="Message")
                 send = gr.Button("Send")
-        # ---------- EVENTS (API DISABLED FIX) ----------
         demo.load(
             on_load,
             inputs=None,
             outputs=[
                 username_state,
                 notebook_dd,
                 chatbot,
-                artifacts_list
             ],
             queue=False,
-            api_name=False
-        )
-        username_state.change(
-            lambda u: u,
-            inputs=username_state,
-            outputs=user_box,
-            queue=False,
-            api_name=False
         )
         notebook_dd.change(
             on_switch_notebook,
             inputs=[username_state, notebook_dd],
             outputs=[chatbot, artifacts_list],
             queue=False,
-            api_name=False
         )
         btn_create.click(
             on_create_notebook,
             inputs=[username_state, nb_new],
-            outputs=[
-                notebook_dd,
-                chatbot,
-                artifacts_list
-            ],
             queue=False,
-            api_name=False
         )
         btn_rename.click(
             on_rename_notebook,
-            inputs=[
-                username_state,
-                notebook_dd,
-                nb_rename
-            ],
             outputs=[notebook_dd],
             queue=False,
-            api_name=False
         )
         btn_delete.click(
             on_delete_notebook,
-            inputs=[
-                username_state,
-                notebook_dd
-            ],
-            outputs=[
-                notebook_dd,
-                chatbot,
-                artifacts_list
-            ],
             queue=False,
-            api_name=False
         )
         btn_ingest_files.click(
             on_ingest_files,
-            inputs=[
-                username_state,
-                notebook_dd,
-                file_up
-            ],
             outputs=[ingest_status],
             queue=True,
-            api_name=False
         )
         btn_ingest_url.click(
             on_ingest_url,
-            inputs=[
-                username_state,
-                notebook_dd,
-                url_in
-            ],
             outputs=[url_status],
             queue=True,
-            api_name=False
         )
         send.click(
             on_chat,
-            inputs=[
-                username_state,
-                notebook_dd,
-                chatbot,
-                msg
-            ],
-            outputs=[
-                chatbot,
-                msg
-            ],
             queue=True,
-            api_name=False
         )
         btn_report.click(
             on_report,
-            inputs=[
-                username_state,
-                notebook_dd,
-                topic,
-                extra
-            ],
-            outputs=[
-                artifact_status,
-                artifacts_list,
-                download_file
-            ],
             queue=True,
-            api_name=False
         )
         btn_quiz.click(
             on_quiz,
-            inputs=[
-                username_state,
-                notebook_dd,
-                topic,
-                extra
-            ],
-            outputs=[
-                artifact_status,
-                artifacts_list,
-                download_file
-            ],
             queue=True,
-            api_name=False
         )
         btn_podcast.click(
             on_podcast,
-            inputs=[
-                username_state,
-                notebook_dd,
-                topic,
-                extra
-            ],
-            outputs=[
-                artifact_status,
-                artifacts_list,
-                download_file,
-                podcast_audio
-            ],
             queue=True,
-            api_name=False
         )
         download_btn.click(
             on_download,
-            inputs=[
-                username_state,
-                notebook_dd,
-                artifacts_list
-            ],
             outputs=[download_file],
             queue=False,
-            api_name=False
         )
-    return demo

     on_report,
     on_quiz,
     on_podcast,
+    on_download,
 )
 from src.backend.auth import require_login
 def build_app():
     with gr.Blocks(title="NotebookLM Clone") as demo:
         gr.Markdown("# 📓 NotebookLM Clone (HF Auth + Chroma + RAG)")
         login = gr.LoginButton()
         username_state = gr.State("")
+        # ---------- UI ----------
         with gr.Row():
             with gr.Column(scale=1):
+                user_box = gr.Textbox(label="User", interactive=False)
+                notebook_dd = gr.Dropdown(label="Notebooks", choices=[], interactive=True)
+                nb_new = gr.Textbox(label="Create notebook", placeholder="Name")
                 btn_create = gr.Button("Create")
+                nb_rename = gr.Textbox(label="Rename notebook", placeholder="New name")
                 btn_rename = gr.Button("Rename")
+                btn_delete = gr.Button("Delete current", variant="stop")
                 gr.Markdown("## Ingest")
+                file_up = gr.File(label="Upload PDF/PPTX/TXT", file_count="multiple")
                 btn_ingest_files = gr.Button("Ingest Files")
+                ingest_status = gr.Textbox(label="Status", interactive=False)
+                url_in = gr.Textbox(label="URL", placeholder="https://...")
                 btn_ingest_url = gr.Button("Ingest URL")
+                url_status = gr.Textbox(label="Status", interactive=False)
                 gr.Markdown("## Artifacts")
+                topic = gr.Textbox(label="Topic / prompt")
+                extra = gr.Textbox(label="Extra prompt (optional)")
                 btn_report = gr.Button("Generate Report")
                 btn_quiz = gr.Button("Generate Quiz")
                 btn_podcast = gr.Button("Generate Podcast")
+                artifact_status = gr.Textbox(label="Artifact status", interactive=False)
+                artifacts_list = gr.Dropdown(label="Artifacts", choices=[], interactive=True)
                 download_btn = gr.Button("Download selected")
+                download_file = gr.File(label="Download", interactive=False)
+                podcast_audio = gr.Audio(label="Podcast Audio", interactive=False)
             with gr.Column(scale=2):
+                chatbot = gr.Chatbot(height=520, label="Chat (RAG + citations)")
                 msg = gr.Textbox(label="Message")
                 send = gr.Button("Send")
+        # ---------- LOAD ----------
+        def on_load(request: gr.Request):
+            username = require_login(request)
+            dd, chat, arts = ui_bootstrap(username)
+            # ✅ Return user_box value directly so it always shows
+            return username, username, dd, chat, arts
         demo.load(
             on_load,
             inputs=None,
             outputs=[
                 username_state,
+                user_box,        # ✅ always filled
                 notebook_dd,
                 chatbot,
+                artifacts_list,
             ],
             queue=False,
+            api_name=False,
         )
+        # ---------- EVENTS ----------
         notebook_dd.change(
             on_switch_notebook,
             inputs=[username_state, notebook_dd],
             outputs=[chatbot, artifacts_list],
             queue=False,
+            api_name=False,
         )
         btn_create.click(
             on_create_notebook,
             inputs=[username_state, nb_new],
+            outputs=[notebook_dd, chatbot, artifacts_list],
             queue=False,
+            api_name=False,
         )
         btn_rename.click(
             on_rename_notebook,
+            inputs=[username_state, notebook_dd, nb_rename],
             outputs=[notebook_dd],
             queue=False,
+            api_name=False,
         )
         btn_delete.click(
             on_delete_notebook,
+            inputs=[username_state, notebook_dd],
+            outputs=[notebook_dd, chatbot, artifacts_list],
             queue=False,
+            api_name=False,
         )
         btn_ingest_files.click(
             on_ingest_files,
+            inputs=[username_state, notebook_dd, file_up],
             outputs=[ingest_status],
             queue=True,
+            api_name=False,
         )
         btn_ingest_url.click(
             on_ingest_url,
+            inputs=[username_state, notebook_dd, url_in],
             outputs=[url_status],
             queue=True,
+            api_name=False,
         )
         send.click(
             on_chat,
+            inputs=[username_state, notebook_dd, chatbot, msg],
+            outputs=[chatbot, msg],
             queue=True,
+            api_name=False,
         )
         btn_report.click(
             on_report,
+            inputs=[username_state, notebook_dd, topic, extra],
+            outputs=[artifact_status, artifacts_list, download_file],
             queue=True,
+            api_name=False,
         )
         btn_quiz.click(
             on_quiz,
+            inputs=[username_state, notebook_dd, topic, extra],
+            outputs=[artifact_status, artifacts_list, download_file],
             queue=True,
+            api_name=False,
         )
         btn_podcast.click(
             on_podcast,
+            inputs=[username_state, notebook_dd, topic, extra],
+            outputs=[artifact_status, artifacts_list, download_file, podcast_audio],
             queue=True,
+            api_name=False,
         )
         download_btn.click(
             on_download,
+            inputs=[username_state, notebook_dd, artifacts_list],
             outputs=[download_file],
             queue=False,
+            api_name=False,
         )
+    return demo

src/storage/__pycache__/artifact_store.cpython-310.pyc CHANGED Viewed

Binary files a/src/storage/__pycache__/artifact_store.cpython-310.pyc and b/src/storage/__pycache__/artifact_store.cpython-310.pyc differ

src/storage/__pycache__/chat_store.cpython-310.pyc CHANGED Viewed

Binary files a/src/storage/__pycache__/chat_store.cpython-310.pyc and b/src/storage/__pycache__/chat_store.cpython-310.pyc differ

src/storage/__pycache__/chroma_store.cpython-310.pyc CHANGED Viewed

Binary files a/src/storage/__pycache__/chroma_store.cpython-310.pyc and b/src/storage/__pycache__/chroma_store.cpython-310.pyc differ

src/storage/__pycache__/index_store.cpython-310.pyc CHANGED Viewed

Binary files a/src/storage/__pycache__/index_store.cpython-310.pyc and b/src/storage/__pycache__/index_store.cpython-310.pyc differ

src/storage/__pycache__/paths.cpython-310.pyc CHANGED Viewed

Binary files a/src/storage/__pycache__/paths.cpython-310.pyc and b/src/storage/__pycache__/paths.cpython-310.pyc differ

src/storage/chroma_store.py CHANGED Viewed

@@ -1,17 +1,25 @@
 import os
 import chromadb
 from src.storage.paths import nb_root
 # Cache clients by persist_dir to avoid "ephemeral with different settings"
 _CLIENTS: dict[str, chromadb.PersistentClient] = {}
 def chroma_client(username: str, notebook_id: str) -> chromadb.PersistentClient:
     persist_dir = os.path.join(nb_root(username, notebook_id), "chroma")
     os.makedirs(persist_dir, exist_ok=True)
     if persist_dir not in _CLIENTS:
-        _CLIENTS[persist_dir] = chromadb.PersistentClient(path=persist_dir)
     return _CLIENTS[persist_dir]
 def get_collection(username: str, notebook_id: str, name: str = "chunks"):

 import os
 import chromadb
+from chromadb.config import Settings
 from src.storage.paths import nb_root
 # Cache clients by persist_dir to avoid "ephemeral with different settings"
 _CLIENTS: dict[str, chromadb.PersistentClient] = {}
+# One shared Settings object (important: consistent settings!)
+_SETTINGS = Settings(
+    anonymized_telemetry=False,   # ✅ disables telemetry (stops capture() errors)
+    allow_reset=True,
+)
 def chroma_client(username: str, notebook_id: str) -> chromadb.PersistentClient:
     """Return the cached Chroma client for a notebook, creating it on first use.

     The client persists to the ``chroma`` sub-directory of the notebook
     root, which is created if missing. Clients are cached in ``_CLIENTS``
     keyed by that directory and are all built with the shared
     ``_SETTINGS`` object.
     """
     persist_dir = os.path.join(nb_root(username, notebook_id), "chroma")
     os.makedirs(persist_dir, exist_ok=True)

     client = _CLIENTS.get(persist_dir)
     if client is None:
         client = chromadb.PersistentClient(path=persist_dir, settings=_SETTINGS)
         _CLIENTS[persist_dir] = client
     return client
 def get_collection(username: str, notebook_id: str, name: str = "chunks"):

src/utils/__pycache__/text.cpython-310.pyc CHANGED Viewed

Binary files a/src/utils/__pycache__/text.cpython-310.pyc and b/src/utils/__pycache__/text.cpython-310.pyc differ