NotebookLMClone

Runtime error

App Files Community

Hitakshi26 committed on Mar 3

Commit

1bfb390

1 Parent(s): b40b09c

Testing2

Browse files

Files changed (23) hide show

app.py +5 -8
requirements.txt +1 -2
src/backend/__pycache__/artifacts.cpython-310.pyc +0 -0
src/backend/__pycache__/auth.cpython-310.pyc +0 -0
src/backend/__pycache__/ingest.cpython-310.pyc +0 -0
src/backend/__pycache__/llm.cpython-310.pyc +0 -0
src/backend/__pycache__/notebooks.cpython-310.pyc +0 -0
src/backend/__pycache__/rag.cpython-310.pyc +0 -0
src/backend/auth.py +15 -50
src/backend/ingest.py +42 -118
src/backend/llm.py +38 -10
src/backend/rag.py +9 -4
src/frontend/__pycache__/callbacks.cpython-310.pyc +0 -0
src/frontend/__pycache__/ui.cpython-310.pyc +0 -0
src/frontend/callbacks.py +12 -2
src/frontend/ui.py +131 -47
src/storage/__pycache__/artifact_store.cpython-310.pyc +0 -0
src/storage/__pycache__/chat_store.cpython-310.pyc +0 -0
src/storage/__pycache__/chroma_store.cpython-310.pyc +0 -0
src/storage/__pycache__/index_store.cpython-310.pyc +0 -0
src/storage/__pycache__/paths.cpython-310.pyc +0 -0
src/storage/chroma_store.py +18 -16
src/utils/__pycache__/text.cpython-310.pyc +0 -0

app.py CHANGED Viewed

@@ -1,12 +1,9 @@
 import os
-# ----- Disable telemetry / analytics noise -----
-# Gradio analytics (UI usage pings)
-os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
-# HF hub telemetry (optional)
-os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
-# Chroma telemetry (we also disable via Settings in chroma_store.py)
-os.environ["ANONYMIZED_TELEMETRY"] = "False"
 from src.frontend.ui import build_app
@@ -17,4 +14,4 @@ if __name__ == "__main__":
         server_name="0.0.0.0",
         server_port=int(os.getenv("PORT", "7860")),
         show_api=False,
-    )

+#"use client"
 import os
+# Disable Chroma telemetry noise
+os.environ["ANONYMIZED_TELEMETRY"] = "FALSE"
 from src.frontend.ui import build_app
         server_name="0.0.0.0",
         server_port=int(os.getenv("PORT", "7860")),
         show_api=False,
+    )

requirements.txt CHANGED Viewed

@@ -6,6 +6,5 @@ python-pptx==1.0.2
 beautifulsoup4==4.12.3
 requests==2.32.3
 gTTS==2.5.3
-huggingface_hub==0.24.6
-pydub==0.25.1

 beautifulsoup4==4.12.3
 requests==2.32.3
 gTTS==2.5.3
+huggingface_hub>=0.31.0,<1.0

src/backend/__pycache__/artifacts.cpython-310.pyc CHANGED Viewed

Binary files a/src/backend/__pycache__/artifacts.cpython-310.pyc and b/src/backend/__pycache__/artifacts.cpython-310.pyc differ

src/backend/__pycache__/auth.cpython-310.pyc CHANGED Viewed

Binary files a/src/backend/__pycache__/auth.cpython-310.pyc and b/src/backend/__pycache__/auth.cpython-310.pyc differ

src/backend/__pycache__/ingest.cpython-310.pyc CHANGED Viewed

Binary files a/src/backend/__pycache__/ingest.cpython-310.pyc and b/src/backend/__pycache__/ingest.cpython-310.pyc differ

src/backend/__pycache__/llm.cpython-310.pyc CHANGED Viewed

Binary files a/src/backend/__pycache__/llm.cpython-310.pyc and b/src/backend/__pycache__/llm.cpython-310.pyc differ

src/backend/__pycache__/notebooks.cpython-310.pyc CHANGED Viewed

Binary files a/src/backend/__pycache__/notebooks.cpython-310.pyc and b/src/backend/__pycache__/notebooks.cpython-310.pyc differ

src/backend/__pycache__/rag.cpython-310.pyc CHANGED Viewed

Binary files a/src/backend/__pycache__/rag.cpython-310.pyc and b/src/backend/__pycache__/rag.cpython-310.pyc differ

src/backend/auth.py CHANGED Viewed

@@ -2,67 +2,32 @@ import os
 import gradio as gr
-def _header_get(headers: dict, key: str):
-    """Case-insensitive header lookup."""
-    if not headers:
-        return None
-    lk = key.lower()
-    for k, v in headers.items():
-        if str(k).lower() == lk:
-            return v
-    return None
-def get_username_from_request(request: gr.Request) -> str | None:
     """
-    Try multiple ways to extract username from HF Spaces OAuth / proxy.
-    Different Gradio + Spaces versions expose this differently.
     """
-    if request is None:
-        return None
-    # 1) Best-case: gradio sets request.username
     username = getattr(request, "username", None)
     if username:
         return str(username)
-    # 2) Headers (varies by HF proxy / auth config)
-    headers = getattr(request, "headers", None) or {}
-    for key in (
         "x-forwarded-user",
         "x-hf-user",
         "x-forwarded-preferred-username",
         "x-auth-request-preferred-username",
-        "hf-user",
-    ):
-        v = _header_get(headers, key)
-        if v:
-            return str(v)
-    # 3) Some deployments put user info in query params / cookies (rare)
-    # Keep simple: if not found, return None
-    return None
-def require_login(request: gr.Request) -> str:
-    """
-    'Strict' login:
-    - If REQUIRE_LOGIN=1, enforce that we got a username.
-    - Otherwise, gracefully fall back to a guest user (so the app runs).
-    """
-    username = get_username_from_request(request)
-    if username:
-        return username
-    # Local/dev always allow
     if os.getenv("HF_SPACE_ID") is None:
         return "localuser"
-    # On Spaces: optionally enforce
-    if os.getenv("REQUIRE_LOGIN", "0") == "1":
-        raise gr.Error("Please log in using 'Sign in with Hugging Face' to use this app.")
-    # Default: allow guest mode so the app works
-    # NOTE: single shared guest account. If you want per-user separation without auth:
-    # use request.client.host or a random session id (but keep simple for submission).
-    return "guest"

 import gradio as gr
+def require_login(request: gr.Request) -> str:
     """
+    Hugging Face Spaces OAuth provides user info via request in some Gradio versions,
+    but not always. We use multiple fallbacks:
+    1) request.username (best case)
+    2) HF-proxy headers (x-forwarded-*)
+    3) local/dev fallback
     """
+    # 1) Best-case Gradio field
     username = getattr(request, "username", None)
     if username:
         return str(username)
+    # 2) Fallback: HF spaces headers (varies by proxy/version)
+    headers = getattr(request, "headers", {}) or {}
+    for key in [
         "x-forwarded-user",
         "x-hf-user",
         "x-forwarded-preferred-username",
         "x-auth-request-preferred-username",
+    ]:
+        if key in headers and headers[key]:
+            return str(headers[key])
+    # 3) Optional local fallback (so app doesn't hard-crash during dev)
     if os.getenv("HF_SPACE_ID") is None:
         return "localuser"
+    raise gr.Error("Please log in using 'Sign in with Hugging Face' to use this app.")

src/backend/ingest.py CHANGED Viewed

@@ -1,6 +1,4 @@
-import os
-import pathlib
-import hashlib
 import requests
 from bs4 import BeautifulSoup
 from pypdf import PdfReader
@@ -14,15 +12,13 @@ from src.utils.text import safe_name
 EMBED_MODEL = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
-# -------------------------
-# Helpers
-# -------------------------
-def _sha10_bytes(b: bytes) -> str:
-    return hashlib.sha256(b).hexdigest()[:10]
-def _sha10_text(s: str) -> str:
-    return hashlib.sha256((s or "").encode("utf-8", errors="ignore")).hexdigest()[:10]
 def simple_chunk(text: str, max_chars=2200, overlap=250):
     text = "\n".join([ln.strip() for ln in (text or "").splitlines() if ln.strip()]).strip()
@@ -34,25 +30,19 @@ def simple_chunk(text: str, max_chars=2200, overlap=250):
     while start < len(text):
         end = min(len(text), start + max_chars)
         out.append(text[start:end])
-        if end == len(text):
-            break
         start = max(0, end - overlap)
     return out
-# -------------------------
-# Extractors
-# -------------------------
 def extract_pdf(path: str):
     reader = PdfReader(path)
     items = []
     for i, page in enumerate(reader.pages):
         txt = (page.extract_text() or "").strip()
         if txt:
-            items.append({"text": txt, "page": i + 1})
     return items
 def extract_pptx(path: str):
     prs = Presentation(path)
     items = []
@@ -63,108 +53,63 @@ def extract_pptx(path: str):
                 texts.append(shape.text)
         txt = "\n".join(t.strip() for t in texts if t.strip()).strip()
         if txt:
-            items.append({"text": txt, "slide": i + 1})
     return items
 def extract_txt(path: str):
     with open(path, "r", encoding="utf-8", errors="ignore") as f:
         txt = f.read().strip()
-    return [{"text": txt}] if txt else []
 def extract_url(url: str):
     r = requests.get(url, timeout=15, headers={"User-Agent": "Mozilla/5.0"})
     r.raise_for_status()
     soup = BeautifulSoup(r.text, "html.parser")
-    for tag in soup(["script", "style", "noscript"]):
         tag.decompose()
     text = soup.get_text("\n")
     text = "\n".join([ln.strip() for ln in text.splitlines() if ln.strip()])
-    # hard cap so we don’t embed infinite pages
-    return [{"text": text[:200000]}]
-# -------------------------
-# Chroma upsert
-# -------------------------
-def upsert_extracted(
-    username: str,
-    notebook_id: str,
-    source_title: str,
-    source_id: str,
-    extracted_items: list[dict],
-) -> int:
-    col = get_collection(username, notebook_id)
     ids, docs, metas = [], [], []
     for item_idx, item in enumerate(extracted_items):
-        page = item.get("page")
-        slide = item.get("slide")
-        # stable location string (never None)
-        if page is not None:
-            loc = f"p{int(page)}"
-        elif slide is not None:
-            loc = f"s{int(slide)}"
-        else:
-            loc = f"item{item_idx}"
-        chunks = simple_chunk(item.get("text", ""))
-        for chunk_idx, ch in enumerate(chunks):
-            # ✅ unique per (source + loc + chunk)
-            cid = f"{source_id}::{loc}::chunk{chunk_idx}"
-            ids.append(cid)
             docs.append(ch)
             meta = {
-                "source_title": str(source_title),
-                "source_id": str(source_id),
             }
-            # ✅ Chroma metadata cannot contain None → only set if present
-            if page is not None:
-                meta["page"] = int(page)
-            if slide is not None:
-                meta["slide"] = int(slide)
             metas.append(meta)
     if not docs:
         return 0
     embs = EMBED_MODEL.encode(docs, normalize_embeddings=True).tolist()
     col.upsert(ids=ids, documents=docs, metadatas=metas, embeddings=embs)
     return len(docs)
-# -------------------------
-# Public API used by callbacks.py
-# -------------------------
 def ingest_files(username: str, notebook_id: str, files) -> int:
     ensure_tree(username, notebook_id)
     raw_dir = os.path.join(nb_root(username, notebook_id), "files_raw")
     ex_dir = os.path.join(nb_root(username, notebook_id), "files_extracted")
-    os.makedirs(raw_dir, exist_ok=True)
-    os.makedirs(ex_dir, exist_ok=True)
     added = 0
     for f in (files or []):
-        fp = getattr(f, "name", None)
         if not fp:
             continue
-        # copy uploaded file to raw_dir
-        src_path = pathlib.Path(fp)
-        file_bytes = src_path.read_bytes()
-        base = os.path.basename(fp)
-        dest = os.path.join(raw_dir, base)
-        pathlib.Path(dest).write_bytes(file_bytes)
         ext = os.path.splitext(dest)[1].lower()
         if ext == ".pdf":
@@ -176,52 +121,31 @@ def ingest_files(username: str, notebook_id: str, files) -> int:
         else:
             continue
-        # save extracted text
-        ex_path = os.path.join(ex_dir, base + ".txt")
         with open(ex_path, "w", encoding="utf-8") as ftxt:
             for item in extracted:
                 loc = ""
-                if item.get("page") is not None:
                     loc = f"page={item.get('page')}"
-                elif item.get("slide") is not None:
                     loc = f"slide={item.get('slide')}"
-                ftxt.write(f"\n--- {loc} ---\n{item.get('text','')}\n")
-        # ✅ Add a hash so repeated ingest of same filename won't collide
-        file_hash = _sha10_bytes(file_bytes)
-        source_id = f"file:{base}:{file_hash}"
         added += upsert_extracted(
-            username=username,
-            notebook_id=notebook_id,
-            source_title=base,
-            source_id=source_id,
-            extracted_items=extracted,
         )
     return added
 def ingest_url(username: str, notebook_id: str, url: str) -> int:
     ensure_tree(username, notebook_id)
     extracted = extract_url(url)
     ex_dir = os.path.join(nb_root(username, notebook_id), "files_extracted")
-    os.makedirs(ex_dir, exist_ok=True)
-    # save extracted page text
-    fname = safe_name(url.replace("https://", "").replace("http://", "").replace("/", "_")) + ".txt"
     with open(os.path.join(ex_dir, fname), "w", encoding="utf-8") as f:
-        f.write(extracted[0].get("text", ""))
-    # ✅ Hash text so re-ingest doesn’t collide
-    text_hash = _sha10_text(extracted[0].get("text", ""))
-    source_id = f"url:{url}:{text_hash}"
-    return upsert_extracted(
-        username=username,
-        notebook_id=notebook_id,
-        source_title=url,
-        source_id=source_id,
-        extracted_items=extracted,
-    )

+import os, pathlib
 import requests
 from bs4 import BeautifulSoup
 from pypdf import PdfReader
 EMBED_MODEL = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+def _file_path_from_gradio_obj(file_obj):
+    if isinstance(file_obj, str):
+        return file_obj
+    path = getattr(file_obj, "name", None)
+    if isinstance(path, str):
+        return path
+    return None
 def simple_chunk(text: str, max_chars=2200, overlap=250):
     text = "\n".join([ln.strip() for ln in (text or "").splitlines() if ln.strip()]).strip()
     while start < len(text):
         end = min(len(text), start + max_chars)
         out.append(text[start:end])
+        if end == len(text): break
         start = max(0, end - overlap)
     return out
 def extract_pdf(path: str):
     reader = PdfReader(path)
     items = []
     for i, page in enumerate(reader.pages):
         txt = (page.extract_text() or "").strip()
         if txt:
+            items.append({"text": txt, "page": i+1})
     return items
 def extract_pptx(path: str):
     prs = Presentation(path)
     items = []
                 texts.append(shape.text)
         txt = "\n".join(t.strip() for t in texts if t.strip()).strip()
         if txt:
+            items.append({"text": txt, "slide": i+1})
     return items
 def extract_txt(path: str):
     with open(path, "r", encoding="utf-8", errors="ignore") as f:
         txt = f.read().strip()
+    return [{"text": txt, "page": None}] if txt else []
 def extract_url(url: str):
     r = requests.get(url, timeout=15, headers={"User-Agent": "Mozilla/5.0"})
     r.raise_for_status()
     soup = BeautifulSoup(r.text, "html.parser")
+    for tag in soup(["script","style","noscript"]):
         tag.decompose()
     text = soup.get_text("\n")
     text = "\n".join([ln.strip() for ln in text.splitlines() if ln.strip()])
+    return [{"text": text[:200000], "page": None}]
+def upsert_extracted(username: str, notebook_id: str, source_title: str, source_id: str, extracted_items: list[dict]) -> int:
+    col = get_collection(username, notebook_id)
     ids, docs, metas = [], [], []
+    chunk_counter = 0
     for item_idx, item in enumerate(extracted_items):
+        for j, ch in enumerate(simple_chunk(item["text"])):
+            ids.append(f"{source_id}::item{item_idx}::chunk{j}::{chunk_counter}")
             docs.append(ch)
             meta = {
+                "source_title": source_title,
+                "source_id": source_id,
+                "page": item.get("page"),
+                "slide": item.get("slide"),
             }
+            meta = {k: v for k, v in meta.items() if v is not None}
             metas.append(meta)
+            chunk_counter += 1
     if not docs:
         return 0
     embs = EMBED_MODEL.encode(docs, normalize_embeddings=True).tolist()
     col.upsert(ids=ids, documents=docs, metadatas=metas, embeddings=embs)
     return len(docs)
 def ingest_files(username: str, notebook_id: str, files) -> int:
     ensure_tree(username, notebook_id)
     raw_dir = os.path.join(nb_root(username, notebook_id), "files_raw")
     ex_dir = os.path.join(nb_root(username, notebook_id), "files_extracted")
     added = 0
     for f in (files or []):
+        fp = _file_path_from_gradio_obj(f)
         if not fp:
             continue
+        if not os.path.exists(fp):
+            continue
+        dest = os.path.join(raw_dir, os.path.basename(fp))
+        pathlib.Path(dest).write_bytes(pathlib.Path(fp).read_bytes())
         ext = os.path.splitext(dest)[1].lower()
         if ext == ".pdf":
         else:
             continue
+        ex_path = os.path.join(ex_dir, os.path.basename(dest) + ".txt")
         with open(ex_path, "w", encoding="utf-8") as ftxt:
             for item in extracted:
                 loc = ""
+                if item.get("page"):
                     loc = f"page={item.get('page')}"
+                elif item.get("slide"):
                     loc = f"slide={item.get('slide')}"
+                ftxt.write(f"\n--- {loc} ---\n{item['text']}\n")
         added += upsert_extracted(
+            username,
+            notebook_id,
+            os.path.basename(dest),
+            f"file:{os.path.basename(dest)}",
+            extracted,
         )
     return added
 def ingest_url(username: str, notebook_id: str, url: str) -> int:
     ensure_tree(username, notebook_id)
     extracted = extract_url(url)
     ex_dir = os.path.join(nb_root(username, notebook_id), "files_extracted")
+    fname = safe_name(url.replace("https://","").replace("http://","").replace("/","_")) + ".txt"
     with open(os.path.join(ex_dir, fname), "w", encoding="utf-8") as f:
+        f.write(extracted[0]["text"])
+    return upsert_extracted(username, notebook_id, url, f"url:{url}", extracted)

src/backend/llm.py CHANGED Viewed

@@ -1,20 +1,48 @@
 import os
 import gradio as gr
 from huggingface_hub import InferenceClient
 HF_INFERENCE_TOKEN = os.environ.get("HF_INFERENCE_TOKEN","").strip()
 HF_LLM_MODEL = os.environ.get("HF_LLM_MODEL","HuggingFaceH4/zephyr-7b-beta").strip()
-_client = InferenceClient(model=HF_LLM_MODEL, token=HF_INFERENCE_TOKEN) if HF_INFERENCE_TOKEN else None
 def llm_generate(prompt: str, max_new_tokens=450, temperature=0.2) -> str:
     if _client is None:
-        raise gr.Error("HF_INFERENCE_TOKEN not set. Add it in Space secrets.")
-    out = _client.text_generation(
-        prompt,
-        max_new_tokens=max_new_tokens,
-        temperature=temperature,
-        do_sample=temperature > 0,
-        return_full_text=False,
-    )
-    return (out or "").strip()

 import os
 import gradio as gr
 from huggingface_hub import InferenceClient
+from huggingface_hub.utils import HfHubHTTPError
 HF_INFERENCE_TOKEN = os.environ.get("HF_INFERENCE_TOKEN","").strip()
+HF_TOKEN = os.environ.get("HF_TOKEN", "").strip()
+HF_API_TOKEN = HF_INFERENCE_TOKEN or HF_TOKEN
 HF_LLM_MODEL = os.environ.get("HF_LLM_MODEL","HuggingFaceH4/zephyr-7b-beta").strip()
+_client = InferenceClient(model=HF_LLM_MODEL, token=HF_API_TOKEN) if HF_API_TOKEN else None
 def llm_generate(prompt: str, max_new_tokens=450, temperature=0.2) -> str:
     if _client is None:
+        raise gr.Error("Set HF_INFERENCE_TOKEN (or HF_TOKEN) in Space secrets or local environment.")
+    try:
+        out = _client.text_generation(
+            prompt,
+            max_new_tokens=max_new_tokens,
+            temperature=temperature,
+            do_sample=temperature > 0,
+            return_full_text=False,
+        )
+        return (out or "").strip()
+    except ValueError as e:
+        msg = str(e)
+        if "not supported for task text-generation" in msg or "Supported task: conversational" in msg:
+            try:
+                resp = _client.chat.completions.create(
+                    model=HF_LLM_MODEL,
+                    messages=[{"role": "user", "content": prompt}],
+                    max_tokens=max_new_tokens,
+                    temperature=temperature,
+                )
+                choice = (resp.choices or [None])[0]
+                content = getattr(getattr(choice, "message", None), "content", "") if choice else ""
+                return (content or "").strip()
+            except Exception as inner:
+                raise gr.Error(f"LLM request failed after conversational fallback: {inner}")
+        raise gr.Error(f"LLM request failed: {msg}")
+    except HfHubHTTPError as e:
+        msg = str(e)
+        if "api-inference.huggingface.co is no longer supported" in msg or "410 Client Error" in msg:
+            raise gr.Error(
+                "Your Hugging Face Hub client is outdated for inference routing. "
+                "Upgrade `huggingface_hub` and restart the app."
+            )
+        raise gr.Error(f"LLM request failed: {msg}")

src/backend/rag.py CHANGED Viewed

@@ -8,13 +8,18 @@ EMBED_MODEL = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
 def retrieve(username: str, notebook_id: str, query: str, k=6):
     col = get_collection(username, notebook_id)
     qemb = EMBED_MODEL.encode([query], normalize_embeddings=True).tolist()
     res = col.query(
-    query_embeddings=qemb,
-    n_results=k,
-    include=["documents", "metadatas", "distances"]
-)
     ids = res.get("ids", [[]])[0]
     docs = res.get("documents", [[]])[0]

 def retrieve(username: str, notebook_id: str, query: str, k=6):
     col = get_collection(username, notebook_id)
+    current_count = col.count()
+    if current_count <= 0:
+        return []
+    n_results = min(k, current_count)
     qemb = EMBED_MODEL.encode([query], normalize_embeddings=True).tolist()
     res = col.query(
+        query_embeddings=qemb,
+        n_results=n_results,
+        include=["documents", "metadatas", "distances"],
+    )
     ids = res.get("ids", [[]])[0]
     docs = res.get("documents", [[]])[0]

src/frontend/__pycache__/callbacks.cpython-310.pyc CHANGED Viewed

Binary files a/src/frontend/__pycache__/callbacks.cpython-310.pyc and b/src/frontend/__pycache__/callbacks.cpython-310.pyc differ

src/frontend/__pycache__/ui.cpython-310.pyc CHANGED Viewed

Binary files a/src/frontend/__pycache__/ui.cpython-310.pyc and b/src/frontend/__pycache__/ui.cpython-310.pyc differ

src/frontend/callbacks.py CHANGED Viewed

@@ -91,7 +91,12 @@ def on_ingest_files(username: str, notebook_id: str, files):
     _require_notebook(notebook_id)
     if not files:
         raise gr.Error("Upload at least one file.")
-    added = ingest_files_backend(username, notebook_id, files)
     return f"Ingested files. Added {added} chunks."
@@ -100,7 +105,12 @@ def on_ingest_url(username: str, notebook_id: str, url: str):
     url = (url or "").strip()
     if not url:
         raise gr.Error("Enter a URL.")
-    added = ingest_url_backend(username, notebook_id, url)
     return f"Ingested URL. Added {added} chunks."

     _require_notebook(notebook_id)
     if not files:
         raise gr.Error("Upload at least one file.")
+    try:
+        added = ingest_files_backend(username, notebook_id, files)
+    except Exception as e:
+        raise gr.Error(f"File ingest failed: {e}")
+    if added == 0:
+        raise gr.Error("No chunks were indexed. Use supported files (PDF/PPTX/TXT) with extractable text.")
     return f"Ingested files. Added {added} chunks."
     url = (url or "").strip()
     if not url:
         raise gr.Error("Enter a URL.")
+    try:
+        added = ingest_url_backend(username, notebook_id, url)
+    except Exception as e:
+        raise gr.Error(f"URL ingest failed: {e}")
+    if added == 0:
+        raise gr.Error("No chunks were indexed from the URL.")
     return f"Ingested URL. Added {added} chunks."

src/frontend/ui.py CHANGED Viewed

@@ -16,79 +16,163 @@ from src.frontend.callbacks import (
 from src.backend.auth import require_login
-def build_app():
-    with gr.Blocks(title="NotebookLM Clone") as demo:
-        gr.Markdown("# 📓 NotebookLM Clone (HF Auth + Chroma + RAG)")
-        gr.LoginButton().activate()
-        username_state = gr.State("")
-        # ---------- UI ----------
-        with gr.Row():
-            with gr.Column(scale=1):
-                user_box = gr.Textbox(label="User", interactive=False)
-                notebook_dd = gr.Dropdown(label="Notebooks", choices=[], interactive=True)
-                nb_new = gr.Textbox(label="Create notebook", placeholder="Name")
-                btn_create = gr.Button("Create")
-                nb_rename = gr.Textbox(label="Rename notebook", placeholder="New name")
-                btn_rename = gr.Button("Rename")
-                btn_delete = gr.Button("Delete current", variant="stop")
-                gr.Markdown("## Ingest")
-                file_up = gr.File(label="Upload PDF/PPTX/TXT", file_count="multiple")
-                btn_ingest_files = gr.Button("Ingest Files")
-                ingest_status = gr.Textbox(label="Status", interactive=False)
-                url_in = gr.Textbox(label="URL", placeholder="https://...")
-                btn_ingest_url = gr.Button("Ingest URL")
-                url_status = gr.Textbox(label="Status", interactive=False)
-                gr.Markdown("## Artifacts")
-                topic = gr.Textbox(label="Topic / prompt")
-                extra = gr.Textbox(label="Extra prompt (optional)")
-                btn_report = gr.Button("Generate Report")
-                btn_quiz = gr.Button("Generate Quiz")
-                btn_podcast = gr.Button("Generate Podcast")
-                artifact_status = gr.Textbox(label="Artifact status", interactive=False)
-                artifacts_list = gr.Dropdown(label="Artifacts", choices=[], interactive=True)
-                download_btn = gr.Button("Download selected")
-                download_file = gr.File(label="Download", interactive=False)
-                podcast_audio = gr.Audio(label="Podcast Audio", interactive=False)
-            with gr.Column(scale=2):
-                chatbot = gr.Chatbot(height=520, label="Chat (RAG + citations)")
-                msg = gr.Textbox(label="Message")
-                send = gr.Button("Send")
-        # ---------- LOAD ----------
         def on_load(request: gr.Request):
-            username = require_login(request)  # will fall back to "guest" if missing
             dd, chat, arts = ui_bootstrap(username)
             return username, dd, chat, arts
         demo.load(
             on_load,
             inputs=None,
-            outputs=[
-                username_state,
-                user_box,        # ✅ always filled
-                notebook_dd,
-                chatbot,
-                artifacts_list,
-            ],
             queue=False,
             api_name=False,
         )
-        # ---------- EVENTS ----------
         notebook_dd.change(
             on_switch_notebook,
             inputs=[username_state, notebook_dd],

 from src.backend.auth import require_login
+CUSTOM_CSS = """
+.gradio-container {
+  max-width: 1320px !important;
+  margin: 0 auto !important;
+  padding-top: 18px !important;
+}
+.hero {
+  border: 1px solid rgba(255,255,255,.08);
+  border-radius: 16px;
+  padding: 16px 18px;
+  background: linear-gradient(145deg, rgba(50,87,255,.16), rgba(145,92,255,.12));
+  backdrop-filter: blur(6px);
+}
+.hero h1 {
+  margin: 0;
+  font-size: 1.5rem;
+  letter-spacing: .2px;
+}
+.hero p {
+  margin: 6px 0 0;
+  opacity: .88;
+}
+.panel {
+  border: 1px solid rgba(255,255,255,.08);
+  border-radius: 14px;
+  background: linear-gradient(180deg, rgba(255,255,255,.04), rgba(255,255,255,.02));
+  padding: 10px;
+}
+.chat-panel {
+  border: 1px solid rgba(255,255,255,.08);
+  border-radius: 14px;
+  padding: 10px;
+  background: linear-gradient(180deg, rgba(255,255,255,.04), rgba(255,255,255,.02));
+}
+.chat-panel .message-wrap {
+  border-radius: 12px;
+}
+.chat-input textarea {
+  min-height: 92px !important;
+}
+.primary-btn button {
+  border-radius: 10px !important;
+}
+"""
+def build_app():
+    theme = gr.themes.Soft(
+        primary_hue="blue",
+        secondary_hue="indigo",
+        neutral_hue="slate",
+        spacing_size="md",
+        radius_size="lg",
+    )
+    with gr.Blocks(title="NotebookLM Clone", theme=theme, css=CUSTOM_CSS) as demo:
+        gr.Markdown(
+            """
+<div class='hero'>
+  <h1>📓 NotebookLM Clone</h1>
+  <p>Organize notebooks, ingest sources, and chat with RAG-backed citations.</p>
+</div>
+            """
+        )
+        login = gr.LoginButton()
+        login.activate()
+        username_state = gr.State("")
         def on_load(request: gr.Request):
+            username = require_login(request)
             dd, chat, arts = ui_bootstrap(username)
             return username, dd, chat, arts
+        with gr.Row(equal_height=True):
+            with gr.Column(scale=1, min_width=360, elem_classes=["panel"]):
+                user_box = gr.Textbox(label="User", interactive=False)
+                with gr.Accordion("Notebook", open=True):
+                    notebook_dd = gr.Dropdown(
+                        label="Notebooks",
+                        choices=[],
+                        interactive=True,
+                    )
+                    nb_new = gr.Textbox(label="Create notebook", placeholder="Name")
+                    btn_create = gr.Button("Create", elem_classes=["primary-btn"])
+                    nb_rename = gr.Textbox(label="Rename notebook", placeholder="New name")
+                    btn_rename = gr.Button("Rename")
+                    btn_delete = gr.Button("Delete current", variant="stop")
+                with gr.Accordion("Ingest", open=True):
+                    file_up = gr.File(label="Upload PDF / PPTX / TXT", file_count="multiple")
+                    btn_ingest_files = gr.Button("Ingest Files", elem_classes=["primary-btn"])
+                    ingest_status = gr.Textbox(label="File ingest status", interactive=False)
+                    url_in = gr.Textbox(label="URL", placeholder="https://...")
+                    btn_ingest_url = gr.Button("Ingest URL")
+                    url_status = gr.Textbox(label="URL ingest status", interactive=False)
+                with gr.Accordion("Artifacts", open=False):
+                    topic = gr.Textbox(label="Topic / prompt")
+                    extra = gr.Textbox(label="Extra prompt (optional)")
+                    with gr.Row():
+                        btn_report = gr.Button("Generate Report")
+                        btn_quiz = gr.Button("Generate Quiz")
+                        btn_podcast = gr.Button("Generate Podcast")
+                    artifact_status = gr.Textbox(label="Artifact status", interactive=False)
+                    artifacts_list = gr.Dropdown(label="Artifacts", choices=[], interactive=True)
+                    download_btn = gr.Button("Download selected")
+                    download_file = gr.File(label="Download", interactive=False)
+                    podcast_audio = gr.Audio(label="Podcast Audio", interactive=False)
+            with gr.Column(scale=2, min_width=560, elem_classes=["chat-panel"]):
+                chatbot = gr.Chatbot(
+                    height=520,
+                    label="Chat (RAG + citations)",
+                    bubble_full_width=False,
+                )
+                with gr.Row():
+                    msg = gr.Textbox(
+                        label="Message",
+                        placeholder="Ask about your uploaded sources...",
+                        elem_classes=["chat-input"],
+                        scale=5,
+                    )
+                    send = gr.Button(
+                        "Send",
+                        variant="primary",
+                        scale=1,
+                        elem_classes=["primary-btn"],
+                    )
         demo.load(
             on_load,
             inputs=None,
+            outputs=[username_state, notebook_dd, chatbot, artifacts_list],
+            queue=False,
+            api_name=False,
+        )
+        username_state.change(
+            lambda u: u,
+            inputs=username_state,
+            outputs=user_box,
             queue=False,
             api_name=False,
         )
         notebook_dd.change(
             on_switch_notebook,
             inputs=[username_state, notebook_dd],

src/storage/__pycache__/artifact_store.cpython-310.pyc CHANGED Viewed

Binary files a/src/storage/__pycache__/artifact_store.cpython-310.pyc and b/src/storage/__pycache__/artifact_store.cpython-310.pyc differ

src/storage/__pycache__/chat_store.cpython-310.pyc CHANGED Viewed

Binary files a/src/storage/__pycache__/chat_store.cpython-310.pyc and b/src/storage/__pycache__/chat_store.cpython-310.pyc differ

src/storage/__pycache__/chroma_store.cpython-310.pyc CHANGED Viewed

Binary files a/src/storage/__pycache__/chroma_store.cpython-310.pyc and b/src/storage/__pycache__/chroma_store.cpython-310.pyc differ

src/storage/__pycache__/index_store.cpython-310.pyc CHANGED Viewed

Binary files a/src/storage/__pycache__/index_store.cpython-310.pyc and b/src/storage/__pycache__/index_store.cpython-310.pyc differ

src/storage/__pycache__/paths.cpython-310.pyc CHANGED Viewed

Binary files a/src/storage/__pycache__/paths.cpython-310.pyc and b/src/storage/__pycache__/paths.cpython-310.pyc differ

src/storage/chroma_store.py CHANGED Viewed

@@ -1,27 +1,29 @@
 import os
 import chromadb
-from chromadb.config import Settings
 from src.storage.paths import nb_root
-# Cache clients by persist_dir to avoid "ephemeral with different settings"
-_CLIENTS: dict[str, chromadb.PersistentClient] = {}
-# One shared Settings object (important: consistent settings!)
-_SETTINGS = Settings(
-    anonymized_telemetry=False,   # ✅ disables telemetry (stops capture() errors)
-    allow_reset=True,
-)
-def chroma_client(username: str, notebook_id: str) -> chromadb.PersistentClient:
-    persist_dir = os.path.join(nb_root(username, notebook_id), "chroma")
     os.makedirs(persist_dir, exist_ok=True)
-    if persist_dir not in _CLIENTS:
-        _CLIENTS[persist_dir] = chromadb.PersistentClient(path=persist_dir, settings=_SETTINGS)
-    return _CLIENTS[persist_dir]
-def get_collection(username: str, notebook_id: str, name: str = "chunks"):
     client = chroma_client(username, notebook_id)
-    return client.get_or_create_collection(name=name)

 import os
 import chromadb
 from src.storage.paths import nb_root
+def chroma_client(username, notebook_id):
+    persist_dir = os.path.join(
+        nb_root(username, notebook_id),
+        "chroma"
+    )
     os.makedirs(persist_dir, exist_ok=True)
+    return chromadb.PersistentClient(
+        path=persist_dir,
+        settings=chromadb.config.Settings(
+            anonymized_telemetry=False
+        )
+    )
+def get_collection(username, notebook_id):
     client = chroma_client(username, notebook_id)
+    return client.get_or_create_collection(
+        name="notebook"
+    )

src/utils/__pycache__/text.cpython-310.pyc CHANGED Viewed

Binary files a/src/utils/__pycache__/text.cpython-310.pyc and b/src/utils/__pycache__/text.cpython-310.pyc differ