NotebookLMClone

Runtime error

App Files Files Community

Hitakshi26 committed on Mar 2

Commit

b4c7867

1 Parent(s): e0ae835

NotebookLM Clone

Browse files

Files changed (32) hide show

README.md +0 -12
app.py +6 -0
requirements.txt +11 -0
src/backend/__pycache__/artifacts.cpython-310.pyc +0 -0
src/backend/__pycache__/auth.cpython-310.pyc +0 -0
src/backend/__pycache__/ingest.cpython-310.pyc +0 -0
src/backend/__pycache__/llm.cpython-310.pyc +0 -0
src/backend/__pycache__/notebooks.cpython-310.pyc +0 -0
src/backend/__pycache__/rag.cpython-310.pyc +0 -0
src/backend/artifacts.py +82 -0
src/backend/auth.py +7 -0
src/backend/ingest.py +122 -0
src/backend/llm.py +20 -0
src/backend/notebooks.py +49 -0
src/backend/rag.py +56 -0
src/frontend/__pycache__/callbacks.cpython-310.pyc +0 -0
src/frontend/__pycache__/ui.cpython-310.pyc +0 -0
src/frontend/callbacks.py +129 -0
src/frontend/ui.py +80 -0
src/storage/__pycache__/artifact_store.cpython-310.pyc +0 -0
src/storage/__pycache__/chat_store.cpython-310.pyc +0 -0
src/storage/__pycache__/chroma_store.cpython-310.pyc +0 -0
src/storage/__pycache__/index_store.cpython-310.pyc +0 -0
src/storage/__pycache__/paths.cpython-310.pyc +0 -0
src/storage/artifact_store.py +21 -0
src/storage/chat_store.py +22 -0
src/storage/chroma_store.py +13 -0
src/storage/index_store.py +31 -0
src/storage/paths.py +33 -0
src/utils/__pycache__/text.cpython-310.pyc +0 -0
src/utils/text.py +7 -0
src/utils/timing.py +0 -0

README.md CHANGED Viewed

@@ -1,12 +0,0 @@
----
-title: GPP1
-emoji: 🏆
-colorFrom: blue
-colorTo: green
-sdk: gradio
-sdk_version: 6.8.0
-app_file: app.py
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,6 @@

+# Entry point: build the Gradio Blocks UI once at import time and expose it
+# as the module-level name `demo`.
+from src.frontend.ui import build_app
+demo = build_app()
+# Only start the server when this file is executed directly as a script.
+if __name__ == "__main__":
+    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,11 @@

+gradio[oauth]==4.44.1
+chromadb==0.5.5
+sentence-transformers==3.0.1
+pypdf==4.3.1
+python-pptx==1.0.2
+beautifulsoup4==4.12.3
+requests==2.32.3
+gTTS==2.5.3
+huggingface_hub==0.24.6
+# Hugging Face token: never commit it to this file; set HF_INFERENCE_TOKEN as a Space secret instead.

src/backend/__pycache__/artifacts.cpython-310.pyc ADDED Viewed

Binary file (2.12 kB). View file

src/backend/__pycache__/auth.cpython-310.pyc ADDED Viewed

Binary file (461 Bytes). View file

src/backend/__pycache__/ingest.cpython-310.pyc ADDED Viewed

Binary file (4.94 kB). View file

src/backend/__pycache__/llm.cpython-310.pyc ADDED Viewed

Binary file (841 Bytes). View file

src/backend/__pycache__/notebooks.cpython-310.pyc ADDED Viewed

Binary file (1.9 kB). View file

src/backend/__pycache__/rag.cpython-310.pyc ADDED Viewed

Binary file (2.36 kB). View file

src/backend/artifacts.py ADDED Viewed

	@@ -0,0 +1,82 @@

+import re
+from gtts import gTTS
+from src.backend.llm import llm_generate
+from src.backend.rag import format_sources, context_block
+def generate_report(topic: str, hits, extra_prompt: str):
+    """Ask the LLM for a markdown study report on ``topic``, citing the retrieved ``hits`` as [S1], [S2], ... and honouring ``extra_prompt`` when given."""
+    sources_list = format_sources(hits)
+    excerpts = context_block(hits)
+    extra = extra_prompt or "(none)"
+    request = f"""
+Write a markdown study report grounded ONLY in the sources.
+Every non-trivial claim must include citations like [S1].
+Topic: {topic}
+Extra instructions: {extra}
+Sources list:
+{sources_list}
+Excerpts:
+{excerpts}
+Output:
+# Report
+## Key Concepts
+## Detailed Notes
+## Key Takeaways
+"""
+    return llm_generate(request, max_new_tokens=900, temperature=0.25)
+def generate_quiz(topic: str, hits, extra_prompt: str):
+    """Ask the LLM for an 8-question markdown quiz on ``topic`` (5 multiple choice, 3 short answer) with a cited answer key built from ``hits``."""
+    sources_list = format_sources(hits)
+    excerpts = context_block(hits)
+    extra = extra_prompt or "(none)"
+    request = f"""
+Write a markdown quiz grounded ONLY in the sources.
+Create 8 questions:
+- 5 multiple choice
+- 3 short answer
+Then include an Answer Key with explanations.
+Explanations must include citations like [S1].
+Topic: {topic}
+Extra instructions: {extra}
+Sources list:
+{sources_list}
+Excerpts:
+{excerpts}
+Output:
+# Quiz
+## Questions
+## Answer Key
+"""
+    return llm_generate(request, max_new_tokens=900, temperature=0.25)
+def generate_podcast_transcript(topic: str, hits, extra_prompt: str):
+    """Ask the LLM for a two-speaker markdown podcast transcript on ``topic``, grounded in and citing the retrieved ``hits``."""
+    sources_list = format_sources(hits)
+    excerpts = context_block(hits)
+    extra = extra_prompt or "(none)"
+    request = f"""
+Write a markdown podcast transcript grounded ONLY in the sources.
+Two speakers: Speaker 1 and Speaker 2.
+Every non-trivial claim must include citations like [S1].
+Topic: {topic}
+Extra instructions: {extra}
+Sources list:
+{sources_list}
+Excerpts:
+{excerpts}
+Output:
+# Podcast Transcript
+**Speaker 1:** ...
+**Speaker 2:** ...
+End with Sources section.
+"""
+    return llm_generate(request, max_new_tokens=900, temperature=0.3)
+def transcript_to_mp3(transcript_md: str, out_path: str, max_chars: int = 4500):
+    """Convert a markdown transcript to speech with gTTS and save it as an MP3 at ``out_path``.
+    Citation markers like [S1], heading hashes and bold markers are stripped, whitespace is
+    collapsed, and the text is cut to ``max_chars`` characters before synthesis.
+    Raises ValueError when nothing speakable is left after cleanup.
+    """
+    text = re.sub(r"\[(S\d+)\]", "", transcript_md or "")
+    text = re.sub(r"#+", "", text)
+    text = re.sub(r"\*\*", "", text)
+    text = re.sub(r"\s+", " ", text).strip()
+    text = text[:max_chars]
+    # Fail with a clear message here instead of handing an empty string to the TTS backend.
+    if not text:
+        raise ValueError("Transcript is empty after cleanup; nothing to synthesize.")
+    gTTS(text=text, lang="en").save(out_path)

src/backend/auth.py ADDED Viewed

	@@ -0,0 +1,7 @@

+import gradio as gr
+def require_login(request: gr.Request) -> str:
+    """Return the ``username`` attribute of the request, or raise ``gr.Error`` when it is missing or empty."""
+    user = getattr(request, "username", None)
+    if user:
+        return user
+    raise gr.Error("Please log in using 'Sign in with Hugging Face' to use this app.")

src/backend/ingest.py ADDED Viewed

	@@ -0,0 +1,122 @@

+import os, pathlib
+import requests
+from bs4 import BeautifulSoup
+from pypdf import PdfReader
+from pptx import Presentation
+from sentence_transformers import SentenceTransformer
+from src.storage.paths import nb_root, ensure_tree
+from src.storage.chroma_store import get_collection
+from src.utils.text import safe_name
+# Sentence-embedding model, instantiated once at import time; upsert_extracted() uses it to embed chunks.
+EMBED_MODEL = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+def simple_chunk(text: str, max_chars=2200, overlap=250):
+    """Split ``text`` into character windows of at most ``max_chars`` that overlap by ``overlap``.
+    Lines are stripped and blank lines dropped first. Returns [] for empty input and a
+    single-element list when the cleaned text already fits in one window.
+    Raises ValueError if splitting is needed and ``overlap`` is not in [0, max_chars).
+    """
+    text = "\n".join(ln.strip() for ln in (text or "").splitlines() if ln.strip()).strip()
+    if not text:
+        return []
+    if len(text) <= max_chars:
+        return [text]
+    # Each window starts (max_chars - overlap) after the previous one. A step of
+    # zero or less would never advance, so reject it instead of looping forever.
+    if not 0 <= overlap < max_chars:
+        raise ValueError("overlap must satisfy 0 <= overlap < max_chars")
+    step = max_chars - overlap
+    out = []
+    for start in range(0, len(text), step):
+        out.append(text[start:start + max_chars])
+        # Stop as soon as a window reaches the end of the text.
+        if start + max_chars >= len(text):
+            break
+    return out
+def extract_pdf(path: str):
+    """Return one ``{"text", "page"}`` dict per PDF page that yields non-empty text; pages are numbered from 1."""
+    found = []
+    for number, pg in enumerate(PdfReader(path).pages, start=1):
+        body = (pg.extract_text() or "").strip()
+        if not body:
+            continue
+        found.append({"text": body, "page": number})
+    return found
+def extract_pptx(path: str):
+    """Return one ``{"text", "slide"}`` dict per slide that has text; slides are numbered from 1."""
+    found = []
+    for number, sl in enumerate(Presentation(path).slides, start=1):
+        # Only shapes exposing a non-empty ``text`` attribute contribute.
+        pieces = [sh.text for sh in sl.shapes if hasattr(sh, "text") and sh.text]
+        body = "\n".join(piece.strip() for piece in pieces if piece.strip()).strip()
+        if body:
+            found.append({"text": body, "slide": number})
+    return found
+def extract_txt(path: str):
+    """Read a UTF-8 text file (undecodable bytes ignored) and return it as a single item, or [] when it is blank."""
+    with open(path, "r", encoding="utf-8", errors="ignore") as handle:
+        body = handle.read().strip()
+    if not body:
+        return []
+    return [{"text": body, "page": None}]
+def extract_url(url: str):
+    """Download ``url`` and return its text content (capped at 200000 characters) as a single item."""
+    response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=15)
+    response.raise_for_status()
+    page = BeautifulSoup(response.text, "html.parser")
+    # Remove script/style/noscript elements before extracting text.
+    for node in page(["script", "style", "noscript"]):
+        node.decompose()
+    stripped = (ln.strip() for ln in page.get_text("\n").splitlines())
+    cleaned = "\n".join(ln for ln in stripped if ln)
+    return [{"text": cleaned[:200000], "page": None}]
+def upsert_extracted(username: str, notebook_id: str, source_title: str, source_id: str, extracted_items: list[dict]) -> int:
+    """Chunk, embed and upsert extracted items into the notebook's Chroma collection.
+    Each item is a dict with "text" and optionally "page" or "slide". Returns the
+    number of chunks written (0 when the items produce no chunks).
+    """
+    col = get_collection(username, notebook_id)
+    ids, docs, metas = [], [], []
+    for item in extracted_items:
+        # Keep only location keys that have a value: None is not a str/int/float/bool
+        # metadata value, which is what the vector store expects.
+        location = {key: item[key] for key in ("page", "slide") if item.get(key) is not None}
+        for ch in simple_chunk(item["text"]):
+            # Number chunks across the whole source, not per item, so ids stay
+            # unique when a source has several pages or slides.
+            ids.append(f"{source_id}::chunk{len(ids)}")
+            docs.append(ch)
+            metas.append({"source_title": source_title, "source_id": source_id, **location})
+    if not docs:
+        return 0
+    embs = EMBED_MODEL.encode(docs, normalize_embeddings=True).tolist()
+    col.upsert(ids=ids, documents=docs, metadatas=metas, embeddings=embs)
+    return len(docs)
+def ingest_files(username: str, notebook_id: str, filepaths: list[str]) -> int:
+    """Copy each file into the notebook's ``files_raw`` folder, extract text from .pdf/.pptx/.txt/.md files,
+    dump the extracted text under ``files_extracted`` and index it. Other extensions are copied but not
+    indexed. Returns the total number of chunks added.
+    """
+    ensure_tree(username, notebook_id)
+    root = nb_root(username, notebook_id)
+    raw_dir = os.path.join(root, "files_raw")
+    ex_dir = os.path.join(root, "files_extracted")
+    extractors = {".pdf": extract_pdf, ".pptx": extract_pptx, ".txt": extract_txt, ".md": extract_txt}
+    total = 0
+    for src in filepaths:
+        name = os.path.basename(src)
+        dest = os.path.join(raw_dir, name)
+        pathlib.Path(dest).write_bytes(pathlib.Path(src).read_bytes())
+        extractor = extractors.get(os.path.splitext(dest)[1].lower())
+        if extractor is None:
+            continue
+        extracted = extractor(dest)
+        # Keep a plain-text dump of what was extracted, one section per page/slide.
+        with open(os.path.join(ex_dir, name + ".txt"), "w", encoding="utf-8") as dump:
+            for item in extracted:
+                if item.get("page"):
+                    loc = f"page={item.get('page')}"
+                elif item.get("slide"):
+                    loc = f"slide={item.get('slide')}"
+                else:
+                    loc = ""
+                dump.write(f"\n--- {loc} ---\n{item['text']}\n")
+        total += upsert_extracted(username, notebook_id, name, f"file:{name}", extracted)
+    return total
+def ingest_url(username: str, notebook_id: str, url: str) -> int:
+    """Fetch ``url``, save its extracted text under ``files_extracted`` and index it; returns the number of chunks added."""
+    ensure_tree(username, notebook_id)
+    extracted = extract_url(url)
+    # File name is derived from the URL without its scheme, slashes turned into underscores.
+    stem = url.replace("https://", "").replace("http://", "").replace("/", "_")
+    target = os.path.join(nb_root(username, notebook_id), "files_extracted", safe_name(stem) + ".txt")
+    with open(target, "w", encoding="utf-8") as dump:
+        dump.write(extracted[0]["text"])
+    return upsert_extracted(username, notebook_id, url, f"url:{url}", extracted)

src/backend/llm.py ADDED Viewed

	@@ -0,0 +1,20 @@

+import os
+import gradio as gr
+from huggingface_hub import InferenceClient
+# Inference token read from the environment; an empty value leaves the client unset.
+HF_INFERENCE_TOKEN = os.environ.get("HF_INFERENCE_TOKEN","").strip()
+# Model id used for text generation; overridable through the HF_LLM_MODEL environment variable.
+HF_LLM_MODEL = os.environ.get("HF_LLM_MODEL","HuggingFaceH4/zephyr-7b-beta").strip()
+# Shared client, created once at import time; None when no token is configured (llm_generate then raises).
+_client = InferenceClient(model=HF_LLM_MODEL, token=HF_INFERENCE_TOKEN) if HF_INFERENCE_TOKEN else None
+def llm_generate(prompt: str, max_new_tokens=450, temperature=0.2) -> str:
+    """Generate a completion for ``prompt`` with the shared inference client and return it stripped.
+    Raises ``gr.Error`` when no client was configured. Sampling is enabled only for ``temperature`` > 0.
+    """
+    if _client is None:
+        raise gr.Error("HF_INFERENCE_TOKEN not set. Add it in Space secrets.")
+    sampling = temperature > 0
+    generated = _client.text_generation(
+        prompt,
+        max_new_tokens=max_new_tokens,
+        temperature=temperature,
+        do_sample=sampling,
+        return_full_text=False,
+    )
+    if not generated:
+        return ""
+    return generated.strip()

src/backend/notebooks.py ADDED Viewed

	@@ -0,0 +1,49 @@

+import uuid
+import gradio as gr
+from src.storage.index_store import load_index, save_index, list_notebooks
+from src.storage.paths import ensure_tree
+from src.utils.text import safe_name
+from datetime import datetime
+def now_iso():
+    """Return the current UTC time as an ISO-8601 string with a trailing "Z"."""
+    from datetime import timezone
+    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC time
+    # and drop tzinfo so isoformat() still emits no "+00:00" offset.
+    return datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"
+def create_notebook(username: str, name: str) -> str:
+    """Add a notebook with a sanitized ``name`` to the user's index, create its folders and return its new UUID."""
+    name = safe_name(name)
+    idx = load_index(username)
+    nb_id = str(uuid.uuid4())
+    # One timestamp for both fields, so a fresh notebook has created_at == updated_at.
+    stamp = now_iso()
+    # setdefault tolerates an index without the "notebooks" key, as the other readers of the index do.
+    idx.setdefault("notebooks", []).append({
+        "id": nb_id,
+        "name": name,
+        "created_at": stamp,
+        "updated_at": stamp,
+    })
+    save_index(username, idx)
+    ensure_tree(username, nb_id)
+    return nb_id
+def rename_notebook(username: str, notebook_id: str, new_name: str):
+    """Give the notebook ``notebook_id`` a sanitized new name and refresh its ``updated_at``; raises ``gr.Error`` if the name is empty or the id is unknown."""
+    new_name = safe_name(new_name)
+    if not new_name:
+        raise gr.Error("Notebook name cannot be empty.")
+    idx = load_index(username)
+    for entry in idx.get("notebooks", []):
+        if entry["id"] != notebook_id:
+            continue
+        entry["name"] = new_name
+        entry["updated_at"] = now_iso()
+        break
+    else:
+        # Loop finished without a match.
+        raise gr.Error("Notebook not found.")
+    save_index(username, idx)
+def delete_notebook(username: str, notebook_id: str):
+    """Remove the notebook from the user's index and delete its folder, ignoring errors during removal."""
+    import shutil, os
+    from src.storage.paths import nb_root
+    idx = load_index(username)
+    kept = []
+    for entry in idx.get("notebooks", []):
+        if entry["id"] != notebook_id:
+            kept.append(entry)
+    idx["notebooks"] = kept
+    save_index(username, idx)
+    folder = nb_root(username, notebook_id)
+    if not os.path.exists(folder):
+        return
+    shutil.rmtree(folder, ignore_errors=True)

src/backend/rag.py ADDED Viewed

	@@ -0,0 +1,56 @@

+from sentence_transformers import SentenceTransformer
+from src.storage.chroma_store import get_collection
+from src.backend.llm import llm_generate
+# Sentence-embedding model, instantiated once at import time; retrieve() uses it to embed queries.
+EMBED_MODEL = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+def retrieve(username: str, notebook_id: str, query: str, k=6):
+    """Return up to ``k`` chunks from the notebook's collection closest to ``query``, each as a dict with "id", "doc" and "meta"."""
+    col = get_collection(username, notebook_id)
+    qemb = EMBED_MODEL.encode([query], normalize_embeddings=True).tolist()
+    # Ids are always part of a query result; "ids" is not an accepted `include` value.
+    res = col.query(query_embeddings=qemb, n_results=k, include=["documents", "metadatas"])
+    # A single query was sent, so its results are at index 0 of each list.
+    return [
+        {"id": hit_id, "doc": doc, "meta": meta or {}}
+        for hit_id, doc, meta in zip(res["ids"][0], res["documents"][0], res["metadatas"][0])
+    ]
+def format_sources(hits):
+    """Render one "[S<n>] <title> <location>" line per hit, numbered from 1 and joined with newlines."""
+    rendered = []
+    for number, hit in enumerate(hits, start=1):
+        meta = hit["meta"]
+        # A slide number takes precedence over a page number when both are present.
+        if meta.get("slide"):
+            where = f"slide {meta['slide']}"
+        elif meta.get("page"):
+            where = f"p.{meta['page']}"
+        else:
+            where = ""
+        title = meta.get("source_title", "source")
+        rendered.append(f"[S{number}] {title} {where}".strip())
+    return "\n".join(rendered)
+def context_block(hits):
+    """Render each hit as a "[S<n>] <title> <location>" header line followed by its text, separated by "---" rules."""
+    sections = []
+    for number, hit in enumerate(hits, start=1):
+        meta = hit["meta"]
+        # A slide number takes precedence over a page number when both are present.
+        if meta.get("slide"):
+            where = f"(slide {meta['slide']})"
+        elif meta.get("page"):
+            where = f"(page {meta['page']})"
+        else:
+            where = ""
+        title = meta.get("source_title", "source")
+        sections.append(f"[S{number}] {title} {where}\n{hit['doc']}")
+    return "\n\n---\n\n".join(sections)
+def rag_answer(query: str, hits):
+    """Answer ``query`` from the retrieved ``hits`` via the LLM and append the source list; returns a fixed message when there are no hits."""
+    if not hits:
+        return "Not found in the provided sources. (No indexed chunks yet.)"
+    sources_list = format_sources(hits)
+    excerpts = context_block(hits)
+    request = f"""
+You are a research assistant. Answer ONLY using the sources below.
+Every non-trivial claim must end with citations like [S1] or [S2].
+If not present in sources, say: Not found in the provided sources.
+Question:
+{query}
+Sources list:
+{sources_list}
+Source excerpts:
+{excerpts}
+Answer with citations:
+"""
+    reply = llm_generate(request, max_new_tokens=450, temperature=0.2)
+    return f"{reply}\n\nSources:\n{sources_list}"

src/frontend/__pycache__/callbacks.cpython-310.pyc ADDED Viewed

Binary file (4.91 kB). View file

src/frontend/__pycache__/ui.cpython-310.pyc ADDED Viewed

Binary file (3.19 kB). View file

src/frontend/callbacks.py ADDED Viewed

	@@ -0,0 +1,129 @@

+import time
+from datetime import datetime
+import gradio as gr
+from src.backend.notebooks import create_notebook, rename_notebook, delete_notebook
+from src.storage.index_store import list_notebooks
+from src.storage.paths import ensure_tree
+from src.storage.chat_store import append_chat, load_chat
+from src.storage.artifact_store import list_artifacts as list_artifacts_store, next_artifact_path
+from src.backend.ingest import ingest_files as ingest_files_backend, ingest_url as ingest_url_backend
+from src.backend.rag import retrieve, rag_answer
+from src.backend.artifacts import generate_report, generate_quiz, generate_podcast_transcript, transcript_to_mp3
+def now_iso():
+    """Return the current UTC time as an ISO-8601 string with a trailing "Z"."""
+    from datetime import timezone
+    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC time
+    # and drop tzinfo so isoformat() still emits no "+00:00" offset.
+    return datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"
+def chat_pairs(history):
+    """Fold stored role/content messages into (user, assistant) tuples.
+    A pair is emitted for every assistant message, using the most recent unanswered
+    user message (or "" when there is none). User messages never followed by an
+    assistant message are dropped.
+    """
+    paired = []
+    pending = None
+    for entry in history:
+        role = entry.get("role")
+        if role == "user":
+            pending = entry.get("content", "")
+            continue
+        if role == "assistant":
+            paired.append((pending or "", entry.get("content", "")))
+            pending = None
+    return paired
+def ui_bootstrap(username: str):
+    """Build the initial UI state: the notebook dropdown (creating "My First Notebook" if the user has none), plus the first notebook's chat pairs and artifact list."""
+    nbs = list_notebooks(username)
+    if nbs:
+        # Entries are (name, id) tuples; start on the first notebook.
+        current = nbs[0][1]
+    else:
+        current = create_notebook(username, "My First Notebook")
+        nbs = list_notebooks(username)
+    ensure_tree(username, current)
+    pairs = chat_pairs(load_chat(username, current))
+    artifacts = list_artifacts_store(username, current)
+    return gr.Dropdown(choices=nbs, value=current), pairs, artifacts
+def on_switch_notebook(username: str, notebook_id: str):
+    """Return the chat pairs and artifact list of the selected notebook, creating its folders if needed."""
+    ensure_tree(username, notebook_id)
+    pairs = chat_pairs(load_chat(username, notebook_id))
+    artifacts = list_artifacts_store(username, notebook_id)
+    return pairs, artifacts
+def on_create_notebook(username: str, name: str):
+    """Create a notebook and return the refreshed dropdown selecting it, an empty chat and its artifact list."""
+    created_id = create_notebook(username, name)
+    dropdown = gr.Dropdown(choices=list_notebooks(username), value=created_id)
+    return dropdown, [], list_artifacts_store(username, created_id)
+def on_rename_notebook(username: str, notebook_id: str, new_name: str):
+    """Rename the notebook and return a refreshed dropdown that keeps it selected."""
+    rename_notebook(username, notebook_id, new_name)
+    refreshed = list_notebooks(username)
+    return gr.Dropdown(choices=refreshed, value=notebook_id)
+def on_delete_notebook(username: str, notebook_id: str):
+    """Delete the notebook, then rebuild the UI state exactly as on first load."""
+    delete_notebook(username, notebook_id)
+    state = ui_bootstrap(username)
+    return state
+def on_ingest_files(username: str, notebook_id: str, files):
+    """Ingest the uploaded files into the notebook and return a status line; raises ``gr.Error`` when nothing was uploaded."""
+    if files:
+        count = ingest_files_backend(username, notebook_id, files)
+        return f"Ingested files. Added {count} chunks."
+    raise gr.Error("Upload at least one file.")
+def on_ingest_url(username: str, notebook_id: str, url: str):
+    """Ingest the page at ``url`` into the notebook and return a status line; raises ``gr.Error`` when the URL is blank."""
+    target = (url or "").strip()
+    if target == "":
+        raise gr.Error("Enter a URL.")
+    count = ingest_url_backend(username, notebook_id, target)
+    return f"Ingested URL. Added {count} chunks."
+def on_chat(username: str, notebook_id: str, chatbot, msg: str):
+    """Answer one chat message with RAG, persist both turns, and return the extended chat plus "" to clear the input box."""
+    question = (msg or "").strip()
+    if question == "":
+        return chatbot, ""
+    started = time.time()
+    append_chat(username, notebook_id, {"role":"user","content":question,"ts":now_iso()})
+    answer = rag_answer(question, retrieve(username, notebook_id, question, k=6))
+    # latency_ms measures from just before the user turn was stored until the answer is ready.
+    append_chat(username, notebook_id, {"role":"assistant","content":answer,"ts":now_iso(),"latency_ms":int((time.time()-started)*1000)})
+    return chatbot + [(question, answer)], ""
+def on_report(username: str, notebook_id: str, topic: str, extra: str):
+    """Generate a report for ``topic`` from the notebook's sources and save it as a markdown artifact.
+    Returns (status message, refreshed artifact list, path of the new file).
+    Raises ``gr.Error`` when the topic is blank or nothing has been retrieved.
+    """
+    topic = (topic or "").strip()
+    if not topic:
+        raise gr.Error("Enter a topic.")
+    hits = retrieve(username, notebook_id, topic, k=6)
+    if not hits:
+        raise gr.Error("No sources yet. Ingest first.")
+    md = generate_report(topic, hits, extra)
+    out = next_artifact_path(username, notebook_id, "reports", ".md")
+    # Context manager so the file is flushed and closed before its path is returned.
+    with open(out, "w", encoding="utf-8") as f:
+        f.write(md)
+    return "Report generated.", list_artifacts_store(username, notebook_id), out
+def on_quiz(username: str, notebook_id: str, topic: str, extra: str):
+    """Generate a quiz for ``topic`` from the notebook's sources and save it as a markdown artifact.
+    Returns (status message, refreshed artifact list, path of the new file).
+    Raises ``gr.Error`` when the topic is blank or nothing has been retrieved.
+    """
+    topic = (topic or "").strip()
+    if not topic:
+        raise gr.Error("Enter a topic.")
+    hits = retrieve(username, notebook_id, topic, k=6)
+    if not hits:
+        raise gr.Error("No sources yet. Ingest first.")
+    md = generate_quiz(topic, hits, extra)
+    out = next_artifact_path(username, notebook_id, "quizzes", ".md")
+    # Context manager so the file is flushed and closed before its path is returned.
+    with open(out, "w", encoding="utf-8") as f:
+        f.write(md)
+    return "Quiz generated.", list_artifacts_store(username, notebook_id), out
+def on_podcast(username: str, notebook_id: str, topic: str, extra: str):
+    """Generate a podcast transcript for ``topic``, save it as markdown and render it to MP3.
+    Returns (status message, refreshed artifact list, transcript path, MP3 path).
+    Raises ``gr.Error`` when the topic is blank or nothing has been retrieved.
+    """
+    topic = (topic or "").strip()
+    if not topic:
+        raise gr.Error("Enter a topic.")
+    hits = retrieve(username, notebook_id, topic, k=6)
+    if not hits:
+        raise gr.Error("No sources yet. Ingest first.")
+    md = generate_podcast_transcript(topic, hits, extra)
+    md_path = next_artifact_path(username, notebook_id, "podcasts", ".md")
+    # Context manager so the transcript is flushed and closed before audio rendering starts.
+    with open(md_path, "w", encoding="utf-8") as f:
+        f.write(md)
+    mp3_path = next_artifact_path(username, notebook_id, "podcasts", ".mp3")
+    transcript_to_mp3(md, mp3_path)
+    return "Podcast generated.", list_artifacts_store(username, notebook_id), md_path, mp3_path
+def on_download(username: str, notebook_id: str, selection: str):
+    """Resolve an artifact selection such as "reports/report_1.md" to a file path for download.
+    Returns None when nothing is selected, when the selection resolves outside the
+    notebook's artifacts folder, or when it does not name an existing regular file.
+    """
+    import os
+    from src.storage.paths import nb_root
+    if not selection:
+        return None
+    root = os.path.join(nb_root(username, notebook_id), "artifacts")
+    p = os.path.join(root, selection)
+    # `selection` arrives from the client, so refuse anything (e.g. "../..", an
+    # absolute path) that resolves outside the artifacts folder.
+    real_root = os.path.realpath(root)
+    if not os.path.realpath(p).startswith(real_root + os.sep):
+        return None
+    return p if os.path.isfile(p) else None

src/frontend/ui.py ADDED Viewed

	@@ -0,0 +1,80 @@

+import gradio as gr
+from src.frontend.callbacks import (
+    ui_bootstrap, on_switch_notebook, on_create_notebook, on_rename_notebook, on_delete_notebook,
+    on_ingest_files, on_ingest_url, on_chat, on_report, on_quiz, on_podcast, on_download
+)
+from src.backend.auth import require_login
+def build_app():
+    """Assemble the Gradio Blocks UI, wire every control to its callback and return the Blocks object."""
+    with gr.Blocks(title="NotebookLM Clone") as demo:
+        gr.Markdown("# 📓 NotebookLM Clone (HF Auth + Chroma + RAG)")
+        login = gr.LoginButton()
+        # Holds the logged-in username; passed as the first input to every callback below.
+        username_state = gr.State("")
+        def on_load(request: gr.Request):
+            # require_login raises gr.Error when the request carries no username.
+            username = require_login(request)
+            dd, chat, arts = ui_bootstrap(username)
+            return username, dd, chat, arts
+        with gr.Row():
+            # Left column: notebook management, ingestion and artifact controls.
+            with gr.Column(scale=1):
+                user_box = gr.Textbox(label="User", interactive=False)
+                notebook_dd = gr.Dropdown(label="Notebooks", choices=[], interactive=True)
+                nb_new = gr.Textbox(label="Create notebook", placeholder="Name")
+                btn_create = gr.Button("Create")
+                nb_rename = gr.Textbox(label="Rename notebook", placeholder="New name")
+                btn_rename = gr.Button("Rename")
+                btn_delete = gr.Button("Delete current", variant="stop")
+                gr.Markdown("## Ingest")
+                file_up = gr.File(label="Upload PDF/PPTX/TXT", file_count="multiple")
+                btn_ingest_files = gr.Button("Ingest Files")
+                ingest_status = gr.Textbox(label="Status", interactive=False)
+                url_in = gr.Textbox(label="URL", placeholder="https://...")
+                btn_ingest_url = gr.Button("Ingest URL")
+                url_status = gr.Textbox(label="Status", interactive=False)
+                gr.Markdown("## Artifacts")
+                topic = gr.Textbox(label="Topic / prompt")
+                extra = gr.Textbox(label="Extra prompt (optional)")
+                btn_report = gr.Button("Generate Report")
+                btn_quiz = gr.Button("Generate Quiz")
+                btn_podcast = gr.Button("Generate Podcast")
+                artifact_status = gr.Textbox(label="Artifact status", interactive=False)
+                artifacts_list = gr.Dropdown(label="Artifacts", choices=[], interactive=True)
+                download_btn = gr.Button("Download selected")
+                download_file = gr.File(label="Download", interactive=False)
+                podcast_audio = gr.Audio(label="Podcast Audio", interactive=False)
+            # Right column: the chat panel.
+            with gr.Column(scale=2):
+                chatbot = gr.Chatbot(height=520, label="Chat (RAG + citations)")
+                msg = gr.Textbox(label="Message")
+                send = gr.Button("Send")
+        # Page load: authenticate, then fill the dropdown, chat and artifact list.
+        demo.load(on_load, inputs=None, outputs=[username_state, notebook_dd, chatbot, artifacts_list], queue=False)
+        # Mirror the username state into the read-only "User" textbox.
+        username_state.change(lambda u: u, inputs=username_state, outputs=user_box, queue=False)
+        # Notebook management events.
+        notebook_dd.change(on_switch_notebook, inputs=[username_state, notebook_dd], outputs=[chatbot, artifacts_list], queue=False)
+        btn_create.click(on_create_notebook, inputs=[username_state, nb_new], outputs=[notebook_dd, chatbot, artifacts_list], queue=False)
+        btn_rename.click(on_rename_notebook, inputs=[username_state, notebook_dd, nb_rename], outputs=[notebook_dd], queue=False)
+        btn_delete.click(on_delete_notebook, inputs=[username_state, notebook_dd], outputs=[notebook_dd, chatbot, artifacts_list], queue=False)
+        # Ingestion, chat and artifact generation are registered with queue=True.
+        btn_ingest_files.click(on_ingest_files, inputs=[username_state, notebook_dd, file_up], outputs=[ingest_status], queue=True)
+        btn_ingest_url.click(on_ingest_url, inputs=[username_state, notebook_dd, url_in], outputs=[url_status], queue=True)
+        send.click(on_chat, inputs=[username_state, notebook_dd, chatbot, msg], outputs=[chatbot, msg], queue=True)
+        btn_report.click(on_report, inputs=[username_state, notebook_dd, topic, extra], outputs=[artifact_status, artifacts_list, download_file], queue=True)
+        btn_quiz.click(on_quiz, inputs=[username_state, notebook_dd, topic, extra], outputs=[artifact_status, artifacts_list, download_file], queue=True)
+        # The podcast callback returns a fourth value, the MP3 path, for the audio player.
+        btn_podcast.click(on_podcast, inputs=[username_state, notebook_dd, topic, extra], outputs=[artifact_status, artifacts_list, download_file, podcast_audio], queue=True)
+        download_btn.click(on_download, inputs=[username_state, notebook_dd, artifacts_list], outputs=[download_file], queue=False)
+    return demo

src/storage/__pycache__/artifact_store.cpython-310.pyc ADDED Viewed

Binary file (1.18 kB). View file

src/storage/__pycache__/chat_store.cpython-310.pyc ADDED Viewed

Binary file (1.17 kB). View file

src/storage/__pycache__/chroma_store.cpython-310.pyc ADDED Viewed

Binary file (830 Bytes). View file

src/storage/__pycache__/index_store.cpython-310.pyc ADDED Viewed

Binary file (1.73 kB). View file

src/storage/__pycache__/paths.cpython-310.pyc ADDED Viewed

Binary file (1.26 kB). View file

src/storage/artifact_store.py ADDED Viewed

	@@ -0,0 +1,21 @@

+import os
+from src.storage.paths import nb_root
+def list_artifacts(username: str, notebook_id: str):
+    """List the notebook's artifacts as "<kind>/<filename>" strings, grouped by kind (reports, quizzes, podcasts) and sorted within each kind."""
+    root = os.path.join(nb_root(username, notebook_id), "artifacts")
+    found = []
+    for kind in ("reports", "quizzes", "podcasts"):
+        folder = os.path.join(root, kind)
+        if os.path.exists(folder):
+            found.extend(f"{kind}/{name}" for name in sorted(os.listdir(folder)))
+    return found
+def next_artifact_path(username: str, notebook_id: str, kind: str, ext: str):
+    """Return an unused path "<prefix>_<n><ext>" inside the notebook's artifacts/<kind> folder, creating the folder if needed.
+    ``kind`` must be "reports", "quizzes" or "podcasts"; any other value raises KeyError.
+    """
+    base = os.path.join(nb_root(username, notebook_id), "artifacts", kind)
+    os.makedirs(base, exist_ok=True)
+    prefix = {"reports":"report","quizzes":"quiz","podcasts":"podcast"}[kind]
+    # Start from the number of existing files with this extension, as before ...
+    n = sum(1 for p in os.listdir(base) if p.endswith(ext)) + 1
+    candidate = os.path.join(base, f"{prefix}_{n}{ext}")
+    # ... but skip numbers already in use (possible after files were removed),
+    # so an existing artifact is never overwritten.
+    while os.path.exists(candidate):
+        n += 1
+        candidate = os.path.join(base, f"{prefix}_{n}{ext}")
+    return candidate

src/storage/chat_store.py ADDED Viewed

	@@ -0,0 +1,22 @@

+import os, json
+from src.storage.paths import nb_root
+def chat_path(username: str, notebook_id: str) -> str:
+    """Return the path of the notebook's chat log, ``chat/messages.jsonl`` under the notebook root."""
+    notebook_dir = nb_root(username, notebook_id)
+    return os.path.join(notebook_dir, "chat", "messages.jsonl")
+def append_chat(username: str, notebook_id: str, obj: dict):
+    """Append ``obj`` as one JSON line to the notebook's chat log, creating the folder if needed."""
+    target = chat_path(username, notebook_id)
+    folder = os.path.dirname(target)
+    os.makedirs(folder, exist_ok=True)
+    with open(target, "a", encoding="utf-8") as log:
+        log.write(json.dumps(obj, ensure_ascii=False) + "\n")
+def load_chat(username: str, notebook_id: str):
+    """Load the notebook's chat log as a list of message dicts; returns [] when no log exists.
+    Blank lines, lines that are not valid JSON and JSON values that are not objects are skipped.
+    """
+    p = chat_path(username, notebook_id)
+    if not os.path.exists(p):
+        return []
+    out = []
+    with open(p, "r", encoding="utf-8") as f:
+        for line in f:
+            line = line.strip()
+            if not line:
+                continue
+            try:
+                record = json.loads(line)
+            except json.JSONDecodeError:
+                # Best effort: one corrupt line must not hide the rest of the history.
+                continue
+            # Readers call .get() on each message, so only keep JSON objects.
+            if isinstance(record, dict):
+                out.append(record)
+    return out

src/storage/chroma_store.py ADDED Viewed

	@@ -0,0 +1,13 @@

+import os
+import chromadb
+from chromadb.config import Settings
+from src.storage.paths import nb_root
+def chroma_client(username: str, notebook_id: str):
+    """Return a persistent Chroma client stored in the notebook's ``chroma`` folder, with anonymized telemetry disabled."""
+    store_dir = os.path.join(nb_root(username, notebook_id), "chroma")
+    os.makedirs(store_dir, exist_ok=True)
+    options = Settings(anonymized_telemetry=False)
+    return chromadb.PersistentClient(path=store_dir, settings=options)
+def get_collection(username: str, notebook_id: str):
+    """Return the notebook's "docs" collection, creating it on first use."""
+    return chroma_client(username, notebook_id).get_or_create_collection(name="docs")

src/storage/index_store.py ADDED Viewed

	@@ -0,0 +1,31 @@

+import os, json
+from datetime import datetime
+from .paths import user_root, index_path, ensure_tree
+def now_iso():
+    """Return the current UTC time as an ISO-8601 string with a trailing "Z"."""
+    from datetime import timezone
+    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC time
+    # and drop tzinfo so isoformat() still emits no "+00:00" offset.
+    return datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"
+def load_index(username: str) -> dict:
+    """Load the user's notebook index, first writing an empty ``{"notebooks": []}`` index when none exists."""
+    os.makedirs(user_root(username), exist_ok=True)
+    location = index_path(username)
+    missing = not os.path.exists(location)
+    if missing:
+        with open(location, "w", encoding="utf-8") as fresh:
+            json.dump({"notebooks": []}, fresh, indent=2)
+    with open(location, "r", encoding="utf-8") as stored:
+        data = json.load(stored)
+    return data
+def save_index(username: str, idx: dict):
+    """Overwrite the user's notebook index file with ``idx`` as indented JSON."""
+    location = index_path(username)
+    with open(location, "w", encoding="utf-8") as target:
+        json.dump(idx, target, indent=2)
+def list_notebooks(username: str):
+    """Return the user's notebooks as (name, id) tuples in index order."""
+    listing = []
+    for entry in load_index(username).get("notebooks", []):
+        listing.append((entry["name"], entry["id"]))
+    return listing
+def touch_updated(username: str, notebook_id: str):
+    """Set ``updated_at`` of the matching notebook to the current time and save the index (the index is saved even when no notebook matches)."""
+    idx = load_index(username)
+    match = next((entry for entry in idx.get("notebooks", []) if entry["id"] == notebook_id), None)
+    if match is not None:
+        match["updated_at"] = now_iso()
+    save_index(username, idx)

src/storage/paths.py ADDED Viewed

	@@ -0,0 +1,33 @@

+import os
+from pathlib import Path
+# Storage root for all user data.
+# If the DATA_ROOT environment variable is not set, data is written to a
+# "data" folder two directory levels above this file (the project folder).
+# On Hugging Face Spaces, set DATA_ROOT=/data in the Space variables.
+DEFAULT_LOCAL_DATA = str(Path(__file__).resolve().parents[2] / "data")
+DATA_ROOT = os.environ.get("DATA_ROOT", DEFAULT_LOCAL_DATA)
+def user_root(username: str) -> str:
+    """Return the folder holding all notebooks of ``username``: <DATA_ROOT>/users/<username>/notebooks."""
+    parts = (DATA_ROOT, "users", username, "notebooks")
+    return os.path.join(*parts)
+def index_path(username: str) -> str:
+    """Return the path of the user's notebook index file, ``index.json`` inside the user's notebooks folder."""
+    notebooks_dir = user_root(username)
+    return os.path.join(notebooks_dir, "index.json")
+def nb_root(username: str, notebook_id: str) -> str:
+    """Return the root folder of one notebook, named after its id inside the user's notebooks folder."""
+    notebooks_dir = user_root(username)
+    return os.path.join(notebooks_dir, notebook_id)
+def ensure_tree(username: str, notebook_id: str):
+    """Create the user's notebooks folder and every sub-folder a notebook uses; existing folders are left alone."""
+    base = nb_root(username, notebook_id)
+    os.makedirs(user_root(username), exist_ok=True)
+    for sub in ("files_raw", "files_extracted", "chroma", "chat"):
+        os.makedirs(os.path.join(base, sub), exist_ok=True)
+    for kind in ("reports", "quizzes", "podcasts"):
+        os.makedirs(os.path.join(base, "artifacts", kind), exist_ok=True)

src/utils/__pycache__/text.cpython-310.pyc ADDED Viewed

Binary file (423 Bytes). View file

src/utils/text.py ADDED Viewed

	@@ -0,0 +1,7 @@

+import re
+def safe_name(s: str) -> str:
+    """Sanitize a display name: keep only ASCII letters, digits, "_", "-" and spaces, collapse whitespace runs, cap at 60 characters; returns "Untitled" when nothing is left."""
+    cleaned = re.sub(r"[^a-zA-Z0-9_\- ]+", "", (s or "").strip())
+    cleaned = re.sub(r"\s+", " ", cleaned).strip()
+    if not cleaned:
+        return "Untitled"
+    return cleaned[:60]

src/utils/timing.py ADDED Viewed

File without changes