Spaces:
Runtime error
Runtime error
Commit ·
8593064
1
Parent(s): ca39256
Testing Flow
Browse files
- README.md +32 -5
- app.py +8 -3
- src/backend/__pycache__/artifacts.cpython-310.pyc +0 -0
- src/backend/__pycache__/auth.cpython-310.pyc +0 -0
- src/backend/__pycache__/ingest.cpython-310.pyc +0 -0
- src/backend/__pycache__/llm.cpython-310.pyc +0 -0
- src/backend/__pycache__/notebooks.cpython-310.pyc +0 -0
- src/backend/__pycache__/rag.cpython-310.pyc +0 -0
- src/backend/auth.py +32 -13
- src/backend/ingest.py +113 -54
- src/frontend/__pycache__/callbacks.cpython-310.pyc +0 -0
- src/frontend/__pycache__/ui.cpython-310.pyc +0 -0
- src/frontend/ui.py +55 -197
- src/storage/__pycache__/artifact_store.cpython-310.pyc +0 -0
- src/storage/__pycache__/chat_store.cpython-310.pyc +0 -0
- src/storage/__pycache__/chroma_store.cpython-310.pyc +0 -0
- src/storage/__pycache__/index_store.cpython-310.pyc +0 -0
- src/storage/__pycache__/paths.cpython-310.pyc +0 -0
- src/storage/chroma_store.py +9 -1
- src/utils/__pycache__/text.cpython-310.pyc +0 -0
README.md
CHANGED
|
@@ -1,12 +1,39 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
emoji: 📓
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: "4.44.1"
|
| 8 |
-
python_version: "3.10"
|
| 9 |
app_file: app.py
|
| 10 |
pinned: false
|
| 11 |
hf_oauth: true
|
| 12 |
-
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: NotebookLM Clone (GPP1)
|
| 3 |
emoji: 📓
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: pink
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: "4.44.1"
|
|
|
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
hf_oauth: true
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
# NotebookLM Clone (HF OAuth + Chroma + RAG)
|
| 14 |
+
|
| 15 |
+
## Overview
|
| 16 |
+
This project is a simplified clone of Google NotebookLM. Users can create multiple notebooks, upload sources (PDF/PPTX/TXT/URL), chat with their sources using Retrieval-Augmented Generation (RAG) with citations, and generate study artifacts (report, quiz, podcast).
|
| 17 |
+
|
| 18 |
+
## Features
|
| 19 |
+
- HF OAuth login (per-user isolation)
|
| 20 |
+
- Multi-notebook support: create/rename/delete
|
| 21 |
+
- Ingestion: PDF / PPTX / TXT / URL
|
| 22 |
+
- Chunking + Embedding (Sentence-Transformers all-MiniLM-L6-v2)
|
| 23 |
+
- Vector search using ChromaDB (persistent per notebook)
|
| 24 |
+
- Chat with citations
|
| 25 |
+
- Artifact generation:
|
| 26 |
+
- report (.md)
|
| 27 |
+
- quiz with answer key (.md)
|
| 28 |
+
- podcast transcript (.md) + audio (.mp3)
|
| 29 |
+
|
| 30 |
+
## Environment Variables
|
| 31 |
+
### Hugging Face Space
|
| 32 |
+
- DATA_ROOT=/data
|
| 33 |
+
|
| 34 |
+
## Local Dev
|
| 35 |
+
1. Create a virtual environment and install dependencies:
|
| 36 |
+
- pip install -r requirements.txt
|
| 37 |
+
2. Run:
|
| 38 |
+
- python app.py
|
| 39 |
+
Note: HF OAuth is best tested in a Space.
|
app.py
CHANGED
|
@@ -1,7 +1,12 @@
|
|
| 1 |
import os
|
| 2 |
|
| 3 |
-
# Disable
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
from src.frontend.ui import build_app
|
| 7 |
|
|
@@ -12,4 +17,4 @@ if __name__ == "__main__":
|
|
| 12 |
server_name="0.0.0.0",
|
| 13 |
server_port=int(os.getenv("PORT", "7860")),
|
| 14 |
show_api=False,
|
| 15 |
-
)
|
|
|
|
| 1 |
import os
|
| 2 |
|
| 3 |
+
# ----- Disable telemetry / analytics noise -----
|
| 4 |
+
# Gradio analytics (UI usage pings)
|
| 5 |
+
os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
|
| 6 |
+
# HF hub telemetry (optional)
|
| 7 |
+
os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
|
| 8 |
+
# Chroma telemetry (we also disable via Settings in chroma_store.py)
|
| 9 |
+
os.environ["ANONYMIZED_TELEMETRY"] = "False"
|
| 10 |
|
| 11 |
from src.frontend.ui import build_app
|
| 12 |
|
|
|
|
| 17 |
server_name="0.0.0.0",
|
| 18 |
server_port=int(os.getenv("PORT", "7860")),
|
| 19 |
show_api=False,
|
| 20 |
+
)
|
src/backend/__pycache__/artifacts.cpython-310.pyc
CHANGED
|
Binary files a/src/backend/__pycache__/artifacts.cpython-310.pyc and b/src/backend/__pycache__/artifacts.cpython-310.pyc differ
|
|
|
src/backend/__pycache__/auth.cpython-310.pyc
CHANGED
|
Binary files a/src/backend/__pycache__/auth.cpython-310.pyc and b/src/backend/__pycache__/auth.cpython-310.pyc differ
|
|
|
src/backend/__pycache__/ingest.cpython-310.pyc
CHANGED
|
Binary files a/src/backend/__pycache__/ingest.cpython-310.pyc and b/src/backend/__pycache__/ingest.cpython-310.pyc differ
|
|
|
src/backend/__pycache__/llm.cpython-310.pyc
CHANGED
|
Binary files a/src/backend/__pycache__/llm.cpython-310.pyc and b/src/backend/__pycache__/llm.cpython-310.pyc differ
|
|
|
src/backend/__pycache__/notebooks.cpython-310.pyc
CHANGED
|
Binary files a/src/backend/__pycache__/notebooks.cpython-310.pyc and b/src/backend/__pycache__/notebooks.cpython-310.pyc differ
|
|
|
src/backend/__pycache__/rag.cpython-310.pyc
CHANGED
|
Binary files a/src/backend/__pycache__/rag.cpython-310.pyc and b/src/backend/__pycache__/rag.cpython-310.pyc differ
|
|
|
src/backend/auth.py
CHANGED
|
@@ -2,32 +2,51 @@ import os
|
|
| 2 |
import gradio as gr
|
| 3 |
|
| 4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
def require_login(request: gr.Request) -> str:
|
| 6 |
"""
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
1) request.username (best case)
|
| 10 |
-
2) HF-proxy headers (x-forwarded-*)
|
| 11 |
-
3) local/dev fallback
|
| 12 |
"""
|
| 13 |
-
# 1) Best-case Gradio
|
| 14 |
username = getattr(request, "username", None)
|
| 15 |
if username:
|
| 16 |
return str(username)
|
| 17 |
|
| 18 |
-
# 2)
|
| 19 |
-
headers =
|
|
|
|
| 20 |
for key in [
|
| 21 |
"x-forwarded-user",
|
| 22 |
"x-hf-user",
|
| 23 |
"x-forwarded-preferred-username",
|
| 24 |
"x-auth-request-preferred-username",
|
| 25 |
]:
|
| 26 |
-
|
| 27 |
-
|
|
|
|
| 28 |
|
| 29 |
-
# 3)
|
| 30 |
-
|
|
|
|
| 31 |
return "localuser"
|
| 32 |
|
| 33 |
-
raise gr.Error("Please log in using 'Sign in with Hugging Face' to use this app.")
|
|
|
|
| 2 |
import gradio as gr
|
| 3 |
|
| 4 |
|
| 5 |
+
def _get_headers_dict(request: gr.Request) -> dict:
|
| 6 |
+
h = getattr(request, "headers", None) or {}
|
| 7 |
+
# Normalize keys to lowercase
|
| 8 |
+
out = {}
|
| 9 |
+
try:
|
| 10 |
+
for k, v in dict(h).items():
|
| 11 |
+
out[str(k).lower()] = v
|
| 12 |
+
except Exception:
|
| 13 |
+
return {}
|
| 14 |
+
return out
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def _first_value(v):
|
| 18 |
+
# Some frameworks store header values as lists
|
| 19 |
+
if isinstance(v, (list, tuple)) and v:
|
| 20 |
+
return v[0]
|
| 21 |
+
return v
|
| 22 |
+
|
| 23 |
+
|
| 24 |
def require_login(request: gr.Request) -> str:
|
| 25 |
"""
|
| 26 |
+
Returns a stable username for storage paths.
|
| 27 |
+
Works in HF Spaces and local dev.
|
|
|
|
|
|
|
|
|
|
| 28 |
"""
|
| 29 |
+
# 1) Best-case (some Gradio versions populate this)
|
| 30 |
username = getattr(request, "username", None)
|
| 31 |
if username:
|
| 32 |
return str(username)
|
| 33 |
|
| 34 |
+
# 2) HF proxy headers (varies by setup)
|
| 35 |
+
headers = _get_headers_dict(request)
|
| 36 |
+
|
| 37 |
for key in [
|
| 38 |
"x-forwarded-user",
|
| 39 |
"x-hf-user",
|
| 40 |
"x-forwarded-preferred-username",
|
| 41 |
"x-auth-request-preferred-username",
|
| 42 |
]:
|
| 43 |
+
val = _first_value(headers.get(key))
|
| 44 |
+
if val:
|
| 45 |
+
return str(val)
|
| 46 |
|
| 47 |
+
# 3) Local/dev fallback (so app doesn't hard-crash during dev)
|
| 48 |
+
# HF Spaces usually sets SPACE_ID; locally it won't exist.
|
| 49 |
+
if os.getenv("SPACE_ID") is None and os.getenv("HF_SPACE_ID") is None:
|
| 50 |
return "localuser"
|
| 51 |
|
| 52 |
+
raise gr.Error("Please log in using 'Sign in with Hugging Face' to use this app.")
|
src/backend/ingest.py
CHANGED
|
@@ -1,4 +1,6 @@
|
|
| 1 |
-
import os
|
|
|
|
|
|
|
| 2 |
import requests
|
| 3 |
from bs4 import BeautifulSoup
|
| 4 |
from pypdf import PdfReader
|
|
@@ -11,6 +13,17 @@ from src.utils.text import safe_name
|
|
| 11 |
|
| 12 |
EMBED_MODEL = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
def simple_chunk(text: str, max_chars=2200, overlap=250):
|
| 15 |
text = "\n".join([ln.strip() for ln in (text or "").splitlines() if ln.strip()]).strip()
|
| 16 |
if not text:
|
|
@@ -21,19 +34,25 @@ def simple_chunk(text: str, max_chars=2200, overlap=250):
|
|
| 21 |
while start < len(text):
|
| 22 |
end = min(len(text), start + max_chars)
|
| 23 |
out.append(text[start:end])
|
| 24 |
-
if end == len(text):
|
|
|
|
| 25 |
start = max(0, end - overlap)
|
| 26 |
return out
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
def extract_pdf(path: str):
|
| 29 |
reader = PdfReader(path)
|
| 30 |
items = []
|
| 31 |
for i, page in enumerate(reader.pages):
|
| 32 |
txt = (page.extract_text() or "").strip()
|
| 33 |
if txt:
|
| 34 |
-
items.append({"text": txt, "page": i+1})
|
| 35 |
return items
|
| 36 |
|
|
|
|
| 37 |
def extract_pptx(path: str):
|
| 38 |
prs = Presentation(path)
|
| 39 |
items = []
|
|
@@ -44,41 +63,67 @@ def extract_pptx(path: str):
|
|
| 44 |
texts.append(shape.text)
|
| 45 |
txt = "\n".join(t.strip() for t in texts if t.strip()).strip()
|
| 46 |
if txt:
|
| 47 |
-
items.append({"text": txt, "slide": i+1})
|
| 48 |
return items
|
| 49 |
|
|
|
|
| 50 |
def extract_txt(path: str):
|
| 51 |
with open(path, "r", encoding="utf-8", errors="ignore") as f:
|
| 52 |
txt = f.read().strip()
|
| 53 |
-
return [{"text": txt
|
|
|
|
| 54 |
|
| 55 |
def extract_url(url: str):
|
| 56 |
r = requests.get(url, timeout=15, headers={"User-Agent": "Mozilla/5.0"})
|
| 57 |
r.raise_for_status()
|
| 58 |
soup = BeautifulSoup(r.text, "html.parser")
|
| 59 |
-
for tag in soup(["script","style","noscript"]):
|
| 60 |
tag.decompose()
|
| 61 |
text = soup.get_text("\n")
|
| 62 |
text = "\n".join([ln.strip() for ln in text.splitlines() if ln.strip()])
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
col = get_collection(username, notebook_id)
|
|
|
|
| 67 |
ids, docs, metas = [], [], []
|
| 68 |
|
| 69 |
-
for item in extracted_items:
|
| 70 |
-
page = item.get("page"
|
| 71 |
-
slide = item.get("slide"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
-
for
|
| 74 |
-
|
|
|
|
|
|
|
| 75 |
docs.append(ch)
|
| 76 |
|
| 77 |
meta = {
|
| 78 |
"source_title": str(source_title),
|
| 79 |
"source_id": str(source_id),
|
| 80 |
}
|
| 81 |
-
#
|
| 82 |
if page is not None:
|
| 83 |
meta["page"] = int(page)
|
| 84 |
if slide is not None:
|
|
@@ -94,36 +139,32 @@ def upsert_extracted(username: str, notebook_id: str, source_title: str, source_
|
|
| 94 |
return len(docs)
|
| 95 |
|
| 96 |
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
- list[Gradio file objects]
|
| 102 |
-
"""
|
| 103 |
ensure_tree(username, notebook_id)
|
|
|
|
| 104 |
raw_dir = os.path.join(nb_root(username, notebook_id), "files_raw")
|
| 105 |
ex_dir = os.path.join(nb_root(username, notebook_id), "files_extracted")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
added = 0
|
| 107 |
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
normalized_paths.append(f["name"])
|
| 121 |
-
elif isinstance(filepaths, str):
|
| 122 |
-
normalized_paths = [filepaths]
|
| 123 |
-
|
| 124 |
-
for fp in normalized_paths:
|
| 125 |
-
dest = os.path.join(raw_dir, os.path.basename(fp))
|
| 126 |
-
pathlib.Path(dest).write_bytes(pathlib.Path(fp).read_bytes())
|
| 127 |
|
| 128 |
ext = os.path.splitext(dest)[1].lower()
|
| 129 |
if ext == ".pdf":
|
|
@@ -135,24 +176,27 @@ def ingest_files(username: str, notebook_id: str, filepaths) -> int:
|
|
| 135 |
else:
|
| 136 |
continue
|
| 137 |
|
| 138 |
-
# save extracted
|
| 139 |
-
ex_path = os.path.join(ex_dir,
|
| 140 |
-
with open(ex_path, "w", encoding="utf-8") as
|
| 141 |
for item in extracted:
|
|
|
|
| 142 |
if item.get("page") is not None:
|
| 143 |
loc = f"page={item.get('page')}"
|
| 144 |
elif item.get("slide") is not None:
|
| 145 |
loc = f"slide={item.get('slide')}"
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
|
|
|
|
|
|
| 149 |
|
| 150 |
added += upsert_extracted(
|
| 151 |
-
username,
|
| 152 |
-
notebook_id,
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
extracted,
|
| 156 |
)
|
| 157 |
|
| 158 |
return added
|
|
@@ -160,9 +204,24 @@ def ingest_files(username: str, notebook_id: str, filepaths) -> int:
|
|
| 160 |
|
| 161 |
def ingest_url(username: str, notebook_id: str, url: str) -> int:
|
| 162 |
ensure_tree(username, notebook_id)
|
|
|
|
| 163 |
extracted = extract_url(url)
|
| 164 |
ex_dir = os.path.join(nb_root(username, notebook_id), "files_extracted")
|
| 165 |
-
|
|
|
|
|
|
|
|
|
|
| 166 |
with open(os.path.join(ex_dir, fname), "w", encoding="utf-8") as f:
|
| 167 |
-
f.write(extracted[0]
|
| 168 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import pathlib
|
| 3 |
+
import hashlib
|
| 4 |
import requests
|
| 5 |
from bs4 import BeautifulSoup
|
| 6 |
from pypdf import PdfReader
|
|
|
|
| 13 |
|
| 14 |
EMBED_MODEL = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
|
| 15 |
|
| 16 |
+
|
| 17 |
+
# -------------------------
|
| 18 |
+
# Helpers
|
| 19 |
+
# -------------------------
|
| 20 |
+
def _sha10_bytes(b: bytes) -> str:
|
| 21 |
+
return hashlib.sha256(b).hexdigest()[:10]
|
| 22 |
+
|
| 23 |
+
def _sha10_text(s: str) -> str:
|
| 24 |
+
return hashlib.sha256((s or "").encode("utf-8", errors="ignore")).hexdigest()[:10]
|
| 25 |
+
|
| 26 |
+
|
| 27 |
def simple_chunk(text: str, max_chars=2200, overlap=250):
|
| 28 |
text = "\n".join([ln.strip() for ln in (text or "").splitlines() if ln.strip()]).strip()
|
| 29 |
if not text:
|
|
|
|
| 34 |
while start < len(text):
|
| 35 |
end = min(len(text), start + max_chars)
|
| 36 |
out.append(text[start:end])
|
| 37 |
+
if end == len(text):
|
| 38 |
+
break
|
| 39 |
start = max(0, end - overlap)
|
| 40 |
return out
|
| 41 |
|
| 42 |
+
|
| 43 |
+
# -------------------------
|
| 44 |
+
# Extractors
|
| 45 |
+
# -------------------------
|
| 46 |
def extract_pdf(path: str):
|
| 47 |
reader = PdfReader(path)
|
| 48 |
items = []
|
| 49 |
for i, page in enumerate(reader.pages):
|
| 50 |
txt = (page.extract_text() or "").strip()
|
| 51 |
if txt:
|
| 52 |
+
items.append({"text": txt, "page": i + 1})
|
| 53 |
return items
|
| 54 |
|
| 55 |
+
|
| 56 |
def extract_pptx(path: str):
|
| 57 |
prs = Presentation(path)
|
| 58 |
items = []
|
|
|
|
| 63 |
texts.append(shape.text)
|
| 64 |
txt = "\n".join(t.strip() for t in texts if t.strip()).strip()
|
| 65 |
if txt:
|
| 66 |
+
items.append({"text": txt, "slide": i + 1})
|
| 67 |
return items
|
| 68 |
|
| 69 |
+
|
| 70 |
def extract_txt(path: str):
|
| 71 |
with open(path, "r", encoding="utf-8", errors="ignore") as f:
|
| 72 |
txt = f.read().strip()
|
| 73 |
+
return [{"text": txt}] if txt else []
|
| 74 |
+
|
| 75 |
|
| 76 |
def extract_url(url: str):
|
| 77 |
r = requests.get(url, timeout=15, headers={"User-Agent": "Mozilla/5.0"})
|
| 78 |
r.raise_for_status()
|
| 79 |
soup = BeautifulSoup(r.text, "html.parser")
|
| 80 |
+
for tag in soup(["script", "style", "noscript"]):
|
| 81 |
tag.decompose()
|
| 82 |
text = soup.get_text("\n")
|
| 83 |
text = "\n".join([ln.strip() for ln in text.splitlines() if ln.strip()])
|
| 84 |
+
# Hard cap (200,000 characters) so an extremely long page is not embedded in full
|
| 85 |
+
return [{"text": text[:200000]}]
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
# -------------------------
|
| 89 |
+
# Chroma upsert
|
| 90 |
+
# -------------------------
|
| 91 |
+
def upsert_extracted(
|
| 92 |
+
username: str,
|
| 93 |
+
notebook_id: str,
|
| 94 |
+
source_title: str,
|
| 95 |
+
source_id: str,
|
| 96 |
+
extracted_items: list[dict],
|
| 97 |
+
) -> int:
|
| 98 |
col = get_collection(username, notebook_id)
|
| 99 |
+
|
| 100 |
ids, docs, metas = [], [], []
|
| 101 |
|
| 102 |
+
for item_idx, item in enumerate(extracted_items):
|
| 103 |
+
page = item.get("page")
|
| 104 |
+
slide = item.get("slide")
|
| 105 |
+
|
| 106 |
+
# stable location string (never None)
|
| 107 |
+
if page is not None:
|
| 108 |
+
loc = f"p{int(page)}"
|
| 109 |
+
elif slide is not None:
|
| 110 |
+
loc = f"s{int(slide)}"
|
| 111 |
+
else:
|
| 112 |
+
loc = f"item{item_idx}"
|
| 113 |
+
|
| 114 |
+
chunks = simple_chunk(item.get("text", ""))
|
| 115 |
|
| 116 |
+
for chunk_idx, ch in enumerate(chunks):
|
| 117 |
+
# ✅ unique per (source + loc + chunk)
|
| 118 |
+
cid = f"{source_id}::{loc}::chunk{chunk_idx}"
|
| 119 |
+
ids.append(cid)
|
| 120 |
docs.append(ch)
|
| 121 |
|
| 122 |
meta = {
|
| 123 |
"source_title": str(source_title),
|
| 124 |
"source_id": str(source_id),
|
| 125 |
}
|
| 126 |
+
# ✅ Chroma metadata cannot contain None → only set if present
|
| 127 |
if page is not None:
|
| 128 |
meta["page"] = int(page)
|
| 129 |
if slide is not None:
|
|
|
|
| 139 |
return len(docs)
|
| 140 |
|
| 141 |
|
| 142 |
+
# -------------------------
|
| 143 |
+
# Public API used by callbacks.py
|
| 144 |
+
# -------------------------
|
| 145 |
+
def ingest_files(username: str, notebook_id: str, files) -> int:
|
|
|
|
|
|
|
| 146 |
ensure_tree(username, notebook_id)
|
| 147 |
+
|
| 148 |
raw_dir = os.path.join(nb_root(username, notebook_id), "files_raw")
|
| 149 |
ex_dir = os.path.join(nb_root(username, notebook_id), "files_extracted")
|
| 150 |
+
|
| 151 |
+
os.makedirs(raw_dir, exist_ok=True)
|
| 152 |
+
os.makedirs(ex_dir, exist_ok=True)
|
| 153 |
+
|
| 154 |
added = 0
|
| 155 |
|
| 156 |
+
for f in (files or []):
|
| 157 |
+
fp = getattr(f, "name", None)
|
| 158 |
+
if not fp:
|
| 159 |
+
continue
|
| 160 |
+
|
| 161 |
+
# copy uploaded file to raw_dir
|
| 162 |
+
src_path = pathlib.Path(fp)
|
| 163 |
+
file_bytes = src_path.read_bytes()
|
| 164 |
+
|
| 165 |
+
base = os.path.basename(fp)
|
| 166 |
+
dest = os.path.join(raw_dir, base)
|
| 167 |
+
pathlib.Path(dest).write_bytes(file_bytes)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
|
| 169 |
ext = os.path.splitext(dest)[1].lower()
|
| 170 |
if ext == ".pdf":
|
|
|
|
| 176 |
else:
|
| 177 |
continue
|
| 178 |
|
| 179 |
+
# save extracted text
|
| 180 |
+
ex_path = os.path.join(ex_dir, base + ".txt")
|
| 181 |
+
with open(ex_path, "w", encoding="utf-8") as ftxt:
|
| 182 |
for item in extracted:
|
| 183 |
+
loc = ""
|
| 184 |
if item.get("page") is not None:
|
| 185 |
loc = f"page={item.get('page')}"
|
| 186 |
elif item.get("slide") is not None:
|
| 187 |
loc = f"slide={item.get('slide')}"
|
| 188 |
+
ftxt.write(f"\n--- {loc} ---\n{item.get('text','')}\n")
|
| 189 |
+
|
| 190 |
+
# ✅ Add a hash of the file bytes so different files that share a filename get distinct source IDs
|
| 191 |
+
file_hash = _sha10_bytes(file_bytes)
|
| 192 |
+
source_id = f"file:{base}:{file_hash}"
|
| 193 |
|
| 194 |
added += upsert_extracted(
|
| 195 |
+
username=username,
|
| 196 |
+
notebook_id=notebook_id,
|
| 197 |
+
source_title=base,
|
| 198 |
+
source_id=source_id,
|
| 199 |
+
extracted_items=extracted,
|
| 200 |
)
|
| 201 |
|
| 202 |
return added
|
|
|
|
| 204 |
|
| 205 |
def ingest_url(username: str, notebook_id: str, url: str) -> int:
|
| 206 |
ensure_tree(username, notebook_id)
|
| 207 |
+
|
| 208 |
extracted = extract_url(url)
|
| 209 |
ex_dir = os.path.join(nb_root(username, notebook_id), "files_extracted")
|
| 210 |
+
os.makedirs(ex_dir, exist_ok=True)
|
| 211 |
+
|
| 212 |
+
# save extracted page text
|
| 213 |
+
fname = safe_name(url.replace("https://", "").replace("http://", "").replace("/", "_")) + ".txt"
|
| 214 |
with open(os.path.join(ex_dir, fname), "w", encoding="utf-8") as f:
|
| 215 |
+
f.write(extracted[0].get("text", ""))
|
| 216 |
+
|
| 217 |
+
# ✅ Hash the extracted text so re-ingesting a URL whose content has changed gets a distinct source ID
|
| 218 |
+
text_hash = _sha10_text(extracted[0].get("text", ""))
|
| 219 |
+
source_id = f"url:{url}:{text_hash}"
|
| 220 |
+
|
| 221 |
+
return upsert_extracted(
|
| 222 |
+
username=username,
|
| 223 |
+
notebook_id=notebook_id,
|
| 224 |
+
source_title=url,
|
| 225 |
+
source_id=source_id,
|
| 226 |
+
extracted_items=extracted,
|
| 227 |
+
)
|
src/frontend/__pycache__/callbacks.cpython-310.pyc
CHANGED
|
Binary files a/src/frontend/__pycache__/callbacks.cpython-310.pyc and b/src/frontend/__pycache__/callbacks.cpython-310.pyc differ
|
|
|
src/frontend/__pycache__/ui.cpython-310.pyc
CHANGED
|
Binary files a/src/frontend/__pycache__/ui.cpython-310.pyc and b/src/frontend/__pycache__/ui.cpython-310.pyc differ
|
|
|
src/frontend/ui.py
CHANGED
|
@@ -11,15 +11,13 @@ from src.frontend.callbacks import (
|
|
| 11 |
on_report,
|
| 12 |
on_quiz,
|
| 13 |
on_podcast,
|
| 14 |
-
on_download
|
| 15 |
)
|
| 16 |
from src.backend.auth import require_login
|
| 17 |
|
| 18 |
|
| 19 |
def build_app():
|
| 20 |
-
|
| 21 |
with gr.Blocks(title="NotebookLM Clone") as demo:
|
| 22 |
-
|
| 23 |
gr.Markdown("# 📓 NotebookLM Clone (HF Auth + Chroma + RAG)")
|
| 24 |
|
| 25 |
login = gr.LoginButton()
|
|
@@ -27,296 +25,156 @@ def build_app():
|
|
| 27 |
|
| 28 |
username_state = gr.State("")
|
| 29 |
|
| 30 |
-
# ----------
|
| 31 |
-
def on_load(request: gr.Request):
|
| 32 |
-
username = require_login(request)
|
| 33 |
-
dd, chat, arts = ui_bootstrap(username)
|
| 34 |
-
return username, dd, chat, arts
|
| 35 |
-
|
| 36 |
with gr.Row():
|
| 37 |
-
|
| 38 |
-
# ---------- LEFT PANEL ----------
|
| 39 |
with gr.Column(scale=1):
|
|
|
|
| 40 |
|
| 41 |
-
|
| 42 |
-
label="User",
|
| 43 |
-
interactive=False
|
| 44 |
-
)
|
| 45 |
-
|
| 46 |
-
notebook_dd = gr.Dropdown(
|
| 47 |
-
label="Notebooks",
|
| 48 |
-
choices=[],
|
| 49 |
-
interactive=True
|
| 50 |
-
)
|
| 51 |
-
|
| 52 |
-
nb_new = gr.Textbox(
|
| 53 |
-
label="Create notebook",
|
| 54 |
-
placeholder="Name"
|
| 55 |
-
)
|
| 56 |
|
|
|
|
| 57 |
btn_create = gr.Button("Create")
|
| 58 |
|
| 59 |
-
nb_rename = gr.Textbox(
|
| 60 |
-
label="Rename notebook",
|
| 61 |
-
placeholder="New name"
|
| 62 |
-
)
|
| 63 |
-
|
| 64 |
btn_rename = gr.Button("Rename")
|
| 65 |
|
| 66 |
-
btn_delete = gr.Button(
|
| 67 |
-
"Delete current",
|
| 68 |
-
variant="stop"
|
| 69 |
-
)
|
| 70 |
|
| 71 |
-
# ---------- INGEST ----------
|
| 72 |
gr.Markdown("## Ingest")
|
| 73 |
-
|
| 74 |
-
file_up = gr.File(
|
| 75 |
-
label="Upload PDF/PPTX/TXT",
|
| 76 |
-
file_count="multiple"
|
| 77 |
-
)
|
| 78 |
-
|
| 79 |
btn_ingest_files = gr.Button("Ingest Files")
|
|
|
|
| 80 |
|
| 81 |
-
|
| 82 |
-
label="Status",
|
| 83 |
-
interactive=False
|
| 84 |
-
)
|
| 85 |
-
|
| 86 |
-
url_in = gr.Textbox(
|
| 87 |
-
label="URL",
|
| 88 |
-
placeholder="https://..."
|
| 89 |
-
)
|
| 90 |
-
|
| 91 |
btn_ingest_url = gr.Button("Ingest URL")
|
|
|
|
| 92 |
|
| 93 |
-
url_status = gr.Textbox(
|
| 94 |
-
label="Status",
|
| 95 |
-
interactive=False
|
| 96 |
-
)
|
| 97 |
-
|
| 98 |
-
# ---------- ARTIFACTS ----------
|
| 99 |
gr.Markdown("## Artifacts")
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
label="Topic / prompt"
|
| 103 |
-
)
|
| 104 |
-
|
| 105 |
-
extra = gr.Textbox(
|
| 106 |
-
label="Extra prompt (optional)"
|
| 107 |
-
)
|
| 108 |
-
|
| 109 |
btn_report = gr.Button("Generate Report")
|
| 110 |
btn_quiz = gr.Button("Generate Quiz")
|
| 111 |
btn_podcast = gr.Button("Generate Podcast")
|
| 112 |
|
| 113 |
-
artifact_status = gr.Textbox(
|
| 114 |
-
|
| 115 |
-
interactive=False
|
| 116 |
-
)
|
| 117 |
-
|
| 118 |
-
artifacts_list = gr.Dropdown(
|
| 119 |
-
label="Artifacts",
|
| 120 |
-
choices=[],
|
| 121 |
-
interactive=True
|
| 122 |
-
)
|
| 123 |
-
|
| 124 |
download_btn = gr.Button("Download selected")
|
|
|
|
|
|
|
| 125 |
|
| 126 |
-
download_file = gr.File(
|
| 127 |
-
label="Download",
|
| 128 |
-
interactive=False
|
| 129 |
-
)
|
| 130 |
-
|
| 131 |
-
podcast_audio = gr.Audio(
|
| 132 |
-
label="Podcast Audio",
|
| 133 |
-
interactive=False
|
| 134 |
-
)
|
| 135 |
-
|
| 136 |
-
# ---------- RIGHT PANEL ----------
|
| 137 |
with gr.Column(scale=2):
|
| 138 |
-
|
| 139 |
-
chatbot = gr.Chatbot(
|
| 140 |
-
height=520,
|
| 141 |
-
label="Chat (RAG + citations)"
|
| 142 |
-
)
|
| 143 |
-
|
| 144 |
msg = gr.Textbox(label="Message")
|
| 145 |
-
|
| 146 |
send = gr.Button("Send")
|
| 147 |
|
| 148 |
-
# ----------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
|
| 150 |
demo.load(
|
| 151 |
on_load,
|
| 152 |
inputs=None,
|
| 153 |
outputs=[
|
| 154 |
username_state,
|
|
|
|
| 155 |
notebook_dd,
|
| 156 |
chatbot,
|
| 157 |
-
artifacts_list
|
| 158 |
],
|
| 159 |
queue=False,
|
| 160 |
-
api_name=False
|
| 161 |
-
)
|
| 162 |
-
|
| 163 |
-
username_state.change(
|
| 164 |
-
lambda u: u,
|
| 165 |
-
inputs=username_state,
|
| 166 |
-
outputs=user_box,
|
| 167 |
-
queue=False,
|
| 168 |
-
api_name=False
|
| 169 |
)
|
| 170 |
|
|
|
|
| 171 |
notebook_dd.change(
|
| 172 |
on_switch_notebook,
|
| 173 |
inputs=[username_state, notebook_dd],
|
| 174 |
outputs=[chatbot, artifacts_list],
|
| 175 |
queue=False,
|
| 176 |
-
api_name=False
|
| 177 |
)
|
| 178 |
|
| 179 |
btn_create.click(
|
| 180 |
on_create_notebook,
|
| 181 |
inputs=[username_state, nb_new],
|
| 182 |
-
outputs=[
|
| 183 |
-
notebook_dd,
|
| 184 |
-
chatbot,
|
| 185 |
-
artifacts_list
|
| 186 |
-
],
|
| 187 |
queue=False,
|
| 188 |
-
api_name=False
|
| 189 |
)
|
| 190 |
|
| 191 |
btn_rename.click(
|
| 192 |
on_rename_notebook,
|
| 193 |
-
inputs=[
|
| 194 |
-
username_state,
|
| 195 |
-
notebook_dd,
|
| 196 |
-
nb_rename
|
| 197 |
-
],
|
| 198 |
outputs=[notebook_dd],
|
| 199 |
queue=False,
|
| 200 |
-
api_name=False
|
| 201 |
)
|
| 202 |
|
| 203 |
btn_delete.click(
|
| 204 |
on_delete_notebook,
|
| 205 |
-
inputs=[
|
| 206 |
-
|
| 207 |
-
notebook_dd
|
| 208 |
-
],
|
| 209 |
-
outputs=[
|
| 210 |
-
notebook_dd,
|
| 211 |
-
chatbot,
|
| 212 |
-
artifacts_list
|
| 213 |
-
],
|
| 214 |
queue=False,
|
| 215 |
-
api_name=False
|
| 216 |
)
|
| 217 |
|
| 218 |
btn_ingest_files.click(
|
| 219 |
on_ingest_files,
|
| 220 |
-
inputs=[
|
| 221 |
-
username_state,
|
| 222 |
-
notebook_dd,
|
| 223 |
-
file_up
|
| 224 |
-
],
|
| 225 |
outputs=[ingest_status],
|
| 226 |
queue=True,
|
| 227 |
-
api_name=False
|
| 228 |
)
|
| 229 |
|
| 230 |
btn_ingest_url.click(
|
| 231 |
on_ingest_url,
|
| 232 |
-
inputs=[
|
| 233 |
-
username_state,
|
| 234 |
-
notebook_dd,
|
| 235 |
-
url_in
|
| 236 |
-
],
|
| 237 |
outputs=[url_status],
|
| 238 |
queue=True,
|
| 239 |
-
api_name=False
|
| 240 |
)
|
| 241 |
|
| 242 |
send.click(
|
| 243 |
on_chat,
|
| 244 |
-
inputs=[
|
| 245 |
-
|
| 246 |
-
notebook_dd,
|
| 247 |
-
chatbot,
|
| 248 |
-
msg
|
| 249 |
-
],
|
| 250 |
-
outputs=[
|
| 251 |
-
chatbot,
|
| 252 |
-
msg
|
| 253 |
-
],
|
| 254 |
queue=True,
|
| 255 |
-
api_name=False
|
| 256 |
)
|
| 257 |
|
| 258 |
btn_report.click(
|
| 259 |
on_report,
|
| 260 |
-
inputs=[
|
| 261 |
-
|
| 262 |
-
notebook_dd,
|
| 263 |
-
topic,
|
| 264 |
-
extra
|
| 265 |
-
],
|
| 266 |
-
outputs=[
|
| 267 |
-
artifact_status,
|
| 268 |
-
artifacts_list,
|
| 269 |
-
download_file
|
| 270 |
-
],
|
| 271 |
queue=True,
|
| 272 |
-
api_name=False
|
| 273 |
)
|
| 274 |
|
| 275 |
btn_quiz.click(
|
| 276 |
on_quiz,
|
| 277 |
-
inputs=[
|
| 278 |
-
|
| 279 |
-
notebook_dd,
|
| 280 |
-
topic,
|
| 281 |
-
extra
|
| 282 |
-
],
|
| 283 |
-
outputs=[
|
| 284 |
-
artifact_status,
|
| 285 |
-
artifacts_list,
|
| 286 |
-
download_file
|
| 287 |
-
],
|
| 288 |
queue=True,
|
| 289 |
-
api_name=False
|
| 290 |
)
|
| 291 |
|
| 292 |
btn_podcast.click(
|
| 293 |
on_podcast,
|
| 294 |
-
inputs=[
|
| 295 |
-
|
| 296 |
-
notebook_dd,
|
| 297 |
-
topic,
|
| 298 |
-
extra
|
| 299 |
-
],
|
| 300 |
-
outputs=[
|
| 301 |
-
artifact_status,
|
| 302 |
-
artifacts_list,
|
| 303 |
-
download_file,
|
| 304 |
-
podcast_audio
|
| 305 |
-
],
|
| 306 |
queue=True,
|
| 307 |
-
api_name=False
|
| 308 |
)
|
| 309 |
|
| 310 |
download_btn.click(
|
| 311 |
on_download,
|
| 312 |
-
inputs=[
|
| 313 |
-
username_state,
|
| 314 |
-
notebook_dd,
|
| 315 |
-
artifacts_list
|
| 316 |
-
],
|
| 317 |
outputs=[download_file],
|
| 318 |
queue=False,
|
| 319 |
-
api_name=False
|
| 320 |
)
|
| 321 |
|
| 322 |
-
return demo
|
|
|
|
| 11 |
on_report,
|
| 12 |
on_quiz,
|
| 13 |
on_podcast,
|
| 14 |
+
on_download,
|
| 15 |
)
|
| 16 |
from src.backend.auth import require_login
|
| 17 |
|
| 18 |
|
| 19 |
def build_app():
|
|
|
|
| 20 |
with gr.Blocks(title="NotebookLM Clone") as demo:
|
|
|
|
| 21 |
gr.Markdown("# 📓 NotebookLM Clone (HF Auth + Chroma + RAG)")
|
| 22 |
|
| 23 |
login = gr.LoginButton()
|
|
|
|
| 25 |
|
| 26 |
username_state = gr.State("")
|
| 27 |
|
| 28 |
+
# ---------- UI ----------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
with gr.Row():
|
|
|
|
|
|
|
| 30 |
with gr.Column(scale=1):
|
| 31 |
+
user_box = gr.Textbox(label="User", interactive=False)
|
| 32 |
|
| 33 |
+
notebook_dd = gr.Dropdown(label="Notebooks", choices=[], interactive=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
+
nb_new = gr.Textbox(label="Create notebook", placeholder="Name")
|
| 36 |
btn_create = gr.Button("Create")
|
| 37 |
|
| 38 |
+
nb_rename = gr.Textbox(label="Rename notebook", placeholder="New name")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
btn_rename = gr.Button("Rename")
|
| 40 |
|
| 41 |
+
btn_delete = gr.Button("Delete current", variant="stop")
|
|
|
|
|
|
|
|
|
|
| 42 |
|
|
|
|
| 43 |
gr.Markdown("## Ingest")
|
| 44 |
+
file_up = gr.File(label="Upload PDF/PPTX/TXT", file_count="multiple")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
btn_ingest_files = gr.Button("Ingest Files")
|
| 46 |
+
ingest_status = gr.Textbox(label="Status", interactive=False)
|
| 47 |
|
| 48 |
+
url_in = gr.Textbox(label="URL", placeholder="https://...")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
btn_ingest_url = gr.Button("Ingest URL")
|
| 50 |
+
url_status = gr.Textbox(label="Status", interactive=False)
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
gr.Markdown("## Artifacts")
|
| 53 |
+
topic = gr.Textbox(label="Topic / prompt")
|
| 54 |
+
extra = gr.Textbox(label="Extra prompt (optional)")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
btn_report = gr.Button("Generate Report")
|
| 56 |
btn_quiz = gr.Button("Generate Quiz")
|
| 57 |
btn_podcast = gr.Button("Generate Podcast")
|
| 58 |
|
| 59 |
+
artifact_status = gr.Textbox(label="Artifact status", interactive=False)
|
| 60 |
+
artifacts_list = gr.Dropdown(label="Artifacts", choices=[], interactive=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
download_btn = gr.Button("Download selected")
|
| 62 |
+
download_file = gr.File(label="Download", interactive=False)
|
| 63 |
+
podcast_audio = gr.Audio(label="Podcast Audio", interactive=False)
|
| 64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
with gr.Column(scale=2):
|
| 66 |
+
chatbot = gr.Chatbot(height=520, label="Chat (RAG + citations)")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
msg = gr.Textbox(label="Message")
|
|
|
|
| 68 |
send = gr.Button("Send")
|
| 69 |
|
| 70 |
+
# ---------- LOAD ----------
|
| 71 |
+
def on_load(request: gr.Request):
    """Populate the page for the logged-in user when the app first loads.

    Resolves the username with ``require_login`` and asks ``ui_bootstrap``
    for the three initial values that feed the notebook dropdown, the
    chatbot and the artifacts dropdown.

    Returns:
        A 5-tuple in the order expected by the ``demo.load`` outputs:
        username (session state), username (visible user box), then the
        three values produced by ``ui_bootstrap``.
    """
    user = require_login(request)
    notebooks, history, artifacts = ui_bootstrap(user)

    # The username is returned twice on purpose: once for the hidden state
    # and once straight into the user box so that it is always displayed.
    return user, user, notebooks, history, artifacts
|
| 76 |
|
| 77 |
demo.load(
|
| 78 |
on_load,
|
| 79 |
inputs=None,
|
| 80 |
outputs=[
|
| 81 |
username_state,
|
| 82 |
+
user_box, # ✅ always filled
|
| 83 |
notebook_dd,
|
| 84 |
chatbot,
|
| 85 |
+
artifacts_list,
|
| 86 |
],
|
| 87 |
queue=False,
|
| 88 |
+
api_name=False,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
)
|
| 90 |
|
| 91 |
+
# ---------- EVENTS ----------
|
| 92 |
notebook_dd.change(
|
| 93 |
on_switch_notebook,
|
| 94 |
inputs=[username_state, notebook_dd],
|
| 95 |
outputs=[chatbot, artifacts_list],
|
| 96 |
queue=False,
|
| 97 |
+
api_name=False,
|
| 98 |
)
|
| 99 |
|
| 100 |
btn_create.click(
|
| 101 |
on_create_notebook,
|
| 102 |
inputs=[username_state, nb_new],
|
| 103 |
+
outputs=[notebook_dd, chatbot, artifacts_list],
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
queue=False,
|
| 105 |
+
api_name=False,
|
| 106 |
)
|
| 107 |
|
| 108 |
btn_rename.click(
|
| 109 |
on_rename_notebook,
|
| 110 |
+
inputs=[username_state, notebook_dd, nb_rename],
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
outputs=[notebook_dd],
|
| 112 |
queue=False,
|
| 113 |
+
api_name=False,
|
| 114 |
)
|
| 115 |
|
| 116 |
btn_delete.click(
|
| 117 |
on_delete_notebook,
|
| 118 |
+
inputs=[username_state, notebook_dd],
|
| 119 |
+
outputs=[notebook_dd, chatbot, artifacts_list],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
queue=False,
|
| 121 |
+
api_name=False,
|
| 122 |
)
|
| 123 |
|
| 124 |
btn_ingest_files.click(
|
| 125 |
on_ingest_files,
|
| 126 |
+
inputs=[username_state, notebook_dd, file_up],
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
outputs=[ingest_status],
|
| 128 |
queue=True,
|
| 129 |
+
api_name=False,
|
| 130 |
)
|
| 131 |
|
| 132 |
btn_ingest_url.click(
|
| 133 |
on_ingest_url,
|
| 134 |
+
inputs=[username_state, notebook_dd, url_in],
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
outputs=[url_status],
|
| 136 |
queue=True,
|
| 137 |
+
api_name=False,
|
| 138 |
)
|
| 139 |
|
| 140 |
send.click(
|
| 141 |
on_chat,
|
| 142 |
+
inputs=[username_state, notebook_dd, chatbot, msg],
|
| 143 |
+
outputs=[chatbot, msg],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
queue=True,
|
| 145 |
+
api_name=False,
|
| 146 |
)
|
| 147 |
|
| 148 |
btn_report.click(
|
| 149 |
on_report,
|
| 150 |
+
inputs=[username_state, notebook_dd, topic, extra],
|
| 151 |
+
outputs=[artifact_status, artifacts_list, download_file],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
queue=True,
|
| 153 |
+
api_name=False,
|
| 154 |
)
|
| 155 |
|
| 156 |
btn_quiz.click(
|
| 157 |
on_quiz,
|
| 158 |
+
inputs=[username_state, notebook_dd, topic, extra],
|
| 159 |
+
outputs=[artifact_status, artifacts_list, download_file],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
queue=True,
|
| 161 |
+
api_name=False,
|
| 162 |
)
|
| 163 |
|
| 164 |
btn_podcast.click(
|
| 165 |
on_podcast,
|
| 166 |
+
inputs=[username_state, notebook_dd, topic, extra],
|
| 167 |
+
outputs=[artifact_status, artifacts_list, download_file, podcast_audio],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
queue=True,
|
| 169 |
+
api_name=False,
|
| 170 |
)
|
| 171 |
|
| 172 |
download_btn.click(
|
| 173 |
on_download,
|
| 174 |
+
inputs=[username_state, notebook_dd, artifacts_list],
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
outputs=[download_file],
|
| 176 |
queue=False,
|
| 177 |
+
api_name=False,
|
| 178 |
)
|
| 179 |
|
| 180 |
+
return demo
|
src/storage/__pycache__/artifact_store.cpython-310.pyc
CHANGED
|
Binary files a/src/storage/__pycache__/artifact_store.cpython-310.pyc and b/src/storage/__pycache__/artifact_store.cpython-310.pyc differ
|
|
|
src/storage/__pycache__/chat_store.cpython-310.pyc
CHANGED
|
Binary files a/src/storage/__pycache__/chat_store.cpython-310.pyc and b/src/storage/__pycache__/chat_store.cpython-310.pyc differ
|
|
|
src/storage/__pycache__/chroma_store.cpython-310.pyc
CHANGED
|
Binary files a/src/storage/__pycache__/chroma_store.cpython-310.pyc and b/src/storage/__pycache__/chroma_store.cpython-310.pyc differ
|
|
|
src/storage/__pycache__/index_store.cpython-310.pyc
CHANGED
|
Binary files a/src/storage/__pycache__/index_store.cpython-310.pyc and b/src/storage/__pycache__/index_store.cpython-310.pyc differ
|
|
|
src/storage/__pycache__/paths.cpython-310.pyc
CHANGED
|
Binary files a/src/storage/__pycache__/paths.cpython-310.pyc and b/src/storage/__pycache__/paths.cpython-310.pyc differ
|
|
|
src/storage/chroma_store.py
CHANGED
|
@@ -1,17 +1,25 @@
|
|
| 1 |
import os
|
| 2 |
import chromadb
|
|
|
|
| 3 |
|
| 4 |
from src.storage.paths import nb_root
|
| 5 |
|
| 6 |
# Cache clients by persist_dir to avoid "ephemeral with different settings"
|
| 7 |
_CLIENTS: dict[str, chromadb.PersistentClient] = {}
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
def chroma_client(username: str, notebook_id: str) -> chromadb.PersistentClient:
    """Return the cached Chroma client for one user's notebook.

    The on-disk store lives in the ``chroma`` sub-directory of the notebook
    root; the directory is created on demand. Clients are memoised in
    ``_CLIENTS`` keyed by that directory so each path is opened only once.
    """
    store_path = os.path.join(nb_root(username, notebook_id), "chroma")
    os.makedirs(store_path, exist_ok=True)

    cached = _CLIENTS.get(store_path)
    if cached is None:
        # First request for this directory: open it and remember the client.
        cached = chromadb.PersistentClient(path=store_path)
        _CLIENTS[store_path] = cached
    return cached
|
| 16 |
|
| 17 |
def get_collection(username: str, notebook_id: str, name: str = "chunks"):
|
|
|
|
| 1 |
import os
|
| 2 |
import chromadb
|
| 3 |
+
from chromadb.config import Settings
|
| 4 |
|
| 5 |
from src.storage.paths import nb_root
|
| 6 |
|
| 7 |
# Cache clients by persist_dir to avoid "ephemeral with different settings"
|
| 8 |
_CLIENTS: dict[str, chromadb.PersistentClient] = {}
|
| 9 |
|
| 10 |
+
# One shared Settings object (important: consistent settings!)
|
| 11 |
+
_SETTINGS = Settings(
|
| 12 |
+
anonymized_telemetry=False, # ✅ disables telemetry (stops capture() errors)
|
| 13 |
+
allow_reset=True,
|
| 14 |
+
)
|
| 15 |
+
|
| 16 |
def chroma_client(username: str, notebook_id: str) -> chromadb.PersistentClient:
    """Return the cached Chroma client for one user's notebook.

    The on-disk store lives in the ``chroma`` sub-directory of the notebook
    root; the directory is created on demand. Clients are memoised in
    ``_CLIENTS`` keyed by that directory, and every client is built with the
    shared module-level ``_SETTINGS`` so all instances use the same settings.
    """
    store_path = os.path.join(nb_root(username, notebook_id), "chroma")
    os.makedirs(store_path, exist_ok=True)

    cached = _CLIENTS.get(store_path)
    if cached is None:
        # First request for this directory: open it with the shared settings
        # and remember the client for subsequent calls.
        cached = chromadb.PersistentClient(path=store_path, settings=_SETTINGS)
        _CLIENTS[store_path] = cached
    return cached
|
| 24 |
|
| 25 |
def get_collection(username: str, notebook_id: str, name: str = "chunks"):
|
src/utils/__pycache__/text.cpython-310.pyc
CHANGED
|
Binary files a/src/utils/__pycache__/text.cpython-310.pyc and b/src/utils/__pycache__/text.cpython-310.pyc differ
|
|
|