dlokesha committed on
Commit ·
e9940ca
1
Parent(s): 00f3502
Updated quiz feature
Browse files- app.py +132 -27
- backend/quiz_service.py +30 -13
app.py
CHANGED
|
@@ -381,19 +381,19 @@ def _load_sources(notebook_id, profile: gr.OAuthProfile | None):
|
|
| 381 |
return _format_sources(sources)
|
| 382 |
|
| 383 |
# Quiz Handlers
|
| 384 |
-
def _get_notebook_pdfs(notebook_id):
|
| 385 |
-
|
|
|
|
| 386 |
return gr.update(choices=[], value=None, visible=False)
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
.
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
)
|
| 394 |
-
pdfs = list({r["source_id"] for r in (result.data or []) if r["source_id"].endswith(".pdf")})
|
| 395 |
return gr.update(choices=pdfs, value=pdfs[0] if pdfs else None, visible=True)
|
| 396 |
|
|
|
|
| 397 |
def _generate_quiz(notebook_id, source_type, pdf_source_id, profile: gr.OAuthProfile | None):
|
| 398 |
from backend.quiz_service import generate_quiz
|
| 399 |
|
|
@@ -406,6 +406,9 @@ def _generate_quiz(notebook_id, source_type, pdf_source_id, profile: gr.OAuthPro
|
|
| 406 |
type_map = {"Text": "txt", "PDF": "pdf", "URL": "url", "All": "all"}
|
| 407 |
source_type_key = type_map.get(source_type, "all")
|
| 408 |
|
|
|
|
|
|
|
|
|
|
| 409 |
try:
|
| 410 |
result = generate_quiz(notebook_id, source_type=source_type_key, source_id=pdf_source_id)
|
| 411 |
questions = result["questions"]
|
|
@@ -419,6 +422,7 @@ def _generate_quiz(notebook_id, source_type, pdf_source_id, profile: gr.OAuthPro
|
|
| 419 |
elif q["type"] == "true_false":
|
| 420 |
updates += [gr.update(visible=True), gr.update(value=q_label), gr.update(choices=["True", "False"], value=None, visible=True), gr.update(value="", visible=False)]
|
| 421 |
else:
|
|
|
|
| 422 |
updates += [gr.update(visible=True), gr.update(value=q_label), gr.update(choices=[], value=None, visible=False), gr.update(value="", visible=True)]
|
| 423 |
else:
|
| 424 |
updates += [gr.update(visible=False), gr.update(value=""), gr.update(choices=[], value=None, visible=False), gr.update(value="", visible=False)]
|
|
@@ -458,6 +462,60 @@ def _submit_quiz(questions, *answers):
|
|
| 458 |
lines.append(f"\n**Score: {score}/{len(questions)}**")
|
| 459 |
return "\n\n".join(lines)
|
| 460 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 461 |
with gr.Blocks(
|
| 462 |
title="NotebookLM Clone - Notebooks",
|
| 463 |
theme=theme,
|
|
@@ -534,9 +592,6 @@ with gr.Blocks(
|
|
| 534 |
|
| 535 |
status = gr.Markdown("Sign in with Hugging Face to manage notebooks.", elem_classes=["status"])
|
| 536 |
|
| 537 |
-
demo.load(_initial_load, inputs=None, outputs=[nb_state, selected_notebook_id, status] + row_outputs, api_name=False)
|
| 538 |
-
demo.load(_list_uploaded_pdfs, inputs=[selected_notebook_id], outputs=[uploaded_pdf_dd], api_name=False)
|
| 539 |
-
|
| 540 |
# Create button
|
| 541 |
create_btn.click(
|
| 542 |
_safe_create,
|
|
@@ -592,15 +647,6 @@ with gr.Blocks(
|
|
| 592 |
outputs=[nb_state, selected_notebook_id, status] + row_outputs,
|
| 593 |
api_name=False,
|
| 594 |
).then(_list_uploaded_pdfs, inputs=[selected_notebook_id], outputs=[uploaded_pdf_dd])
|
| 595 |
-
def _on_select():
|
| 596 |
-
return "Selected notebook updated. Use this for chat/ingestion."
|
| 597 |
-
select_btn.click(
|
| 598 |
-
_select_notebook,
|
| 599 |
-
inputs=[gr.State(i), nb_state],
|
| 600 |
-
outputs=[selected_notebook_id],
|
| 601 |
-
api_name=False,
|
| 602 |
-
).then(_on_select, None, [status]).then(_list_uploaded_pdfs, inputs=[selected_notebook_id], outputs=[uploaded_pdf_dd])
|
| 603 |
-
|
| 604 |
|
| 605 |
# Text Input Section
|
| 606 |
gr.Markdown("---")
|
|
@@ -627,23 +673,35 @@ with gr.Blocks(
|
|
| 627 |
# Quiz Section
|
| 628 |
gr.Markdown("---")
|
| 629 |
gr.Markdown("## Generate Quiz")
|
| 630 |
-
gr.Markdown("Select a source type then generate a quiz.")
|
| 631 |
|
| 632 |
quiz_source_type = gr.Radio(
|
| 633 |
choices=["Text", "PDF", "URL", "All"],
|
| 634 |
value="All",
|
| 635 |
label="Source type",
|
| 636 |
)
|
|
|
|
|
|
|
| 637 |
quiz_pdf_dd = gr.Dropdown(
|
| 638 |
-
label="Select PDF",
|
| 639 |
choices=[],
|
| 640 |
value=None,
|
| 641 |
visible=False,
|
| 642 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 643 |
generate_quiz_btn = gr.Button("Generate Quiz", variant="primary")
|
| 644 |
-
quiz_status = gr.Markdown("")
|
| 645 |
-
quiz_state = gr.State([])
|
| 646 |
|
|
|
|
|
|
|
|
|
|
| 647 |
quiz_components = []
|
| 648 |
for i in range(5):
|
| 649 |
with gr.Group(visible=False) as q_group:
|
|
@@ -655,6 +713,33 @@ with gr.Blocks(
|
|
| 655 |
submit_quiz_btn = gr.Button("Submit Answers", variant="secondary", visible=False)
|
| 656 |
quiz_results = gr.Markdown("")
|
| 657 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 658 |
submit_btn.click(
|
| 659 |
_do_upload,
|
| 660 |
inputs=[txt_input, txt_title, selected_notebook_id],
|
|
@@ -662,17 +747,37 @@ with gr.Blocks(
|
|
| 662 |
)
|
| 663 |
|
| 664 |
quiz_source_type.change(
|
| 665 |
-
|
| 666 |
inputs=[quiz_source_type, selected_notebook_id],
|
| 667 |
outputs=[quiz_pdf_dd],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 668 |
)
|
| 669 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 670 |
quiz_all_outputs = [quiz_status, quiz_state]
|
| 671 |
for c in quiz_components:
|
| 672 |
quiz_all_outputs += [c["group"], c["text"], c["radio"], c["textbox"]]
|
| 673 |
quiz_all_outputs += [submit_quiz_btn, quiz_results]
|
| 674 |
|
| 675 |
generate_quiz_btn.click(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 676 |
_generate_quiz,
|
| 677 |
inputs=[selected_notebook_id, quiz_source_type, quiz_pdf_dd],
|
| 678 |
outputs=quiz_all_outputs,
|
|
|
|
| 381 |
return _format_sources(sources)
|
| 382 |
|
| 383 |
# Quiz Handlers
|
| 384 |
+
def _get_notebook_pdfs(notebook_id, profile: gr.OAuthProfile | None):
    """Build a dropdown update listing the PDFs uploaded to a notebook.

    Looks in ``data/uploads/<user_id>/<notebook_id>`` for ``*.pdf`` files,
    where ``user_id`` comes from ``_user_id(profile)``.

    Args:
        notebook_id: Identifier of the notebook; converted with ``str()``
            to form the directory name.
        profile: OAuth profile of the caller, or ``None``.

    Returns:
        A ``gr.update`` payload. The dropdown is hidden with no choices when
        the user id or notebook id is falsy, or when the upload directory
        does not exist. Otherwise it is visible, its choices are the sorted
        PDF file names, and the first name (if any) is preselected.
    """
    user_id = _user_id(profile)
    if not (user_id and notebook_id):
        return gr.update(choices=[], value=None, visible=False)

    upload_dir = Path("data") / "uploads" / user_id / str(notebook_id)
    if not upload_dir.exists():
        return gr.update(choices=[], value=None, visible=False)

    names = [entry.name for entry in upload_dir.glob("*.pdf")]
    names.sort()
    preselected = names[0] if names else None
    return gr.update(choices=names, value=preselected, visible=True)
|
| 395 |
|
| 396 |
+
|
| 397 |
def _generate_quiz(notebook_id, source_type, pdf_source_id, profile: gr.OAuthProfile | None):
|
| 398 |
from backend.quiz_service import generate_quiz
|
| 399 |
|
|
|
|
| 406 |
type_map = {"Text": "txt", "PDF": "pdf", "URL": "url", "All": "all"}
|
| 407 |
source_type_key = type_map.get(source_type, "all")
|
| 408 |
|
| 409 |
+
if source_type_key == "pdf" and not pdf_source_id:
|
| 410 |
+
return "Pick a PDF first.", [], *([gr.update(visible=False)] * 5 * 4), gr.update(visible=False), ""
|
| 411 |
+
|
| 412 |
try:
|
| 413 |
result = generate_quiz(notebook_id, source_type=source_type_key, source_id=pdf_source_id)
|
| 414 |
questions = result["questions"]
|
|
|
|
| 422 |
elif q["type"] == "true_false":
|
| 423 |
updates += [gr.update(visible=True), gr.update(value=q_label), gr.update(choices=["True", "False"], value=None, visible=True), gr.update(value="", visible=False)]
|
| 424 |
else:
|
| 425 |
+
# change this line for short_answer:
|
| 426 |
updates += [gr.update(visible=True), gr.update(value=q_label), gr.update(choices=[], value=None, visible=False), gr.update(value="", visible=True)]
|
| 427 |
else:
|
| 428 |
updates += [gr.update(visible=False), gr.update(value=""), gr.update(choices=[], value=None, visible=False), gr.update(value="", visible=False)]
|
|
|
|
| 462 |
lines.append(f"\n**Score: {score}/{len(questions)}**")
|
| 463 |
return "\n\n".join(lines)
|
| 464 |
|
| 465 |
+
def _get_quiz_pdfs(source_type, notebook_id):
    """Build a dropdown update listing a notebook's PDFs for the quiz picker.

    Args:
        source_type: Selected source type label; only ``"PDF"`` shows the
            dropdown.
        notebook_id: Identifier of the notebook; converted with ``str()``
            to form the directory name.

    Returns:
        A ``gr.update`` payload. The dropdown is hidden when the source type
        is not ``"PDF"`` or the notebook id is falsy. Otherwise it is visible
        with the sorted PDF names from the first matching notebook folder
        (possibly an empty list), and the first name preselected.
    """
    if source_type != "PDF" or not notebook_id:
        return gr.update(visible=False, choices=[], value=None)

    # NOTE(review): this walks every user's upload folder and takes the first
    # one containing this notebook id, so it is not scoped to the caller.
    uploads_root = Path("data") / "uploads"
    found = []
    if uploads_root.exists():
        for owner_dir in uploads_root.iterdir():
            candidate = owner_dir / str(notebook_id)
            if not candidate.exists():
                continue
            found = sorted([pdf.name for pdf in candidate.glob("*.pdf")])
            break  # only the first matching folder is used

    print(f"DEBUG quiz pdfs found: {found}")
    preselected = found[0] if found else None
    return gr.update(visible=True, choices=found, value=preselected)
|
| 483 |
+
|
| 484 |
+
def _quiz_pdf_dropdown_update(source_type, notebook_id, profile: gr.OAuthProfile | None):
    """Refresh the quiz PDF dropdown for the current source type and notebook.

    Args:
        source_type: Selected source type label; only ``"PDF"`` shows the
            dropdown.
        notebook_id: Identifier of the selected notebook; converted with
            ``str()`` to form the directory name.
        profile: OAuth profile of the caller, or ``None`` when not signed in.

    Returns:
        A ``gr.update`` payload for the dropdown:

        * hidden and empty when ``source_type`` is not ``"PDF"``;
        * visible and empty when no notebook is selected or no PDFs exist;
        * otherwise visible with the sorted PDF file names and the first one
          preselected.
    """
    if source_type != "PDF":
        return gr.update(visible=False, choices=[], value=None)

    if not notebook_id:
        return gr.update(visible=True, choices=[], value=None)

    uploads_root = Path("data") / "uploads"
    user_id = _user_id(profile)

    if user_id:
        # Signed-in user: only ever look inside that user's own folder.
        # Previously a missing folder fell through to the all-users scan
        # below, which could surface another user's file names.
        target_dir = uploads_root / user_id / str(notebook_id)
        pdfs = sorted(p.name for p in target_dir.glob("*.pdf")) if target_dir.is_dir() else []
        return gr.update(visible=True, choices=pdfs, value=pdfs[0] if pdfs else None)

    # Fallback for local dev (no OAuth): scan all user folders and use the
    # first one that contains this notebook.
    if uploads_root.exists():
        for user_dir in uploads_root.iterdir():
            if not user_dir.is_dir():
                continue
            nb_dir = user_dir / str(notebook_id)
            if nb_dir.exists():
                pdfs = sorted(p.name for p in nb_dir.glob("*.pdf"))
                print(f"DEBUG (local fallback): notebook_id={notebook_id}, pdfs={pdfs}")
                return gr.update(visible=True, choices=pdfs, value=pdfs[0] if pdfs else None)

    return gr.update(visible=True, choices=[], value=None)
|
| 513 |
+
|
| 514 |
+
def _generate_btn_update(source_type, pdf_name):
    """Enable or disable the "Generate Quiz" button.

    Args:
        source_type: Selected source type label.
        pdf_name: Currently selected PDF name, or a falsy value when none
            is chosen.

    Returns:
        A ``gr.update`` payload setting ``interactive``. For the ``"PDF"``
        source type the button is interactive only when a PDF is selected;
        for every other source type it is always interactive.
    """
    needs_pdf = source_type == "PDF"
    enabled = bool(pdf_name) if needs_pdf else True
    return gr.update(interactive=enabled)
|
| 518 |
+
|
| 519 |
with gr.Blocks(
|
| 520 |
title="NotebookLM Clone - Notebooks",
|
| 521 |
theme=theme,
|
|
|
|
| 592 |
|
| 593 |
status = gr.Markdown("Sign in with Hugging Face to manage notebooks.", elem_classes=["status"])
|
| 594 |
|
|
|
|
|
|
|
|
|
|
| 595 |
# Create button
|
| 596 |
create_btn.click(
|
| 597 |
_safe_create,
|
|
|
|
| 647 |
outputs=[nb_state, selected_notebook_id, status] + row_outputs,
|
| 648 |
api_name=False,
|
| 649 |
).then(_list_uploaded_pdfs, inputs=[selected_notebook_id], outputs=[uploaded_pdf_dd])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 650 |
|
| 651 |
# Text Input Section
|
| 652 |
gr.Markdown("---")
|
|
|
|
| 673 |
# Quiz Section
|
| 674 |
gr.Markdown("---")
|
| 675 |
gr.Markdown("## Generate Quiz")
|
|
|
|
| 676 |
|
| 677 |
quiz_source_type = gr.Radio(
|
| 678 |
choices=["Text", "PDF", "URL", "All"],
|
| 679 |
value="All",
|
| 680 |
label="Source type",
|
| 681 |
)
|
| 682 |
+
|
| 683 |
+
|
| 684 |
quiz_pdf_dd = gr.Dropdown(
|
| 685 |
+
label="Select PDF (select a notebook first if empty)",
|
| 686 |
choices=[],
|
| 687 |
value=None,
|
| 688 |
visible=False,
|
| 689 |
)
|
| 690 |
+
|
| 691 |
+
demo.load(_initial_load, inputs=None, outputs=[nb_state, selected_notebook_id, status] + row_outputs, api_name=False)
|
| 692 |
+
demo.load(_list_uploaded_pdfs, inputs=[selected_notebook_id], outputs=[uploaded_pdf_dd], api_name=False)
|
| 693 |
+
demo.load(
|
| 694 |
+
_quiz_pdf_dropdown_update,
|
| 695 |
+
inputs=[quiz_source_type, selected_notebook_id],
|
| 696 |
+
outputs=[quiz_pdf_dd],
|
| 697 |
+
api_name=False,
|
| 698 |
+
)
|
| 699 |
+
|
| 700 |
generate_quiz_btn = gr.Button("Generate Quiz", variant="primary")
|
|
|
|
|
|
|
| 701 |
|
| 702 |
+
quiz_status = gr.Markdown("")
|
| 703 |
+
quiz_state = gr.State([])
|
| 704 |
+
|
| 705 |
quiz_components = []
|
| 706 |
for i in range(5):
|
| 707 |
with gr.Group(visible=False) as q_group:
|
|
|
|
| 713 |
submit_quiz_btn = gr.Button("Submit Answers", variant="secondary", visible=False)
|
| 714 |
quiz_results = gr.Markdown("")
|
| 715 |
|
| 716 |
+
for i in range(MAX_NOTEBOOKS):
|
| 717 |
+
select_btn = row_components[i]["select"]
|
| 718 |
+
def _on_select(i=i):
|
| 719 |
+
return "Selected notebook updated. Use this for chat/ingestion."
|
| 720 |
+
|
| 721 |
+
select_btn.click(
|
| 722 |
+
_select_notebook,
|
| 723 |
+
inputs=[gr.State(i), nb_state],
|
| 724 |
+
outputs=[selected_notebook_id],
|
| 725 |
+
api_name=False,
|
| 726 |
+
).then(
|
| 727 |
+
_on_select, None, [status]
|
| 728 |
+
).then(
|
| 729 |
+
_list_uploaded_pdfs, inputs=[selected_notebook_id], outputs=[uploaded_pdf_dd]
|
| 730 |
+
).then(
|
| 731 |
+
_quiz_pdf_dropdown_update,
|
| 732 |
+
inputs=[quiz_source_type, selected_notebook_id],
|
| 733 |
+
outputs=[quiz_pdf_dd],
|
| 734 |
+
api_name=False,
|
| 735 |
+
).then(
|
| 736 |
+
_generate_btn_update,
|
| 737 |
+
inputs=[quiz_source_type, quiz_pdf_dd],
|
| 738 |
+
outputs=[generate_quiz_btn],
|
| 739 |
+
api_name=False,
|
| 740 |
+
)
|
| 741 |
+
|
| 742 |
+
|
| 743 |
submit_btn.click(
|
| 744 |
_do_upload,
|
| 745 |
inputs=[txt_input, txt_title, selected_notebook_id],
|
|
|
|
| 747 |
)
|
| 748 |
|
| 749 |
quiz_source_type.change(
|
| 750 |
+
_quiz_pdf_dropdown_update,
|
| 751 |
inputs=[quiz_source_type, selected_notebook_id],
|
| 752 |
outputs=[quiz_pdf_dd],
|
| 753 |
+
api_name=False,
|
| 754 |
+
).then(
|
| 755 |
+
_generate_btn_update,
|
| 756 |
+
inputs=[quiz_source_type, quiz_pdf_dd],
|
| 757 |
+
outputs=[generate_quiz_btn],
|
| 758 |
+
api_name=False,
|
| 759 |
)
|
| 760 |
|
| 761 |
+
quiz_pdf_dd.change(
|
| 762 |
+
_generate_btn_update,
|
| 763 |
+
inputs=[quiz_source_type, quiz_pdf_dd],
|
| 764 |
+
outputs=[generate_quiz_btn],
|
| 765 |
+
api_name=False,
|
| 766 |
+
)
|
| 767 |
+
|
| 768 |
+
|
| 769 |
+
|
| 770 |
quiz_all_outputs = [quiz_status, quiz_state]
|
| 771 |
for c in quiz_components:
|
| 772 |
quiz_all_outputs += [c["group"], c["text"], c["radio"], c["textbox"]]
|
| 773 |
quiz_all_outputs += [submit_quiz_btn, quiz_results]
|
| 774 |
|
| 775 |
generate_quiz_btn.click(
|
| 776 |
+
lambda: gr.update(value="Generating quiz..."),
|
| 777 |
+
inputs=[],
|
| 778 |
+
outputs=[quiz_status],
|
| 779 |
+
api_name=False,
|
| 780 |
+
).then(
|
| 781 |
_generate_quiz,
|
| 782 |
inputs=[selected_notebook_id, quiz_source_type, quiz_pdf_dd],
|
| 783 |
outputs=quiz_all_outputs,
|
backend/quiz_service.py
CHANGED
|
@@ -88,9 +88,12 @@ def generate_quiz(notebook_id: str, source_type: str = "all", source_id: str = N
|
|
| 88 |
chunks = _get_chunks_by_source_id(notebook_id, source_id)
|
| 89 |
elif source_type in ("txt", "url"):
|
| 90 |
chunks = _get_chunks_by_type(notebook_id, source_type)
|
|
|
|
|
|
|
|
|
|
| 91 |
else:
|
| 92 |
chunks = _get_chunks_for_notebook(notebook_id)
|
| 93 |
-
|
| 94 |
print(f"Found {len(chunks)} chunks for source_type={source_type}")
|
| 95 |
if not chunks:
|
| 96 |
raise ValueError("No chunks found for this source. Please add sources first.")
|
|
@@ -137,33 +140,47 @@ def _get_chunks_by_source_id(notebook_id: str, source_id: str, limit: int = 10)
|
|
| 137 |
|
| 138 |
|
| 139 |
def _get_chunks_by_type(notebook_id: str, source_type: str, limit: int = 10) -> list[str]:
|
| 140 |
-
"""Fetch chunks filtered by source type (txt = UUID source_ids, url = url_ prefix)."""
|
| 141 |
result = (
|
| 142 |
supabase.table("chunks")
|
| 143 |
-
.select("content, source_id")
|
| 144 |
.eq("notebook_id", notebook_id)
|
|
|
|
| 145 |
.execute()
|
| 146 |
)
|
| 147 |
rows = result.data or []
|
| 148 |
if source_type == "url":
|
| 149 |
-
filtered = [r
|
| 150 |
-
else: # txt
|
| 151 |
-
filtered = [r
|
| 152 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
|
| 154 |
|
| 155 |
def _parse_quiz(raw: str) -> list[dict]:
|
| 156 |
print(f"RAW OUTPUT:\n{raw}\n")
|
| 157 |
-
# Find the start of the JSON array
|
| 158 |
start = raw.find('[')
|
| 159 |
if start == -1:
|
| 160 |
raise ValueError("No JSON array found in model output.")
|
| 161 |
|
| 162 |
json_str = raw[start:].strip()
|
| 163 |
|
| 164 |
-
#
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
|
| 169 |
-
|
|
|
|
| 88 |
chunks = _get_chunks_by_source_id(notebook_id, source_id)
|
| 89 |
elif source_type in ("txt", "url"):
|
| 90 |
chunks = _get_chunks_by_type(notebook_id, source_type)
|
| 91 |
+
if not chunks:
|
| 92 |
+
source_label = "text" if source_type == "txt" else "URL"
|
| 93 |
+
raise ValueError(f"No {source_label} sources found. Please add a {source_label} source first.")
|
| 94 |
else:
|
| 95 |
chunks = _get_chunks_for_notebook(notebook_id)
|
| 96 |
+
|
| 97 |
print(f"Found {len(chunks)} chunks for source_type={source_type}")
|
| 98 |
if not chunks:
|
| 99 |
raise ValueError("No chunks found for this source. Please add sources first.")
|
|
|
|
| 140 |
|
| 141 |
|
| 142 |
def _get_chunks_by_type(notebook_id: str, source_type: str, limit: int = 10) -> list[str]:
    """Return chunk contents from the newest source of a given type.

    All chunks of the notebook are fetched ordered by ``created_at``
    descending, then filtered in Python by ``source_id``:

    * ``"url"``: ``source_id`` starts with ``url_``;
    * anything else (treated as text): ``source_id`` neither starts with
      ``url_`` nor ends with ``.pdf``.

    Only chunks sharing the ``source_id`` of the first matching row are kept.

    Args:
        notebook_id: Notebook whose chunks are queried.
        source_type: ``"url"`` or a text type key such as ``"txt"``.
        limit: Maximum number of chunk contents to return.

    Returns:
        Up to ``limit`` chunk content strings, or an empty list when no row
        matches the requested type.
    """
    query = supabase.table("chunks").select("content, source_id, created_at")
    query = query.eq("notebook_id", notebook_id).order("created_at", desc=True)
    rows = query.execute().data or []

    def _matches(source_id):
        # URL sources carry a "url_" prefix; PDFs carry a ".pdf" suffix;
        # everything else counts as text.
        is_url = source_id.startswith("url_")
        if source_type == "url":
            return is_url
        return not is_url and not source_id.endswith(".pdf")

    matching = [row for row in rows if _matches(row["source_id"])]
    if not matching:
        return []

    # Rows are newest-first, so the first match identifies the latest source.
    newest_source_id = matching[0]["source_id"]
    contents = [row["content"] for row in matching if row["source_id"] == newest_source_id]
    return contents[:limit]
|
| 162 |
|
| 163 |
|
| 164 |
def _parse_quiz(raw: str) -> list[dict]:
|
| 165 |
print(f"RAW OUTPUT:\n{raw}\n")
|
|
|
|
| 166 |
start = raw.find('[')
|
| 167 |
if start == -1:
|
| 168 |
raise ValueError("No JSON array found in model output.")
|
| 169 |
|
| 170 |
json_str = raw[start:].strip()
|
| 171 |
|
| 172 |
+
# Try parsing as-is first
|
| 173 |
+
try:
|
| 174 |
+
return json.loads(json_str)
|
| 175 |
+
except json.JSONDecodeError:
|
| 176 |
+
pass
|
| 177 |
+
|
| 178 |
+
# Try fixing truncated JSON by extracting complete objects only
|
| 179 |
+
objects = re.findall(r'\{[^{}]+\}', json_str, re.DOTALL)
|
| 180 |
+
if objects:
|
| 181 |
+
try:
|
| 182 |
+
return json.loads('[' + ','.join(objects) + ']')
|
| 183 |
+
except json.JSONDecodeError:
|
| 184 |
+
pass
|
| 185 |
|
| 186 |
+
raise ValueError("Could not parse quiz JSON from model output.")
|