dlokesha committed on
Commit
e9940ca
·
1 Parent(s): 00f3502

Updated quiz feature

Browse files
Files changed (2) hide show
  1. app.py +132 -27
  2. backend/quiz_service.py +30 -13
app.py CHANGED
@@ -381,19 +381,19 @@ def _load_sources(notebook_id, profile: gr.OAuthProfile | None):
381
  return _format_sources(sources)
382
 
383
  # Quiz Handlers
384
- def _get_notebook_pdfs(notebook_id):
385
- if not notebook_id:
 
386
  return gr.update(choices=[], value=None, visible=False)
387
- from backend.db import supabase
388
- result = (
389
- supabase.table("chunks")
390
- .select("source_id")
391
- .eq("notebook_id", notebook_id)
392
- .execute()
393
- )
394
- pdfs = list({r["source_id"] for r in (result.data or []) if r["source_id"].endswith(".pdf")})
395
  return gr.update(choices=pdfs, value=pdfs[0] if pdfs else None, visible=True)
396
 
 
397
  def _generate_quiz(notebook_id, source_type, pdf_source_id, profile: gr.OAuthProfile | None):
398
  from backend.quiz_service import generate_quiz
399
 
@@ -406,6 +406,9 @@ def _generate_quiz(notebook_id, source_type, pdf_source_id, profile: gr.OAuthPro
406
  type_map = {"Text": "txt", "PDF": "pdf", "URL": "url", "All": "all"}
407
  source_type_key = type_map.get(source_type, "all")
408
 
 
 
 
409
  try:
410
  result = generate_quiz(notebook_id, source_type=source_type_key, source_id=pdf_source_id)
411
  questions = result["questions"]
@@ -419,6 +422,7 @@ def _generate_quiz(notebook_id, source_type, pdf_source_id, profile: gr.OAuthPro
419
  elif q["type"] == "true_false":
420
  updates += [gr.update(visible=True), gr.update(value=q_label), gr.update(choices=["True", "False"], value=None, visible=True), gr.update(value="", visible=False)]
421
  else:
 
422
  updates += [gr.update(visible=True), gr.update(value=q_label), gr.update(choices=[], value=None, visible=False), gr.update(value="", visible=True)]
423
  else:
424
  updates += [gr.update(visible=False), gr.update(value=""), gr.update(choices=[], value=None, visible=False), gr.update(value="", visible=False)]
@@ -458,6 +462,60 @@ def _submit_quiz(questions, *answers):
458
  lines.append(f"\n**Score: {score}/{len(questions)}**")
459
  return "\n\n".join(lines)
460
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
461
  with gr.Blocks(
462
  title="NotebookLM Clone - Notebooks",
463
  theme=theme,
@@ -534,9 +592,6 @@ with gr.Blocks(
534
 
535
  status = gr.Markdown("Sign in with Hugging Face to manage notebooks.", elem_classes=["status"])
536
 
537
- demo.load(_initial_load, inputs=None, outputs=[nb_state, selected_notebook_id, status] + row_outputs, api_name=False)
538
- demo.load(_list_uploaded_pdfs, inputs=[selected_notebook_id], outputs=[uploaded_pdf_dd], api_name=False)
539
-
540
  # Create button
541
  create_btn.click(
542
  _safe_create,
@@ -592,15 +647,6 @@ with gr.Blocks(
592
  outputs=[nb_state, selected_notebook_id, status] + row_outputs,
593
  api_name=False,
594
  ).then(_list_uploaded_pdfs, inputs=[selected_notebook_id], outputs=[uploaded_pdf_dd])
595
- def _on_select():
596
- return "Selected notebook updated. Use this for chat/ingestion."
597
- select_btn.click(
598
- _select_notebook,
599
- inputs=[gr.State(i), nb_state],
600
- outputs=[selected_notebook_id],
601
- api_name=False,
602
- ).then(_on_select, None, [status]).then(_list_uploaded_pdfs, inputs=[selected_notebook_id], outputs=[uploaded_pdf_dd])
603
-
604
 
605
  # Text Input Section
606
  gr.Markdown("---")
@@ -627,23 +673,35 @@ with gr.Blocks(
627
  # Quiz Section
628
  gr.Markdown("---")
629
  gr.Markdown("## Generate Quiz")
630
- gr.Markdown("Select a source type then generate a quiz.")
631
 
632
  quiz_source_type = gr.Radio(
633
  choices=["Text", "PDF", "URL", "All"],
634
  value="All",
635
  label="Source type",
636
  )
 
 
637
  quiz_pdf_dd = gr.Dropdown(
638
- label="Select PDF",
639
  choices=[],
640
  value=None,
641
  visible=False,
642
  )
 
 
 
 
 
 
 
 
 
 
643
  generate_quiz_btn = gr.Button("Generate Quiz", variant="primary")
644
- quiz_status = gr.Markdown("")
645
- quiz_state = gr.State([])
646
 
 
 
 
647
  quiz_components = []
648
  for i in range(5):
649
  with gr.Group(visible=False) as q_group:
@@ -655,6 +713,33 @@ with gr.Blocks(
655
  submit_quiz_btn = gr.Button("Submit Answers", variant="secondary", visible=False)
656
  quiz_results = gr.Markdown("")
657
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
658
  submit_btn.click(
659
  _do_upload,
660
  inputs=[txt_input, txt_title, selected_notebook_id],
@@ -662,17 +747,37 @@ with gr.Blocks(
662
  )
663
 
664
  quiz_source_type.change(
665
- lambda t, nb: _get_notebook_pdfs(nb) if t == "PDF" else gr.update(visible=False, choices=[], value=None),
666
  inputs=[quiz_source_type, selected_notebook_id],
667
  outputs=[quiz_pdf_dd],
 
 
 
 
 
 
668
  )
669
 
 
 
 
 
 
 
 
 
 
670
  quiz_all_outputs = [quiz_status, quiz_state]
671
  for c in quiz_components:
672
  quiz_all_outputs += [c["group"], c["text"], c["radio"], c["textbox"]]
673
  quiz_all_outputs += [submit_quiz_btn, quiz_results]
674
 
675
  generate_quiz_btn.click(
 
 
 
 
 
676
  _generate_quiz,
677
  inputs=[selected_notebook_id, quiz_source_type, quiz_pdf_dd],
678
  outputs=quiz_all_outputs,
 
381
  return _format_sources(sources)
382
 
383
  # Quiz Handlers
def _get_notebook_pdfs(notebook_id, profile: gr.OAuthProfile | None):
    """Build the dropdown update listing the PDFs uploaded to a notebook.

    PDFs are looked up on disk under ``data/uploads/<user_id>/<notebook_id>``,
    where ``user_id`` is whatever ``_user_id(profile)`` returns.

    Returns:
        A ``gr.update`` that hides the dropdown (no choices) when the user id
        or ``notebook_id`` is falsy or the upload directory does not exist;
        otherwise one that shows the sorted ``*.pdf`` file names with the
        first name preselected (``None`` when the directory has no PDFs).
    """
    uid = _user_id(profile)

    upload_dir = None
    if uid and notebook_id:
        upload_dir = Path("data") / "uploads" / uid / str(notebook_id)

    # Both "nothing to look up" and "nothing uploaded yet" hide the dropdown.
    if upload_dir is None or not upload_dir.exists():
        return gr.update(choices=[], value=None, visible=False)

    names = [entry.name for entry in upload_dir.glob("*.pdf")]
    names.sort()
    preselected = names[0] if names else None
    return gr.update(choices=names, value=preselected, visible=True)
395
 
396
+
397
  def _generate_quiz(notebook_id, source_type, pdf_source_id, profile: gr.OAuthProfile | None):
398
  from backend.quiz_service import generate_quiz
399
 
 
406
  type_map = {"Text": "txt", "PDF": "pdf", "URL": "url", "All": "all"}
407
  source_type_key = type_map.get(source_type, "all")
408
 
409
+ if source_type_key == "pdf" and not pdf_source_id:
410
+ return "Pick a PDF first.", [], *([gr.update(visible=False)] * 5 * 4), gr.update(visible=False), ""
411
+
412
  try:
413
  result = generate_quiz(notebook_id, source_type=source_type_key, source_id=pdf_source_id)
414
  questions = result["questions"]
 
422
  elif q["type"] == "true_false":
423
  updates += [gr.update(visible=True), gr.update(value=q_label), gr.update(choices=["True", "False"], value=None, visible=True), gr.update(value="", visible=False)]
424
  else:
425
+ # change this line for short_answer:
426
  updates += [gr.update(visible=True), gr.update(value=q_label), gr.update(choices=[], value=None, visible=False), gr.update(value="", visible=True)]
427
  else:
428
  updates += [gr.update(visible=False), gr.update(value=""), gr.update(choices=[], value=None, visible=False), gr.update(value="", visible=False)]
 
462
  lines.append(f"\n**Score: {score}/{len(questions)}**")
463
  return "\n\n".join(lines)
464
 
def _get_quiz_pdfs(source_type, notebook_id):
    """Return the quiz PDF dropdown update without needing a user profile.

    The dropdown is hidden unless ``source_type`` is ``"PDF"`` and
    ``notebook_id`` is truthy. Otherwise every directory under
    ``data/uploads`` is probed for a ``<notebook_id>`` sub-directory; the
    first one that exists supplies the sorted ``*.pdf`` file names shown in
    the dropdown (first name preselected, or no selection when empty).
    """
    if source_type != "PDF" or not notebook_id:
        return gr.update(visible=False, choices=[], value=None)

    # Search across all users for this notebook_id.
    uploads_root = Path("data") / "uploads"
    if uploads_root.exists():
        candidates = (owner / str(notebook_id) for owner in uploads_root.iterdir())
    else:
        candidates = iter(())

    # Stop at the first user directory that contains this notebook.
    found = next((folder for folder in candidates if folder.exists()), None)
    pdfs = [] if found is None else sorted(f.name for f in found.glob("*.pdf"))

    print(f"DEBUG quiz pdfs found: {pdfs}")
    selected = pdfs[0] if pdfs else None
    return gr.update(visible=True, choices=pdfs, value=selected)
483
+
def _quiz_pdf_dropdown_update(source_type, notebook_id, profile: gr.OAuthProfile | None):
    """Return the ``gr.update`` for the quiz PDF dropdown.

    Args:
        source_type: Selected quiz source type; only ``"PDF"`` shows the
            dropdown.
        notebook_id: Currently selected notebook id (may be falsy).
        profile: OAuth profile passed to ``_user_id`` to resolve the user id.

    Returns:
        A hidden, empty dropdown when ``source_type`` is not ``"PDF"``.
        Otherwise a visible dropdown whose choices are the sorted ``*.pdf``
        file names found for the notebook (first one preselected), or a
        visible empty dropdown when no notebook is selected or no upload
        directory is found.
    """
    if source_type != "PDF":
        return gr.update(visible=False, choices=[], value=None)

    def _shown(pdfs):
        # Visible dropdown; preselect the first PDF when there is one.
        return gr.update(visible=True, choices=pdfs, value=pdfs[0] if pdfs else None)

    if not notebook_id:
        return _shown([])

    user_id = _user_id(profile)

    if user_id:
        # Production path: a signed-in user only ever sees their own uploads.
        # Do NOT fall through to the cross-user scan below, otherwise a
        # missing directory would expose another user's file names.
        target_dir = Path("data") / "uploads" / user_id / str(notebook_id)
        if target_dir.exists():
            return _shown(sorted(p.name for p in target_dir.glob("*.pdf")))
        return _shown([])

    # Fallback for local dev (no OAuth): scan all user folders.
    base = Path("data") / "uploads"
    if base.exists():
        # Sorted so the chosen folder does not depend on filesystem order.
        for user_dir in sorted(base.iterdir()):
            if not user_dir.is_dir():
                continue
            nb_dir = user_dir / str(notebook_id)
            if nb_dir.exists():
                pdfs = sorted(p.name for p in nb_dir.glob("*.pdf"))
                print(f"DEBUG (local fallback): notebook_id={notebook_id}, pdfs={pdfs}")
                return _shown(pdfs)

    return _shown([])
513
+
def _generate_btn_update(source_type, pdf_name):
    """Enable or disable the "Generate Quiz" button.

    For the ``"PDF"`` source type the button is interactive only when a PDF
    is selected (``pdf_name`` is truthy); for every other source type it is
    always interactive.
    """
    enabled = bool(pdf_name) if source_type == "PDF" else True
    return gr.update(interactive=enabled)
518
+
519
  with gr.Blocks(
520
  title="NotebookLM Clone - Notebooks",
521
  theme=theme,
 
592
 
593
  status = gr.Markdown("Sign in with Hugging Face to manage notebooks.", elem_classes=["status"])
594
 
 
 
 
595
  # Create button
596
  create_btn.click(
597
  _safe_create,
 
647
  outputs=[nb_state, selected_notebook_id, status] + row_outputs,
648
  api_name=False,
649
  ).then(_list_uploaded_pdfs, inputs=[selected_notebook_id], outputs=[uploaded_pdf_dd])
 
 
 
 
 
 
 
 
 
650
 
651
  # Text Input Section
652
  gr.Markdown("---")
 
673
  # Quiz Section
674
  gr.Markdown("---")
675
  gr.Markdown("## Generate Quiz")
 
676
 
677
  quiz_source_type = gr.Radio(
678
  choices=["Text", "PDF", "URL", "All"],
679
  value="All",
680
  label="Source type",
681
  )
682
+
683
+
684
  quiz_pdf_dd = gr.Dropdown(
685
+ label="Select PDF (select a notebook first if empty)",
686
  choices=[],
687
  value=None,
688
  visible=False,
689
  )
690
+
691
+ demo.load(_initial_load, inputs=None, outputs=[nb_state, selected_notebook_id, status] + row_outputs, api_name=False)
692
+ demo.load(_list_uploaded_pdfs, inputs=[selected_notebook_id], outputs=[uploaded_pdf_dd], api_name=False)
693
+ demo.load(
694
+ _quiz_pdf_dropdown_update,
695
+ inputs=[quiz_source_type, selected_notebook_id],
696
+ outputs=[quiz_pdf_dd],
697
+ api_name=False,
698
+ )
699
+
700
  generate_quiz_btn = gr.Button("Generate Quiz", variant="primary")
 
 
701
 
702
+ quiz_status = gr.Markdown("")
703
+ quiz_state = gr.State([])
704
+
705
  quiz_components = []
706
  for i in range(5):
707
  with gr.Group(visible=False) as q_group:
 
713
  submit_quiz_btn = gr.Button("Submit Answers", variant="secondary", visible=False)
714
  quiz_results = gr.Markdown("")
715
 
716
+ for i in range(MAX_NOTEBOOKS):
717
+ select_btn = row_components[i]["select"]
718
+ def _on_select(i=i):
719
+ return "Selected notebook updated. Use this for chat/ingestion."
720
+
721
+ select_btn.click(
722
+ _select_notebook,
723
+ inputs=[gr.State(i), nb_state],
724
+ outputs=[selected_notebook_id],
725
+ api_name=False,
726
+ ).then(
727
+ _on_select, None, [status]
728
+ ).then(
729
+ _list_uploaded_pdfs, inputs=[selected_notebook_id], outputs=[uploaded_pdf_dd]
730
+ ).then(
731
+ _quiz_pdf_dropdown_update,
732
+ inputs=[quiz_source_type, selected_notebook_id],
733
+ outputs=[quiz_pdf_dd],
734
+ api_name=False,
735
+ ).then(
736
+ _generate_btn_update,
737
+ inputs=[quiz_source_type, quiz_pdf_dd],
738
+ outputs=[generate_quiz_btn],
739
+ api_name=False,
740
+ )
741
+
742
+
743
  submit_btn.click(
744
  _do_upload,
745
  inputs=[txt_input, txt_title, selected_notebook_id],
 
747
  )
748
 
749
  quiz_source_type.change(
750
+ _quiz_pdf_dropdown_update,
751
  inputs=[quiz_source_type, selected_notebook_id],
752
  outputs=[quiz_pdf_dd],
753
+ api_name=False,
754
+ ).then(
755
+ _generate_btn_update,
756
+ inputs=[quiz_source_type, quiz_pdf_dd],
757
+ outputs=[generate_quiz_btn],
758
+ api_name=False,
759
  )
760
 
761
+ quiz_pdf_dd.change(
762
+ _generate_btn_update,
763
+ inputs=[quiz_source_type, quiz_pdf_dd],
764
+ outputs=[generate_quiz_btn],
765
+ api_name=False,
766
+ )
767
+
768
+
769
+
770
  quiz_all_outputs = [quiz_status, quiz_state]
771
  for c in quiz_components:
772
  quiz_all_outputs += [c["group"], c["text"], c["radio"], c["textbox"]]
773
  quiz_all_outputs += [submit_quiz_btn, quiz_results]
774
 
775
  generate_quiz_btn.click(
776
+ lambda: gr.update(value="Generating quiz..."),
777
+ inputs=[],
778
+ outputs=[quiz_status],
779
+ api_name=False,
780
+ ).then(
781
  _generate_quiz,
782
  inputs=[selected_notebook_id, quiz_source_type, quiz_pdf_dd],
783
  outputs=quiz_all_outputs,
backend/quiz_service.py CHANGED
@@ -88,9 +88,12 @@ def generate_quiz(notebook_id: str, source_type: str = "all", source_id: str = N
88
  chunks = _get_chunks_by_source_id(notebook_id, source_id)
89
  elif source_type in ("txt", "url"):
90
  chunks = _get_chunks_by_type(notebook_id, source_type)
 
 
 
91
  else:
92
  chunks = _get_chunks_for_notebook(notebook_id)
93
-
94
  print(f"Found {len(chunks)} chunks for source_type={source_type}")
95
  if not chunks:
96
  raise ValueError("No chunks found for this source. Please add sources first.")
@@ -137,33 +140,47 @@ def _get_chunks_by_source_id(notebook_id: str, source_id: str, limit: int = 10)
137
 
138
 
139
  def _get_chunks_by_type(notebook_id: str, source_type: str, limit: int = 10) -> list[str]:
140
- """Fetch chunks filtered by source type (txt = UUID source_ids, url = url_ prefix)."""
141
  result = (
142
  supabase.table("chunks")
143
- .select("content, source_id")
144
  .eq("notebook_id", notebook_id)
 
145
  .execute()
146
  )
147
  rows = result.data or []
148
  if source_type == "url":
149
- filtered = [r["content"] for r in rows if r["source_id"].startswith("url_")]
150
- else: # txt — UUID source_ids
151
- filtered = [r["content"] for r in rows if not r["source_id"].startswith("url_") and not r["source_id"].endswith(".pdf")]
152
- return filtered[:limit]
 
 
 
 
 
 
153
 
154
 
155
  def _parse_quiz(raw: str) -> list[dict]:
156
  print(f"RAW OUTPUT:\n{raw}\n")
157
- # Find the start of the JSON array
158
  start = raw.find('[')
159
  if start == -1:
160
  raise ValueError("No JSON array found in model output.")
161
 
162
  json_str = raw[start:].strip()
163
 
164
- # If closing bracket is missing, add it
165
- if not json_str.endswith(']'):
166
- # Remove trailing comma if present
167
- json_str = json_str.rstrip().rstrip(',') + '\n]'
 
 
 
 
 
 
 
 
 
168
 
169
- return json.loads(json_str)
 
88
  chunks = _get_chunks_by_source_id(notebook_id, source_id)
89
  elif source_type in ("txt", "url"):
90
  chunks = _get_chunks_by_type(notebook_id, source_type)
91
+ if not chunks:
92
+ source_label = "text" if source_type == "txt" else "URL"
93
+ raise ValueError(f"No {source_label} sources found. Please add a {source_label} source first.")
94
  else:
95
  chunks = _get_chunks_for_notebook(notebook_id)
96
+
97
  print(f"Found {len(chunks)} chunks for source_type={source_type}")
98
  if not chunks:
99
  raise ValueError("No chunks found for this source. Please add sources first.")
 
140
 
141
 
def _get_chunks_by_type(notebook_id: str, source_type: str, limit: int = 10) -> list[str]:
    """Fetch chunk contents of the most recent source of the given type.

    All chunk rows of the notebook are requested ordered by ``created_at``
    descending, then filtered by ``source_id``:

    * ``source_type == "url"`` keeps ids starting with ``"url_"``;
    * any other value (txt) keeps ids that neither start with ``"url_"``
      nor end with ``".pdf"``.

    Only rows sharing the ``source_id`` of the first matching row are used.

    Returns:
        Up to ``limit`` chunk contents, or an empty list when nothing matches.
    """
    query = supabase.table("chunks").select("content, source_id, created_at")
    query = query.eq("notebook_id", notebook_id).order("created_at", desc=True)
    records = query.execute().data or []

    def _wanted(source_id):
        is_url = source_id.startswith("url_")
        if source_type == "url":
            return is_url
        return not is_url and not source_id.endswith(".pdf")

    newest_id = None
    contents = []
    for record in records:
        sid = record["source_id"]
        if not _wanted(sid):
            continue
        if newest_id is None:
            # First match in the descending result defines the latest source.
            newest_id = sid
        if sid == newest_id:
            contents.append(record["content"])

    return contents[:limit]
162
 
163
 
164
  def _parse_quiz(raw: str) -> list[dict]:
165
  print(f"RAW OUTPUT:\n{raw}\n")
 
166
  start = raw.find('[')
167
  if start == -1:
168
  raise ValueError("No JSON array found in model output.")
169
 
170
  json_str = raw[start:].strip()
171
 
172
+ # Try parsing as-is first
173
+ try:
174
+ return json.loads(json_str)
175
+ except json.JSONDecodeError:
176
+ pass
177
+
178
+ # Try fixing truncated JSON by extracting complete objects only
179
+ objects = re.findall(r'\{[^{}]+\}', json_str, re.DOTALL)
180
+ if objects:
181
+ try:
182
+ return json.loads('[' + ','.join(objects) + ']')
183
+ except json.JSONDecodeError:
184
+ pass
185
 
186
+ raise ValueError("Could not parse quiz JSON from model output.")