Spaces:

pbichpur
/

NotebookLMClone

Sleeping

App Files Files Community

github-actions[bot] committed on Feb 28

Commit

f09ce8f

1 Parent(s): 2e03e76

Sync from GitHub 1379732458575bf35217d4930a784dcfb30c8bf4

Browse files

Files changed (7) hide show

app.py +2 -2
frontend/app.py +88 -61
src/artifacts/quiz_generator.py +47 -9
src/artifacts/report_generator.py +100 -16
tests/test_artifact_api.py +14 -0
tests/test_artifacts.py +37 -0
tests/test_report_llm_providers.py +78 -0

app.py CHANGED Viewed

@@ -1005,8 +1005,8 @@ async def generate_report_for_notebook(
             topic_focus=payload.topic_focus,
         )
     except Exception as exc:
-        crud.update_artifact(db, artifact.id, status="failed", error_message=str(exc))
-        raise HTTPException(status_code=500, detail=f"Report generation failed: {exc}") from exc
     if "error" in result:
         artifact = crud.update_artifact(db, artifact.id, status="failed", error_message=result["error"])

             topic_focus=payload.topic_focus,
         )
     except Exception as exc:
+        artifact = crud.update_artifact(db, artifact.id, status="failed", error_message=str(exc))
+        return _artifact_response(artifact)
     if "error" in result:
         artifact = crud.update_artifact(db, artifact.id, status="failed", error_message=result["error"])

frontend/app.py CHANGED Viewed

@@ -450,6 +450,37 @@ def choose_workspace_section(notebook_id: int) -> str:
     return str(selected)
 inject_theme()
 if "page" not in st.session_state:
@@ -1113,78 +1144,74 @@ elif page == "Notebooks":
                                 render_metric_card("Failed", failed_count)
                             st.dataframe(artifacts, use_container_width=True, hide_index=True)
-                            artifact_options = {
-                                build_artifact_option_label(a): a
-                                for a in artifacts
-                                if isinstance(a, dict) and "id" in a
-                            }
-                            selected_artifact_label = st.selectbox(
-                                "Select artifact",
-                                options=list(artifact_options.keys()),
-                                key="selected_artifact_label",
                             )
-                            selected_artifact = artifact_options[selected_artifact_label]
-                            artifact_id = int(selected_artifact["id"])
-                            artifact_type = str(selected_artifact.get("type", ""))
-                            artifact_status = str(selected_artifact.get("status", ""))
-                            artifact_content = selected_artifact.get("content")
-                            artifact_error = selected_artifact.get("error_message")
-                            status_col, type_col = st.columns([1, 4])
-                            with status_col:
-                                render_status_pill(artifact_status)
-                            with type_col:
-                                st.caption(f"Artifact type: {artifact_type}")
-                            if artifact_error:
-                                st.error(str(artifact_error))
-                            if artifact_type == "report" and artifact_content:
-                                st.markdown("### Report Preview")
-                                st.markdown(str(artifact_content))
-                                st.download_button(
-                                    "Download report (.md)",
-                                    data=str(artifact_content).encode("utf-8"),
-                                    file_name=f"report_{artifact_id}.md",
-                                    mime="text/markdown",
-                                )
-                            elif artifact_type == "quiz" and artifact_content:
-                                st.markdown("### Quiz Preview")
-                                st.markdown(str(artifact_content))
-                                st.download_button(
-                                    "Download quiz (.md)",
-                                    data=str(artifact_content).encode("utf-8"),
-                                    file_name=f"quiz_{artifact_id}.md",
-                                    mime="text/markdown",
-                                )
-                            elif artifact_type == "podcast":
-                                if artifact_content:
-                                    st.markdown("### Transcript")
                                     st.markdown(str(artifact_content))
                                     st.download_button(
-                                        "Download transcript (.md)",
                                         data=str(artifact_content).encode("utf-8"),
-                                        file_name=f"podcast_transcript_{artifact_id}.md",
                                         mime="text/markdown",
                                     )
-                                if artifact_status == "ready":
-                                    ok_audio, audio_result, _ = api_get_bytes(
-                                        f"/notebooks/{selected_notebook_id}/artifacts/{artifact_id}/audio"
                                     )
-                                    if ok_audio and isinstance(audio_result, bytes):
-                                        st.audio(audio_result, format="audio/mp3")
                                         st.download_button(
-                                            "Download podcast (.mp3)",
-                                            data=audio_result,
-                                            file_name=f"podcast_{artifact_id}.mp3",
-                                            mime="audio/mpeg",
                                         )
                                     else:
-                                        st.error(f"Unable to load audio: {audio_result}")
                                 else:
-                                    st.info(f"Podcast status: {artifact_status}")
-                            else:
-                                st.info("Select an artifact to preview.")
                             if auto_refresh and in_flight > 0:
                                 st.caption(

     return str(selected)
+def choose_artifact_for_notebook(notebook_id: int, artifacts: list[dict[str, Any]]) -> dict[str, Any] | None:
+    """
+    Render the "Select artifact" dropdown for one notebook and return the chosen artifact.
+
+    Entries that are not dicts or lack an "id" key are ignored. The selection is
+    remembered in st.session_state under a per-notebook key; when no usable
+    previous selection exists, the artifact with the highest id is preselected.
+
+    Returns:
+        The selected artifact dict, or None when there is no valid artifact.
+    """
+    valid_artifacts = [
+        artifact for artifact in artifacts if isinstance(artifact, dict) and "id" in artifact
+    ]
+    if not valid_artifacts:
+        return None
+    # Latest first for easier access to recently generated items.
+    valid_artifacts.sort(key=lambda a: int(a.get("id", 0)), reverse=True)
+    # Keyed by integer id; insertion order keeps the newest-first ordering for the dropdown.
+    artifact_map = {int(a["id"]): a for a in valid_artifacts}
+    artifact_ids = list(artifact_map.keys())
+    state_key = f"selected_artifact_id_{notebook_id}"
+    previous_id = st.session_state.get(state_key)
+    # A missing, non-int, or no-longer-listed selection falls back to the newest artifact.
+    if not isinstance(previous_id, int) or previous_id not in artifact_map:
+        previous_id = artifact_ids[0]
+    selected_id = st.selectbox(
+        "Select artifact",
+        options=artifact_ids,
+        index=artifact_ids.index(previous_id),
+        key=f"artifact_selector_{notebook_id}",
+        format_func=lambda aid: build_artifact_option_label(artifact_map[int(aid)]),
+    )
+    # Defensive: if the widget hands back anything but an int, keep the previous choice.
+    if not isinstance(selected_id, int):
+        return artifact_map[previous_id]
+    st.session_state[state_key] = selected_id
+    return artifact_map[selected_id]
 inject_theme()
 if "page" not in st.session_state:
                                 render_metric_card("Failed", failed_count)
                             st.dataframe(artifacts, use_container_width=True, hide_index=True)
+                            selected_artifact = choose_artifact_for_notebook(
+                                int(selected_notebook_id),
+                                artifacts,
                             )
+                            if selected_artifact is None:
+                                st.info("Select an artifact to preview.")
+                            else:
+                                artifact_id = int(selected_artifact["id"])
+                                artifact_type = str(selected_artifact.get("type", ""))
+                                artifact_status = str(selected_artifact.get("status", ""))
+                                artifact_content = selected_artifact.get("content")
+                                artifact_error = selected_artifact.get("error_message")
+                                status_col, type_col = st.columns([1, 4])
+                                with status_col:
+                                    render_status_pill(artifact_status)
+                                with type_col:
+                                    st.caption(f"Artifact type: {artifact_type}")
+                                if artifact_error:
+                                    st.error(str(artifact_error))
+                                if artifact_type == "report" and artifact_content:
+                                    st.markdown("### Report Preview")
                                     st.markdown(str(artifact_content))
                                     st.download_button(
+                                        "Download report (.md)",
                                         data=str(artifact_content).encode("utf-8"),
+                                        file_name=f"report_{artifact_id}.md",
                                         mime="text/markdown",
                                     )
+                                elif artifact_type == "quiz" and artifact_content:
+                                    st.markdown("### Quiz Preview")
+                                    st.markdown(str(artifact_content))
+                                    st.download_button(
+                                        "Download quiz (.md)",
+                                        data=str(artifact_content).encode("utf-8"),
+                                        file_name=f"quiz_{artifact_id}.md",
+                                        mime="text/markdown",
                                     )
+                                elif artifact_type == "podcast":
+                                    if artifact_content:
+                                        st.markdown("### Transcript")
+                                        st.markdown(str(artifact_content))
                                         st.download_button(
+                                            "Download transcript (.md)",
+                                            data=str(artifact_content).encode("utf-8"),
+                                            file_name=f"podcast_transcript_{artifact_id}.md",
+                                            mime="text/markdown",
+                                        )
+                                    if artifact_status == "ready":
+                                        ok_audio, audio_result, _ = api_get_bytes(
+                                            f"/notebooks/{selected_notebook_id}/artifacts/{artifact_id}/audio"
                                         )
+                                        if ok_audio and isinstance(audio_result, bytes):
+                                            st.audio(audio_result, format="audio/mp3")
+                                            st.download_button(
+                                                "Download podcast (.mp3)",
+                                                data=audio_result,
+                                                file_name=f"podcast_{artifact_id}.mp3",
+                                                mime="audio/mpeg",
+                                            )
+                                        else:
+                                            st.error(f"Unable to load audio: {audio_result}")
                                     else:
+                                        st.info(f"Podcast status: {artifact_status}")
                                 else:
+                                    st.info("Select an artifact to preview.")
                             if auto_refresh and in_flight > 0:
                                 st.caption(

src/artifacts/quiz_generator.py CHANGED Viewed

@@ -312,13 +312,9 @@ class QuizGenerator:
             if not prompt:
                 continue
-            options_raw = item.get("options")
-            options: List[str] = []
-            if isinstance(options_raw, list):
-                for opt in options_raw:
-                    text = str(opt).strip()
-                    if text:
-                        options.append(text)
             answer = self._normalize_answer_letter(str(item.get("correct_answer", "")).strip())
             explanation = str(item.get("explanation", "")).strip()
@@ -339,8 +335,48 @@ class QuizGenerator:
                 break
         return normalized
     def _normalize_answer_letter(self, value: str) -> str:
-        match = re.search(r"[A-D]", value.upper())
         return match.group(0) if match else ""
     def _build_quiz_prompt(self, context: str, num_questions: int, difficulty: str) -> str:
@@ -412,7 +448,9 @@ Requirements:
             lines.append(f"### {idx}. {prompt or 'Question'}")
             options = question.get("options", [])
             for option in options if isinstance(options, list) else []:
-                lines.append(f"- {str(option)}")
             lines.append("")
         lines.append("## Answer Key")

             if not prompt:
                 continue
+            options = self._normalize_options(item.get("options"))
+            if not options:
+                continue
             answer = self._normalize_answer_letter(str(item.get("correct_answer", "")).strip())
             explanation = str(item.get("explanation", "")).strip()
                 break
         return normalized
+    def _normalize_options(self, options_raw: Any) -> List[str]:
+        """
+        Normalize options into 2-6 labeled choices: A) ... B) ... etc.
+        Accepts list, dict, or multiline string.
+        """
+        parsed: List[str] = []
+        if isinstance(options_raw, list):
+            parsed = [str(opt).strip() for opt in options_raw if str(opt).strip()]
+        elif isinstance(options_raw, dict):
+            for key in sorted(options_raw.keys()):
+                value = str(options_raw.get(key, "")).strip()
+                if not value:
+                    continue
+                parsed.append(f"{str(key).strip().upper()}) {value}")
+        elif isinstance(options_raw, str):
+            lines = [line.strip() for line in options_raw.replace("\r", "\n").split("\n")]
+            parsed = [line for line in lines if line]
+        if not parsed:
+            return []
+        cleaned: List[str] = []
+        letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+        for idx, text in enumerate(parsed):
+            body = self._option_text_only(text)
+            if not body:
+                continue
+            label = letters[idx] if idx < len(letters) else str(idx + 1)
+            cleaned.append(f"{label}) {body}")
+        # Keep a reasonable range and ensure common quiz shape is preserved.
+        return cleaned[:6]
+    def _option_text_only(self, value: str) -> str:
+        text = str(value or "").strip()
+        # Strip prefixes like "A)", "A.", "(A)", "1)", "1."
+        text = re.sub(r"^\(?[A-Z0-9]\)?[\.\):\-]\s*", "", text, flags=re.IGNORECASE)
+        return text.strip()
     def _normalize_answer_letter(self, value: str) -> str:
+        match = re.search(r"[A-Z]", value.upper())
         return match.group(0) if match else ""
     def _build_quiz_prompt(self, context: str, num_questions: int, difficulty: str) -> str:
             lines.append(f"### {idx}. {prompt or 'Question'}")
             options = question.get("options", [])
             for option in options if isinstance(options, list) else []:
+                option_text = str(option).strip()
+                if option_text:
+                    lines.append(f"- {option_text}")
             lines.append("")
         lines.append("## Answer Key")

src/artifacts/report_generator.py CHANGED Viewed

@@ -10,17 +10,73 @@ from typing import Optional
 from dotenv import load_dotenv
 from openai import OpenAI
 from src.ingestion.vectorstore import ChromaAdapter
 load_dotenv()
 class ReportGenerator:
-    def __init__(self, api_key: Optional[str] = None, model: Optional[str] = None):
-        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
-        self.model = model or os.getenv("LLM_MODEL", "gpt-4o-mini")
-        self.client = OpenAI(api_key=self.api_key)
     def generate_report(
         self,
@@ -40,6 +96,8 @@ class ReportGenerator:
         return {
             "content": report_markdown,
             "detail_level": detail_level,
         }
     def _get_report_context(self, user_id: str, notebook_id: str, topic_focus: str | None) -> str:
@@ -101,24 +159,50 @@ Requirements:
 """
         try:
-            response = self.client.chat.completions.create(
                 model=self.model,
                 messages=[
-                    {
-                        "role": "system",
-                        "content": (
-                            "You write high quality reports grounded only in provided source context. "
-                            "Do not invent facts."
-                        ),
-                    },
                     {"role": "user", "content": prompt},
                 ],
                 temperature=0.4,
             )
-            content = response.choices[0].message.content or ""
-            return str(content).strip()
-        except Exception:
-            return ""
     def save_report(self, markdown_text: str, user_id: str, notebook_id: str) -> str:
         report_dir = Path(f"data/users/{user_id}/notebooks/{notebook_id}/artifacts/reports")

 from dotenv import load_dotenv
 from openai import OpenAI
+import requests
 from src.ingestion.vectorstore import ChromaAdapter
 load_dotenv()
+# Provider names accepted by ReportGenerator; anything else raises ValueError at construction.
+SUPPORTED_REPORT_LLM_PROVIDERS = {"openai", "groq", "ollama"}
+# Model used per provider when neither an explicit model nor a model env var is set.
+DEFAULT_REPORT_MODELS = {
+    "openai": "gpt-4o-mini",
+    "groq": "llama-3.1-8b-instant",
+    "ollama": "qwen2.5:3b",
+}
+# System instruction shared by every provider so reports stay grounded in the retrieved context.
+REPORT_SYSTEM_PROMPT = (
+    "You write high quality reports grounded only in provided source context. "
+    "Do not invent facts."
+)
 class ReportGenerator:
+    def __init__(
+        self,
+        api_key: Optional[str] = None,
+        model: Optional[str] = None,
+        llm_provider: Optional[str] = None,
+    ):
+        provider_default = (
+            llm_provider
+            or os.getenv("REPORT_LLM_PROVIDER", "").strip()
+            or os.getenv("QUIZ_LLM_PROVIDER", "").strip()
+            or os.getenv("TRANSCRIPT_LLM_PROVIDER", "").strip()
+            or "openai"
+        )
+        self.llm_provider = provider_default.strip().lower()
+        if self.llm_provider not in SUPPORTED_REPORT_LLM_PROVIDERS:
+            raise ValueError(
+                f"Unsupported REPORT_LLM_PROVIDER='{self.llm_provider}'. "
+                f"Choose from: {sorted(SUPPORTED_REPORT_LLM_PROVIDERS)}"
+            )
+        self.model = self._resolve_model_name(model)
+        self._openai_client: OpenAI | None = None
+        self._groq_client = None
+        self._ollama_base_url = os.getenv("OLLAMA_BASE_URL", "http://127.0.0.1:11434").rstrip("/")
+        if self.llm_provider == "openai":
+            self.api_key = api_key or os.getenv("OPENAI_API_KEY")
+            self._openai_client = OpenAI(api_key=self.api_key)
+        elif self.llm_provider == "groq":
+            from groq import Groq
+            groq_api_key = os.getenv("GROQ_API_KEY")
+            if not groq_api_key:
+                raise ValueError("GROQ_API_KEY is required when REPORT_LLM_PROVIDER=groq")
+            self._groq_client = Groq(api_key=groq_api_key)
+        else:
+            self.api_key = None
+    def _resolve_model_name(self, explicit_model: Optional[str]) -> str:
+        """
+        Choose the model name to use for report generation.
+
+        Precedence: non-blank explicit argument, then REPORT_LLM_MODEL, then
+        LLM_MODEL, then the default for the active provider ("gpt-4o-mini" if
+        the provider has no entry in DEFAULT_REPORT_MODELS).
+        """
+        if explicit_model and explicit_model.strip():
+            return explicit_model.strip()
+        configured = os.getenv("REPORT_LLM_MODEL", "").strip()
+        if configured:
+            return configured
+        # NOTE(review): LLM_MODEL is honoured for every provider — confirm it cannot
+        # hold a name that is only valid for a different provider than the active one.
+        legacy = os.getenv("LLM_MODEL", "").strip()
+        if legacy:
+            return legacy
+        return DEFAULT_REPORT_MODELS.get(self.llm_provider, "gpt-4o-mini")
     def generate_report(
         self,
         return {
             "content": report_markdown,
             "detail_level": detail_level,
+            "llm_provider": self.llm_provider,
+            "llm_model": self.model,
         }
     def _get_report_context(self, user_id: str, notebook_id: str, topic_focus: str | None) -> str:
 """
         try:
+            return self._generate_report_content(prompt)
+        except Exception:
+            return ""
+    def _generate_report_content(self, prompt: str) -> str:
+        """
+        Send the prompt to the configured provider and return the stripped reply text.
+
+        "openai" and "groq" go through their chat-completions clients; any other
+        provider value is handled as Ollama via an HTTP POST to /api/generate.
+        Exceptions from the client call or the HTTP request are not caught here.
+        """
+        if self.llm_provider == "openai":
+            assert self._openai_client is not None
+            response = self._openai_client.chat.completions.create(
                 model=self.model,
                 messages=[
+                    {"role": "system", "content": REPORT_SYSTEM_PROMPT},
                     {"role": "user", "content": prompt},
                 ],
                 temperature=0.4,
             )
+            return str(response.choices[0].message.content or "").strip()
+        if self.llm_provider == "groq":
+            assert self._groq_client is not None
+            response = self._groq_client.chat.completions.create(
+                model=self.model,
+                messages=[
+                    {"role": "system", "content": REPORT_SYSTEM_PROMPT},
+                    {"role": "user", "content": prompt},
+                ],
+                temperature=0.4,
+            )
+            return str(response.choices[0].message.content or "").strip()
+        # Fallthrough: Ollama. "stream": False requests a single JSON body rather than a stream.
+        payload = {
+            "model": self.model,
+            "system": REPORT_SYSTEM_PROMPT,
+            "prompt": prompt,
+            "stream": False,
+            "options": {"temperature": 0.4},
+        }
+        response = requests.post(
+            f"{self._ollama_base_url}/api/generate",
+            json=payload,
+            timeout=120,
+        )
+        # Raise on HTTP error statuses instead of parsing an error body as a report.
+        response.raise_for_status()
+        body = response.json()
+        return str(body.get("response", "")).strip()
     def save_report(self, markdown_text: str, user_id: str, notebook_id: str) -> str:
         report_dir = Path(f"data/users/{user_id}/notebooks/{notebook_id}/artifacts/reports")

tests/test_artifact_api.py CHANGED Viewed

@@ -208,6 +208,20 @@ class TestReportEndpoint:
         )
         assert resp.status_code == 400
     def test_generate_report_unknown_notebook_404(self, client):
         resp = client.post("/notebooks/9999/artifacts/report", json={"detail_level": "medium"})
         assert resp.status_code == 404

         )
         assert resp.status_code == 400
+    def test_generate_report_provider_config_error_returns_failed_artifact(self, client, notebook):
+        """Provider init/runtime errors should not bubble as HTTP 500."""
+        # Make constructing ReportGenerator inside the app raise, as a bad provider config would.
+        with patch("app.ReportGenerator", side_effect=ValueError("missing provider credentials")):
+            resp = client.post(
+                f"/notebooks/{notebook.id}/artifacts/report",
+                json={"detail_level": "medium"},
+            )
+        # The endpoint is expected to answer 200 with an artifact marked failed that carries the error text.
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["type"] == "report"
+        assert data["status"] == "failed"
+        assert "missing provider credentials" in str(data.get("error_message"))
     def test_generate_report_unknown_notebook_404(self, client):
         resp = client.post("/notebooks/9999/artifacts/report", json={"detail_level": "medium"})
         assert resp.status_code == 404

tests/test_artifacts.py CHANGED Viewed

@@ -182,6 +182,43 @@ class TestQuizGenerator:
         assert "## Answer Key" in saved
         assert "1. **B**" in saved
 # ── PodcastGenerator tests ────────────────────────────────────────────────────

         assert "## Answer Key" in saved
         assert "1. **B**" in saved
+    def test_generate_quiz_normalizes_multiline_options(self, tmp_path):
+        """Multiline option strings are normalized into labeled bullet options."""
+        _chroma_dir(tmp_path)
+        mock_store = MagicMock()
+        mock_store.query.return_value = MOCK_CHROMA_RESULTS
+        # The options arrive as one newline-separated string and the answer as "B) Two",
+        # not as a list and a bare letter.
+        raw_payload = {
+            "questions": [
+                {
+                    "id": 1,
+                    "question": "What is the goal?",
+                    "options": "A) One\nB) Two\nC) Three\nD) Four",
+                    "correct_answer": "B) Two",
+                    "explanation": "Two is correct.",
+                    "topic": "Goals",
+                }
+            ]
+        }
+        mock_llm_resp = _make_openai_chat_response(raw_payload)
+        env = {"STORAGE_BASE_DIR": str(tmp_path / "data"), "OPENAI_API_KEY": "test-key"}
+        # Patch the vector store and the OpenAI client so no external service is contacted.
+        with patch.dict(os.environ, env):
+            with patch("src.artifacts.quiz_generator.ChromaAdapter", return_value=mock_store):
+                with patch("src.artifacts.quiz_generator.OpenAI") as mock_openai_cls:
+                    mock_client = MagicMock()
+                    mock_client.chat.completions.create.return_value = mock_llm_resp
+                    mock_openai_cls.return_value = mock_client
+                    gen = QuizGenerator()
+                    result = gen.generate_quiz(user_id="1", notebook_id="1", num_questions=1)
+                    markdown = gen.format_quiz_markdown(result, title="Quiz")
+        assert "error" not in result
+        # The single string must have been split into four separately labeled options.
+        assert result["questions"][0]["options"] == ["A) One", "B) Two", "C) Three", "D) Four"]
+        assert "- A) One" in markdown
+        assert "- D) Four" in markdown
 # ── PodcastGenerator tests ────────────────────────────────────────────────────

tests/test_report_llm_providers.py ADDED Viewed

	@@ -0,0 +1,78 @@

+"""
+Provider-specific tests for report generation.
+"""
+from __future__ import annotations
+import os
+import pathlib
+import sys
+from unittest.mock import MagicMock, patch
+ROOT = pathlib.Path(__file__).resolve().parents[1]
+sys.path.insert(0, str(ROOT))
+from src.artifacts.report_generator import ReportGenerator
+def _prepare_store(mock_store_cls):
+    """Make the patched ChromaAdapter class return a mock store whose query yields one context chunk."""
+    mock_store = MagicMock()
+    mock_store.query.return_value = [
+        ("chunk-1", 0.1, {"document": "Context block for report generation.", "metadata": {}})
+    ]
+    mock_store_cls.return_value = mock_store
+def test_report_generator_ollama_provider_without_openai_key(tmp_path):
+    """With provider "ollama" and an empty OPENAI_API_KEY, the report comes from the mocked HTTP reply."""
+    env = {
+        "STORAGE_BASE_DIR": str(tmp_path / "data"),
+        "REPORT_LLM_PROVIDER": "ollama",
+        "REPORT_LLM_MODEL": "qwen2.5:3b",
+        "OLLAMA_BASE_URL": "http://127.0.0.1:11434",
+        "OPENAI_API_KEY": "",
+    }
+    with patch.dict(os.environ, env, clear=False):
+        # Path.exists is forced to True so any existence check in the generator passes.
+        with patch("src.artifacts.report_generator.Path.exists", return_value=True):
+            with patch("src.artifacts.report_generator.ChromaAdapter") as mock_store_cls:
+                _prepare_store(mock_store_cls)
+                # Stand-in for the Ollama HTTP response; no real request is made.
+                mock_resp = MagicMock()
+                mock_resp.raise_for_status.return_value = None
+                mock_resp.json.return_value = {"response": "# Report\n\nGenerated from Ollama."}
+                with patch("src.artifacts.report_generator.requests.post", return_value=mock_resp):
+                    generator = ReportGenerator(llm_provider="ollama")
+                    result = generator.generate_report("1", "1")
+    assert "error" not in result
+    assert "content" in result
+    assert "Generated from Ollama." in result["content"]
+    assert result["llm_provider"] == "ollama"
+def test_report_generator_groq_provider_without_openai_key(tmp_path):
+    """With provider "groq" and an empty OPENAI_API_KEY, the report comes from the mocked Groq client."""
+    env = {
+        "STORAGE_BASE_DIR": str(tmp_path / "data"),
+        "REPORT_LLM_PROVIDER": "groq",
+        "REPORT_LLM_MODEL": "llama-3.1-8b-instant",
+        "GROQ_API_KEY": "gsk-test",
+        "OPENAI_API_KEY": "",
+    }
+    with patch.dict(os.environ, env, clear=False):
+        # Path.exists is forced to True so any existence check in the generator passes.
+        with patch("src.artifacts.report_generator.Path.exists", return_value=True):
+            with patch("src.artifacts.report_generator.ChromaAdapter") as mock_store_cls:
+                _prepare_store(mock_store_cls)
+                # Patched on the groq package itself because the generator imports Groq inside __init__.
+                with patch("groq.Groq") as mock_groq_cls:
+                    mock_groq = MagicMock()
+                    mock_groq.chat.completions.create.return_value = MagicMock(
+                        choices=[MagicMock(message=MagicMock(content="# Report\n\nGenerated from Groq."))]
+                    )
+                    mock_groq_cls.return_value = mock_groq
+                    generator = ReportGenerator(llm_provider="groq")
+                    result = generator.generate_report("1", "1")
+    assert "error" not in result
+    assert "content" in result
+    assert "Generated from Groq." in result["content"]
+    assert result["llm_provider"] == "groq"