| """Export learning outputs to JSON or Markdown.""" |
|
|
| from __future__ import annotations |
|
|
| import html |
| from pathlib import Path |
| from typing import Literal |
|
|
| from pydantic import BaseModel |
|
|
| from src.schemas import Citation, FlashcardSet, QuizSet, RagAnswer, Summary |
|
|
# Accepted values for the ``fmt`` argument of export(); "text" and "md" are
# handled identically there (both produce Markdown), "json" dumps the model.
ExportFormat = Literal["text", "md", "json"]
|
|
|
|
| def _citation_line(c: Citation) -> str: |
| parts = [f"[{c.source_marker}] {c.filename} p.{c.page}"] |
| if c.section: |
| parts.append(f"section: {c.section}") |
| if c.chunk_id: |
| parts.append(f"chunk: {c.chunk_id}") |
| return " | ".join(parts) |
|
|
|
|
| def _details_block(summary: str, content: str) -> str: |
| safe_summary = html.escape(summary, quote=False) |
| safe_content = html.escape(content, quote=False) |
| return ( |
| "<details>" |
| f"<summary>{safe_summary}</summary>" |
| f"<div style=\"margin:8px 0 0 0; white-space:pre-wrap;\">{safe_content}</div>" |
| "</details>" |
| ) |
|
|
|
|
| def _citation_source_text_block(c: Citation) -> str: |
| if not c.source_text: |
| return "" |
| return "\n " + _details_block("Xem đoạn nguồn", c.source_text) |
|
|
|
|
| def _marker_details(citations: list[Citation], markers: list[str]) -> list[str]: |
| by_marker = {c.source_marker: c for c in citations} |
| lines: list[str] = [] |
| for m in markers: |
| c = by_marker.get(m) |
| if c is None: |
| continue |
| summary = f"[{c.source_marker}] {c.filename} p.{c.page}" |
| if c.section: |
| summary += f" | section: {c.section}" |
| if c.chunk_id: |
| summary += f" | chunk: {c.chunk_id}" |
| if c.source_text: |
| lines.append(f"- {_details_block(summary, c.source_text)}") |
| else: |
| lines.append(f"- {summary}") |
| return lines |
|
|
|
|
| def _citations_block(citations: list[Citation]) -> str: |
| if not citations: |
| return "" |
| lines = ["## Sources", ""] |
| for c in citations: |
| lines.append(f"- {_citation_line(c)}{_citation_source_text_block(c)}") |
| return "\n".join(lines) + "\n" |
|
|
|
|
def _render_with_sources(body_lines: list[str], citations: list[Citation]) -> str:
    """Join body lines and the Sources section into one document.

    A blank line is inserted after the body, the Sources section is appended
    when it is non-empty, and the result is right-stripped and terminated with
    exactly one newline.
    """
    sources = _citations_block(citations)
    tail = [sources] if sources else []
    document = "\n".join([*body_lines, "", *tail])
    return document.rstrip() + "\n"
|
|
|
|
def _summary_markdown(model: Summary) -> str:
    """Render a Summary: title, scope, summary text, key points, sources."""
    title = "# Summary" + (f": {model.target}" if model.target else "")
    lines: list[str] = [title, "", f"_Scope: {model.scope}_", ""]
    if model.summary:
        lines.extend([model.summary.strip(), ""])
    if model.key_points:
        lines.extend(["## Key Points", "", *[f"- {kp}" for kp in model.key_points], ""])
    return _render_with_sources(lines, model.citations)


def _quiz_markdown(model: QuizSet) -> str:
    """Render a QuizSet with one ``## Q<n>.`` section per item, then sources."""
    title = "# Quiz" + (f": {model.target}" if model.target else "")
    lines: list[str] = [
        title,
        "",
        f"_Scope: {model.scope} | Items: {len(model.items)}_",
    ]
    for idx, item in enumerate(model.items, start=1):
        meta_parts = [
            f"{label}: {value}"
            for label, value in (("topic", item.topic), ("difficulty", item.difficulty))
            if value
        ]
        meta_suffix = f" _({' | '.join(meta_parts)})_" if meta_parts else ""

        # Every section opens with its own separating blank line; the blank
        # line after the last section is added by _render_with_sources.
        lines.extend(["", f"## Q{idx}.{meta_suffix}", "", item.question.strip(), ""])
        # Options are lettered A, B, C, ... in list order.
        lines.extend(
            f"- {chr(ord('A') + opt_idx)}) {option}"
            for opt_idx, option in enumerate(item.options)
        )
        lines.append("")
        lines.append(f"**Answer:** {chr(ord('A') + item.correct_index)}")
        if item.explanation:
            lines.append(f"**Explanation:** {item.explanation.strip()}")
        if item.source_markers:
            lines.extend(["**Sources:**", *_marker_details(model.citations, item.source_markers)])
    return _render_with_sources(lines, model.citations)


def _flashcards_markdown(model: FlashcardSet) -> str:
    """Render a FlashcardSet with one ``## Card <n>`` section per card, then sources."""
    title = "# Flashcards" + (f": {model.target}" if model.target else "")
    lines: list[str] = [
        title,
        "",
        f"_Scope: {model.scope} | Cards: {len(model.cards)}_",
    ]
    for idx, card in enumerate(model.cards, start=1):
        topic = f" — {card.topic}" if card.topic else ""
        # Leading blank line separates this card from the previous block; the
        # final blank line is added by _render_with_sources.
        lines.extend(["", f"## Card {idx}{topic}", ""])
        lines.append(f"**Front:** {card.front.strip()}")
        lines.append(f"**Back:** {card.back.strip()}")
        if card.hint:
            lines.append(f"**Hint:** {card.hint.strip()}")
        if card.source_markers:
            lines.extend(["**Sources:**", *_marker_details(model.citations, card.source_markers)])
    return _render_with_sources(lines, model.citations)


def _to_markdown(model: BaseModel) -> str:
    """Render a supported learning-output model as a Markdown document.

    Dispatches on the model's type: Summary, RagAnswer, QuizSet and
    FlashcardSet are supported. Every renderer finishes through
    ``_render_with_sources``, so all documents share the same Sources section
    and trailing-newline handling.

    Args:
        model: The model instance to render.

    Returns:
        The Markdown text, ending with a single newline.

    Raises:
        TypeError: If ``model`` is not one of the supported types.
    """
    if isinstance(model, Summary):
        return _summary_markdown(model)
    if isinstance(model, RagAnswer):
        return _render_with_sources([model.answer.strip()], model.citations)
    if isinstance(model, QuizSet):
        return _quiz_markdown(model)
    if isinstance(model, FlashcardSet):
        return _flashcards_markdown(model)
    raise TypeError(f"Unsupported model type: {type(model).__name__}")
|
|
|
|
def export(
    model: BaseModel, *, fmt: ExportFormat = "text", output: Path | None = None
) -> str | Path:
    """Serialize *model* and either return the text or save it to a file.

    Args:
        model: Object to export.
        fmt: ``"json"`` dumps the model as indented JSON followed by a
            newline; ``"text"`` and ``"md"`` both produce Markdown.
        output: Destination file. When given, missing parent directories are
            created and the text is written as UTF-8.

    Returns:
        The rendered text when ``output`` is None, otherwise ``output`` itself.

    Raises:
        TypeError: The model type is not supported by the Markdown renderer.
        ValueError: ``fmt`` is not one of the known formats.
    """
    if fmt == "json":
        rendered = model.model_dump_json(indent=2) + "\n"
    elif fmt in {"text", "md"}:
        rendered = _to_markdown(model)
    else:
        raise ValueError(f"Unknown fmt '{fmt}'. Expected 'text' | 'md' | 'json'.")

    if output is not None:
        # Persist to disk and hand the path back instead of the text.
        output.parent.mkdir(parents=True, exist_ok=True)
        output.write_text(rendered, encoding="utf-8")
        return output
    return rendered
|
|
|
|