from __future__ import annotations

from pathlib import Path
import re

from fastapi.testclient import TestClient

from hakari_bench.viewer.app import create_app
from hakari_bench.viewer.docs import BenchmarkDocs, render_markdown_to_html
from hakari_bench.viewer.store import DuckDbLocation, LocalDuckDbStore
from tests.test_viewer import _write_task_results


def test_benchmark_docs_resolves_group_and_task_overviews(tmp_path: Path) -> None:
    """A group index page and a task page each yield a title, description, and URL."""
    docs_root = tmp_path / "docs" / "benchmark_tasks"
    miracl_dir = docs_root / "NanoMIRACL"
    miracl_dir.mkdir(parents=True)

    # File name -> markdown body; both pages carry an Overview and a Details section.
    pages = {
        "index.md": "# NanoMIRACL\n\n## Overview\n\nGroup overview text.\n\n## Details\n\nDetails text.\n",
        "ja.md": "# NanoMIRACL / ja\n\n## Overview\n\nJapanese task overview.\n\n## Details\n\nMore detail.\n",
    }
    for filename, markdown in pages.items():
        (miracl_dir / filename).write_text(markdown, encoding="utf-8")

    docs = BenchmarkDocs(docs_root)
    group_doc = docs.group_doc("NanoMIRACL")
    task_doc = docs.task_doc(view_name="NanoMIRACL", metric_column="ja")

    assert group_doc is not None
    assert (group_doc.title, group_doc.description, group_doc.url) == (
        "NanoMIRACL",
        "Group overview text.",
        "/docs/benchmark-tasks/NanoMIRACL",
    )

    assert task_doc is not None
    assert (task_doc.title, task_doc.description, task_doc.url) == (
        "NanoMIRACL / ja",
        "Japanese task overview.",
        "/docs/benchmark-tasks/NanoMIRACL/ja",
    )


def test_benchmark_docs_lists_group_docs_with_descriptions(tmp_path: Path) -> None:
    """Only the two groups that have an index.md are expected, in the asserted order."""
    docs_root = tmp_path / "docs" / "benchmark_tasks"

    # "NoIndex" gets a directory but no index.md, and is expected to be absent below.
    group_dirs = {name: docs_root / name for name in ("NanoMIRACL", "NanoCoIR", "NoIndex")}
    for group_dir in group_dirs.values():
        group_dir.mkdir(parents=True)

    index_pages = {
        "NanoMIRACL": "# NanoMIRACL\n\n## Overview\n\nMIRACL overview.\n",
        "NanoCoIR": "# NanoCoIR\n\n## Overview\n\nCoIR overview.\n",
    }
    for group_name, markdown in index_pages.items():
        (group_dirs[group_name] / "index.md").write_text(markdown, encoding="utf-8")

    listed = BenchmarkDocs(docs_root).group_docs()

    titles = [entry.title for entry in listed]
    assert titles == ["NanoCoIR", "NanoMIRACL"]

    urls = [entry.url for entry in listed]
    assert urls == ["/docs/benchmark-tasks/NanoCoIR", "/docs/benchmark-tasks/NanoMIRACL"]

    descriptions = [entry.description for entry in listed]
    assert descriptions == ["CoIR overview.", "MIRACL overview."]


def test_benchmark_docs_resolves_mnanobeir_task_key_documents(tmp_path: Path) -> None:
    """A "group::dataset::task" metric column resolves to the "<dataset>__<task>" page."""
    docs_root = tmp_path / "docs" / "benchmark_tasks"
    beir_dir = docs_root / "MNanoBEIR"
    beir_dir.mkdir(parents=True)

    task_page = beir_dir / "NanoBEIR-ja__NanoMSMARCO.md"
    task_page.write_text(
        "# MNanoBEIR / NanoBEIR-ja / NanoMSMARCO\n\n## Overview\n\nMS MARCO overview.\n",
        encoding="utf-8",
    )

    resolved = BenchmarkDocs(docs_root).task_doc(
        view_name="MNanoBEIR",
        metric_column="MNanoBEIR::hakari-bench/NanoBEIR-ja::NanoMSMARCO",
    )

    assert resolved is not None
    assert (resolved.url, resolved.description) == (
        "/docs/benchmark-tasks/MNanoBEIR/NanoBEIR-ja__NanoMSMARCO",
        "MS MARCO overview.",
    )
def test_benchmark_docs_resolves_nanobeir_short_task_aliases(tmp_path: Path) -> None: docs_dir = tmp_path / "docs" / "benchmark_tasks" group_dir = docs_dir / "MNanoBEIR" group_dir.mkdir(parents=True) (group_dir / "NanoBEIR-ja__NanoArguAna.md").write_text( "# MNanoBEIR / NanoBEIR-ja / NanoArguAna\n\n## Overview\n\nArguAna overview.\n", encoding="utf-8", ) docs = BenchmarkDocs(docs_dir) doc = docs.task_doc( view_name="MNanoBEIR", metric_column="MNanoBEIR::hakari-bench/NanoBEIR-ja::arguana", ) assert doc is not None assert doc.title == "MNanoBEIR / NanoBEIR-ja / NanoArguAna" assert doc.url == "/docs/benchmark-tasks/MNanoBEIR/NanoBEIR-ja__NanoArguAna" assert doc.description == "ArguAna overview." def test_markdown_renderer_escapes_html_and_renders_basic_markdown() -> None: html = render_markdown_to_html( "# Title\n\n## Overview\n\nA **bold phrase**, [safe link](https://example.com), and .\n\n- one\n- two\n" ) assert "

Title

" in html assert "bold phrase" in html assert "**bold phrase**" not in html assert 'safe link' in html assert "<script>x</script>" in html assert "