ChristophSchuhmann committed on
Commit
0544bfc
·
verified ·
1 Parent(s): 4aed451

Upload scripts/render_results.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. scripts/render_results.py +130 -0
scripts/render_results.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Render the 18 sample annotations as a Markdown block to embed in README.md.

Reads samples/{audioset,music,majestrino}/*.json and writes the Markdown to
samples/results_block.md.
"""
from __future__ import annotations

import json
from pathlib import Path

# Repository root: this script lives in <root>/scripts/, so two .parent hops
# from the resolved script path land on the repo root.
ROOT = Path(__file__).resolve().parent.parent
# Directory that holds the per-dataset sample folders and the rendered output.
SAMPLES = ROOT / "samples"

# One entry per source dataset. Keys:
#   short: subdirectory name under samples/ holding this dataset's JSON/audio
#   title: Markdown heading text for the dataset section
#   link:  dataset page URL on the Hugging Face Hub
#   blurb: one-sentence description shown under the heading
DATASETS: list[dict[str, str]] = [
    {
        "short": "audioset",
        "title": "Source: `mitermix/audioset-with-grounded-captions`",
        "link": "https://huggingface.co/datasets/mitermix/audioset-with-grounded-captions",
        "blurb": "AudioSet-derived clips with mixed content (speech, music, "
        "sound effects) — a good test of all three routes.",
    },
    {
        "short": "music",
        "title": "Source: `laion/captioned-ai-music-snippets`",
        "link": "https://huggingface.co/datasets/laion/captioned-ai-music-snippets",
        "blurb": "AI-generated music snippets, primarily routed to the music "
        "captioner.",
    },
    {
        "short": "majestrino",
        "title": "Source: `TTS-AGI/majestrino-unified-detailed-captions-temporal`",
        "link": "https://huggingface.co/datasets/TTS-AGI/majestrino-unified-detailed-captions-temporal",
        "blurb": "High-quality TTS-style speech recordings, primarily routed "
        "to the speech models.",
    },
]
37
+
38
+
39
def render_clip(payload: dict, audio_rel: str) -> str:
    """Render a single annotated clip as a Markdown section.

    The section shows a download link, the top-3 AudioSet predictions as a
    table, the chosen route, and the caption(s)/tags produced by the models
    for that route, followed by a horizontal rule.

    Args:
        payload: One clip's annotation record, with keys ``audio_file``,
            ``audioset_top3``, ``route`` and ``annotations``.
        audio_rel: Path to the audio file, relative to the repo-root README.

    Returns:
        The Markdown section as a single newline-joined string.
    """
    lines = [
        f"#### `{payload['audio_file']}`",
        "",
        f"[Listen / download]({audio_rel})",
        "",
        "**AudioSet top-3 predictions** (MIT AST):",
        "",
        "| # | Label | Confidence |",
        "|---|---|---|",
    ]
    for rank, pred in enumerate(payload["audioset_top3"], start=1):
        confidence_pct = 100.0 * pred["confidence"]
        lines.append(f"| {rank} | `{pred['label']}` | {confidence_pct:.1f}% |")
    lines += ["", f"**Route:** `{payload['route']}`", ""]

    # Map the route to the (annotation key, section heading) pairs to emit,
    # in order; a pair is skipped when its key is absent from the record.
    route = payload["route"]
    if route == "speech":
        sections = [
            ("voice_tags", "**`laion/voice-tagging-whisper` — voice tags:**"),
            ("bud_e_speech_caption", "**`laion/BUD-E-Whisper_V1.2` — speech caption:**"),
        ]
    elif route == "music":
        sections = [
            ("music_caption", "**`laion/music-whisper` — music caption:**"),
        ]
    else:  # sfx
        sections = [
            ("sound_effect_caption", "**`laion/sound-effect-captioning-whisper` — sound caption:**"),
        ]

    ann = payload["annotations"]
    for key, heading in sections:
        if key in ann:
            lines += [heading, "", f"> {ann[key]}", ""]

    lines += ["---", ""]
    return "\n".join(lines)
85
+
86
+
87
def render_dataset(ds: dict) -> str:
    """Render one dataset's section: heading, blurb/link, then every clip.

    Args:
        ds: One entry from ``DATASETS`` (keys ``short``, ``title``, ``link``,
            ``blurb``).

    Returns:
        The dataset section as Markdown; contains ``_(no samples)_`` when the
        dataset's samples directory has no JSON files.
    """
    out: list[str] = []
    out.append(f"### {ds['title']}")
    out.append("")
    out.append(f"{ds['blurb']} \nDataset: {ds['link']}")
    out.append("")

    json_files = sorted((SAMPLES / ds["short"]).glob("*.json"))
    if not json_files:
        out.append("_(no samples)_")
        return "\n".join(out)

    for jf in json_files:
        # Read explicitly as UTF-8: captions contain non-ASCII characters
        # (e.g. em-dashes) and the platform default encoding is not
        # guaranteed to be UTF-8 (notably on Windows).
        payload = json.loads(jf.read_text(encoding="utf-8"))
        # Audio file lives next to the JSON. We expose it via a relative path
        # from the README at repo root, i.e. ./samples/<short>/<basename>
        audio_basename = payload["audio_file"]
        audio_rel = f"./samples/{ds['short']}/{audio_basename}"
        out.append(render_clip(payload, audio_rel))
    return "\n".join(out)
107
+
108
+
109
def main() -> int:
    """Assemble the full results block and write it to samples/results_block.md.

    Returns:
        0, so ``SystemExit(main())`` exits with a success status.
    """
    blocks = ["## Sample annotations\n"]
    blocks.append(
        "The pipeline below was run end-to-end on **6 random audio clips "
        "drawn from each of three Hugging Face datasets** "
        "(18 clips total). For each clip we show the top-3 AudioSet "
        "predictions from the MIT AST router, the route the clip was "
        "dispatched to, and the resulting Whisper caption / tags. The "
        "audio files themselves are mirrored in this repo under "
        "[`samples/`](./samples) so you can listen along.\n"
    )
    for ds in DATASETS:
        blocks.append(render_dataset(ds))
    md = "\n".join(blocks)
    out_path = SAMPLES / "results_block.md"
    # Write explicitly as UTF-8: the Markdown contains non-ASCII characters
    # (em-dashes) and the platform default encoding may reject them.
    out_path.write_text(md, encoding="utf-8")
    print(f"Wrote {out_path} ({len(md)} chars)")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())