| | from __future__ import annotations |
| |
|
| | import json |
| | import tempfile |
| | from pathlib import Path |
| |
|
| | import gradio as gr |
| | from huggingface_hub import hf_hub_download |
| |
|
| | from modular_graph_and_candidates import ( |
| | build_graph_json, |
| | generate_html, |
| | build_timeline_json, |
| | generate_timeline_html, |
| | filter_graph_by_threshold, |
| | ) |
| |
|
| | def _escape_srcdoc(text: str) -> str: |
| | return ( |
| | text.replace("&", "&") |
| | .replace("\"", """) |
| | .replace("'", "'") |
| | .replace("<", "<") |
| | .replace(">", ">") |
| | ) |
| |
|
# URL of the upstream transformers repository on GitHub.
HF_MAIN_REPO = "https://github.com/huggingface/transformers"
# Hugging Face Hub repo id the app downloads its precomputed artifacts from
# (fetched with repo_type="dataset").
CACHE_REPO = "Molbap/hf_cached_embeds_log"
| |
|
def _fetch_from_cache_repo(kind: str, sim_method: str, threshold: float, multimodal: bool, *, height_vh: int = 85):
    """Render precomputed data from ``CACHE_REPO`` as an iframe plus a JSON file.

    Reads ``latest.json`` from the cache repo to find the current ``sha``,
    downloads ``{kind}/{sha}/{sim_method}-m{0|1}.json``, filters it with
    ``filter_graph_by_threshold`` and renders the result to HTML.

    Args:
        kind: Folder of the cache repo to read from. ``"timeline"`` selects
            ``generate_timeline_html``; any other value uses ``generate_html``.
        sim_method: Similarity method name, used as part of the cached file name.
        threshold: Value forwarded to ``filter_graph_by_threshold``.
        multimodal: Encoded as ``m1``/``m0`` in the cached file name.
        height_vh: Height of the returned iframe, in ``vh`` units.

    Returns:
        A ``(iframe_html, json_path)`` tuple: the iframe markup embedding the
        rendered page, and the path of a temporary file holding the filtered
        data as JSON.

    Raises:
        KeyError: If ``latest.json`` has no ``"sha"`` entry.
    """
    repo_id = CACHE_REPO
    latest_fp = hf_hub_download(repo_id=repo_id, filename="latest.json", repo_type="dataset")
    info = json.loads(Path(latest_fp).read_text(encoding="utf-8"))
    # Index instead of .get(): a missing sha would otherwise be formatted as
    # the literal "None" and only surface later as a confusing missing-file
    # error from the hub. This also matches how run_loc reads the same file.
    sha = info["sha"]
    key = f"{sha}/{sim_method}-m{int(multimodal)}"
    json_fp = hf_hub_download(repo_id=repo_id, filename=f"{kind}/{key}.json", repo_type="dataset")

    raw_data = json.loads(Path(json_fp).read_text(encoding="utf-8"))
    filtered_data = filter_graph_by_threshold(raw_data, threshold)

    is_timeline = kind == "timeline"
    if is_timeline:
        raw_html = generate_timeline_html(filtered_data)
    else:
        raw_html = generate_html(filtered_data)

    iframe_html = f'<iframe style="width:100%;height:{height_vh}vh;border:none;" srcdoc="{_escape_srcdoc(raw_html)}"></iframe>'

    # delete=False keeps the file on disk after the handle is closed so the
    # returned path stays usable; the context manager closes the descriptor
    # instead of leaking it on every call.
    suffix = "_timeline.json" if is_timeline else ".json"
    with tempfile.NamedTemporaryFile("w", suffix=suffix, delete=False, encoding="utf-8") as tmp:
        json.dump(filtered_data, tmp)
    return iframe_html, tmp.name
| |
|
def run_loc(sim_method: str, multimodal: bool, *, height_vh: int = 85):
    """Return iframe markup embedding the cached "loc" HTML page.

    Looks up the current ``sha`` in ``latest.json`` of ``CACHE_REPO`` and
    downloads ``loc/{sha}/{sim_method}-m{0|1}.html`` from the same repo.

    Args:
        sim_method: Similarity method name, used as part of the cached file name.
        multimodal: Encoded as ``m1``/``m0`` in the cached file name.
        height_vh: Height of the returned iframe, in ``vh`` units.

    Returns:
        An ``<iframe>`` string whose ``srcdoc`` holds the escaped page.

    Raises:
        KeyError: If ``latest.json`` has no ``"sha"`` entry.
    """
    pointer_path = hf_hub_download(repo_id=CACHE_REPO, filename="latest.json", repo_type="dataset")
    with open(pointer_path, encoding="utf-8") as handle:
        sha = json.load(handle)["sha"]

    key = f"{sha}/{sim_method}-m{int(multimodal)}"
    page_path = hf_hub_download(repo_id=CACHE_REPO, filename=f"loc/{key}.html", repo_type="dataset")
    with open(page_path, encoding="utf-8") as handle:
        page_source = handle.read()

    return f'<iframe style="width:100%;height:{height_vh}vh;border:none;" srcdoc="{_escape_srcdoc(page_source)}"></iframe>'
| |
|
def run_graph(repo_url: str, threshold: float, multimodal: bool, sim_method: str, *, height_vh: int = 85):
    """Fetch and render the cached "graph" view.

    Args:
        repo_url: Accepted but not used; the data always comes from the cache repo.
        threshold: Forwarded to ``_fetch_from_cache_repo``.
        multimodal: Forwarded to ``_fetch_from_cache_repo``.
        sim_method: Forwarded to ``_fetch_from_cache_repo``.
        height_vh: Height of the returned iframe, in ``vh`` units.

    Returns:
        The result of ``_fetch_from_cache_repo`` called with kind ``"graph"``.
    """
    return _fetch_from_cache_repo(
        kind="graph",
        sim_method=sim_method,
        threshold=threshold,
        multimodal=multimodal,
        height_vh=height_vh,
    )
| |
|
def run_timeline(repo_url: str, threshold: float, multimodal: bool, sim_method: str, *, height_vh: int = 85):
    """Fetch and render the cached "timeline" view.

    Args:
        repo_url: Accepted but not used; the data always comes from the cache repo.
        threshold: Forwarded to ``_fetch_from_cache_repo``.
        multimodal: Forwarded to ``_fetch_from_cache_repo``.
        sim_method: Forwarded to ``_fetch_from_cache_repo``.
        height_vh: Height of the returned iframe, in ``vh`` units.

    Returns:
        The result of ``_fetch_from_cache_repo`` called with kind ``"timeline"``.
    """
    return _fetch_from_cache_repo(
        kind="timeline",
        sim_method=sim_method,
        threshold=threshold,
        multimodal=multimodal,
        height_vh=height_vh,
    )
| |
|
| | |
| |
|
# Stylesheet forcing iframes under the #graph_html / #timeline_html elements
# to full width and 85vh height.
CUSTOM_CSS = """
#graph_html iframe, #timeline_html iframe {height:85vh !important; width:100% !important; border:none;}
"""

# Maps each view name to an integer index.
TAB_INDEX = {"timeline": 0, "loc": 1, "graph": 2}
| |
|
def _load():
    """Produce the initial page content: the "loc" view for jaccard, non-multimodal."""
    return run_loc(sim_method="jaccard", multimodal=False)


# The UI is a single HTML component, filled by _load when the page loads.
with gr.Blocks() as demo:
    html = gr.HTML()
    demo.load(_load, outputs=[html])


if __name__ == "__main__":
    demo.launch()
| |
|