"""Demo data loader — bootstraps a pre-scored DB from bundled JSON."""

import json
import logging
import shutil
from pathlib import Path

log = logging.getLogger(__name__)

# Columns written to the ``runs`` table, in insertion order. The SQL statement
# and the per-row value tuples are both derived from this single list so the
# column names, placeholders and values cannot drift apart.
_RUN_COLUMNS = (
    "id",
    "domain",
    "started_at",
    "finished_at",
    "date_start",
    "date_end",
    "paper_count",
    "status",
)

# Columns written to the ``papers`` table, in insertion order.
_PAPER_COLUMNS = (
    "run_id",
    "domain",
    "arxiv_id",
    "entry_id",
    "title",
    "authors",
    "abstract",
    "published",
    "categories",
    "pdf_url",
    "arxiv_url",
    "comment",
    "source",
    "github_repo",
    "github_stars",
    "hf_upvotes",
    "hf_models",
    "hf_datasets",
    "hf_spaces",
    "score_axis_1",
    "score_axis_2",
    "score_axis_3",
    "composite",
    "summary",
    "reasoning",
    "code_url",
    "s2_tldr",
    "s2_paper_id",
    "topics",
)


def _insert_sql(table, columns, *, or_ignore=False):
    """Build a parameterized INSERT for *table* with one ``?`` per column.

    Only module-level constants are interpolated into the statement; row
    values are always passed separately as bound parameters.
    """
    verb = "INSERT OR IGNORE" if or_ignore else "INSERT"
    column_list = ", ".join(columns)
    placeholders = ", ".join("?" for _ in columns)
    return f"{verb} INTO {table} ({column_list}) VALUES ({placeholders})"


def load_demo():
    """Load demo data into a fresh database and copy demo config.

    Reads ``data/demo-data.json`` (relative to the current working directory),
    initialises the database, inserts the bundled runs and papers, rebuilds
    the ``papers_fts`` index, and finally copies ``data/demo-config.yaml`` to
    ``CONFIG_PATH``.

    The function returns early without touching anything when the demo JSON
    is missing, or when either the config file or the database file already
    exists.

    Returns:
        None.

    Raises:
        KeyError: if a run record lacks one of the ``runs`` columns (run
            fields are required; missing paper fields are inserted as NULL).
    """
    # Project imports stay function-local, as in the original module.
    from src.config import CONFIG_PATH, DB_PATH

    json_path = Path("data/demo-data.json")
    config_src = Path("data/demo-config.yaml")

    if not json_path.exists():
        log.warning("Demo data not found at %s", json_path)
        return

    config_dst = CONFIG_PATH
    db_path = DB_PATH

    # Skip if already set up
    if config_dst.exists() or db_path.exists():
        log.info("Config or DB already exists — skipping demo load")
        return

    # Load DB first, then copy config
    from src.db import init_db, get_conn

    init_db()

    # JSON is UTF-8; decode explicitly rather than relying on the platform's
    # locale encoding, which would corrupt or reject non-ASCII text.
    data = json.loads(json_path.read_text(encoding="utf-8"))
    runs = data.get("runs", [])
    papers = data.get("papers", [])

    run_sql = _insert_sql("runs", _RUN_COLUMNS, or_ignore=True)
    paper_sql = _insert_sql("papers", _PAPER_COLUMNS)

    # NOTE(review): presumably get_conn() commits on a clean exit of the
    # ``with`` block — confirm against src.db.
    with get_conn() as conn:
        for run in runs:
            # Run fields are mandatory: a missing key raises KeyError.
            conn.execute(run_sql, tuple(run[col] for col in _RUN_COLUMNS))
        for paper in papers:
            # Paper fields are optional: a missing key is stored as NULL.
            conn.execute(
                paper_sql,
                tuple(paper.get(col) for col in _PAPER_COLUMNS),
            )
        # Rebuild the full-text index so it reflects the rows just inserted.
        conn.execute("INSERT INTO papers_fts(papers_fts) VALUES('rebuild')")

    log.info(
        "Demo data loaded: %d runs, %d papers into %s",
        len(runs),
        len(papers),
        db_path,
    )

    # Copy config last (entrypoint checks config existence to set DEMO_MODE)
    if config_src.exists():
        config_dst.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(config_src, config_dst)
        log.info("Demo config written to %s", config_dst)
    else:
        # Previously skipped silently, leaving a populated DB with no config.
        log.warning("Demo config not found at %s", config_src)