Spaces:
Sleeping
Sleeping
| """Demo data loader — bootstraps a pre-scored DB from bundled JSON.""" | |
| import json | |
| import logging | |
| import shutil | |
| from pathlib import Path | |
| log = logging.getLogger(__name__) | |
# Column order for the bundled "runs" rows. The SQL text and the parameter
# tuple are both derived from this, so they cannot drift apart.
_DEMO_RUN_COLUMNS = (
    "id", "domain", "started_at", "finished_at",
    "date_start", "date_end", "paper_count", "status",
)

# Column order for the bundled "papers" rows (same purpose as above).
_DEMO_PAPER_COLUMNS = (
    "run_id", "domain", "arxiv_id", "entry_id", "title",
    "authors", "abstract", "published", "categories", "pdf_url", "arxiv_url",
    "comment", "source", "github_repo", "github_stars", "hf_upvotes",
    "hf_models", "hf_datasets", "hf_spaces", "score_axis_1", "score_axis_2",
    "score_axis_3", "composite", "summary", "reasoning", "code_url",
    "s2_tldr", "s2_paper_id", "topics",
)


def _demo_insert_sql(verb, table, columns):
    """Return a parameterized ``<verb> INTO <table>`` statement with one ``?`` per column."""
    placeholders = ", ".join(["?"] * len(columns))
    return "{} INTO {} ({}) VALUES ({})".format(
        verb, table, ", ".join(columns), placeholders
    )


def load_demo() -> None:
    """Load demo data into a fresh database and copy demo config.

    Reads ``data/demo-data.json`` and ``data/demo-config.yaml``; both paths
    are relative, so they resolve against the current working directory.

    Does nothing (beyond logging) when the demo JSON is missing, or when
    either ``CONFIG_PATH`` or ``DB_PATH`` already exists.

    Raises:
        KeyError: if a run entry lacks one of the run columns. Paper entries
            are read with ``dict.get``, so missing paper fields become NULL.
        json.JSONDecodeError: if the bundled JSON is malformed.
    """
    from src.config import CONFIG_PATH, DB_PATH

    json_path = Path("data/demo-data.json")
    config_src = Path("data/demo-config.yaml")

    if not json_path.exists():
        log.warning("Demo data not found at %s", json_path)
        return

    config_dst = CONFIG_PATH
    db_path = DB_PATH

    # Skip if already set up
    if config_dst.exists() or db_path.exists():
        log.info("Config or DB already exists — skipping demo load")
        return

    # Load DB first, then copy config
    from src.db import init_db, get_conn

    # Parse before touching the database: a malformed bundle should fail
    # here rather than after init_db(). NOTE(review): presumably init_db()
    # creates the file at DB_PATH, in which case a half-finished load would
    # make the "already exists" guard above skip every later attempt.
    # JSON is UTF-8 by specification, so do not rely on the locale default.
    data = json.loads(json_path.read_text(encoding="utf-8"))
    runs = data.get("runs", [])
    papers = data.get("papers", [])

    init_db()

    run_sql = _demo_insert_sql("INSERT OR IGNORE", "runs", _DEMO_RUN_COLUMNS)
    paper_sql = _demo_insert_sql("INSERT", "papers", _DEMO_PAPER_COLUMNS)

    with get_conn() as conn:
        for r in runs:
            # Strict lookup: every run column is required.
            conn.execute(run_sql, tuple(r[col] for col in _DEMO_RUN_COLUMNS))

        for p in papers:
            # Lenient lookup: absent paper fields are inserted as NULL.
            conn.execute(paper_sql, tuple(p.get(col) for col in _DEMO_PAPER_COLUMNS))

        # Issue the FTS 'rebuild' command on papers_fts after the bulk insert.
        conn.execute("INSERT INTO papers_fts(papers_fts) VALUES('rebuild')")

    log.info("Demo data loaded: %d runs, %d papers into %s", len(runs), len(papers), db_path)

    # Copy config last (entrypoint checks config existence to set DEMO_MODE)
    if config_src.exists():
        config_dst.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(config_src, config_dst)
        log.info("Demo config written to %s", config_dst)