"""Demo data loader — bootstraps a pre-scored DB from bundled JSON."""
import json
import logging
import shutil
from pathlib import Path
log = logging.getLogger(__name__)
# Columns written for each demo run, in INSERT order. Every one of these keys
# is required in a run entry (a missing key raises KeyError).
_DEMO_RUN_COLUMNS = (
    "id", "domain", "started_at", "finished_at",
    "date_start", "date_end", "paper_count", "status",
)

# Columns written for each demo paper, in INSERT order. Keys missing from a
# paper entry are stored as NULL.
_DEMO_PAPER_COLUMNS = (
    "run_id", "domain", "arxiv_id", "entry_id", "title",
    "authors", "abstract", "published", "categories", "pdf_url", "arxiv_url",
    "comment", "source", "github_repo", "github_stars", "hf_upvotes",
    "hf_models", "hf_datasets", "hf_spaces", "score_axis_1", "score_axis_2",
    "score_axis_3", "composite", "summary", "reasoning", "code_url",
    "s2_tldr", "s2_paper_id", "topics",
)


def _insert_sql(table, columns, *, or_ignore=False):
    """Build a parameterized INSERT for *table* with one ``?`` per column.

    Only call this with hard-coded identifiers: table and column names are
    interpolated into the statement, while row values stay bound parameters.
    """
    verb = "INSERT OR IGNORE" if or_ignore else "INSERT"
    column_list = ", ".join(columns)
    placeholders = ", ".join("?" for _ in columns)
    return f"{verb} INTO {table} ({column_list}) VALUES ({placeholders})"


def load_demo() -> None:
    """Load bundled demo data into a fresh database and copy the demo config.

    Reads ``data/demo-data.json`` (relative to the current working
    directory), inserts its ``runs`` and ``papers`` entries, rebuilds the
    ``papers_fts`` index, and finally copies ``data/demo-config.yaml`` to
    ``CONFIG_PATH`` when that file is bundled.

    Does nothing (beyond logging) when the demo JSON is missing, or when
    either ``CONFIG_PATH`` or ``DB_PATH`` already exists.

    Raises:
        json.JSONDecodeError: If the bundled JSON is malformed.
        KeyError: If a run entry lacks one of the run columns.
    """
    from src.config import CONFIG_PATH, DB_PATH

    json_path = Path("data/demo-data.json")
    config_src = Path("data/demo-config.yaml")

    if not json_path.exists():
        log.warning("Demo data not found at %s", json_path)
        return

    config_dst = CONFIG_PATH
    db_path = DB_PATH

    # Skip if already set up
    if config_dst.exists() or db_path.exists():
        log.info("Config or DB already exists — skipping demo load")
        return

    # Parse the bundle BEFORE touching the database: if init_db() creates the
    # DB file (presumably it does — confirm in src.db), a parse failure after
    # it would leave an empty DB behind and the guard above would then skip
    # every later attempt. Decode explicitly as UTF-8 rather than relying on
    # the platform's locale encoding.
    data = json.loads(json_path.read_text(encoding="utf-8"))
    runs = data.get("runs", [])
    papers = data.get("papers", [])

    # Load DB first, then copy config
    from src.db import init_db, get_conn

    init_db()

    run_sql = _insert_sql("runs", _DEMO_RUN_COLUMNS, or_ignore=True)
    paper_sql = _insert_sql("papers", _DEMO_PAPER_COLUMNS)

    with get_conn() as conn:
        for run in runs:
            # Strict lookup: every run column must be present.
            conn.execute(run_sql, tuple(run[col] for col in _DEMO_RUN_COLUMNS))
        for paper in papers:
            # Lenient lookup: absent paper fields are inserted as NULL.
            conn.execute(
                paper_sql,
                tuple(paper.get(col) for col in _DEMO_PAPER_COLUMNS),
            )
        # Re-index the full-text table from the rows just inserted.
        conn.execute("INSERT INTO papers_fts(papers_fts) VALUES('rebuild')")

    log.info(
        "Demo data loaded: %d runs, %d papers into %s",
        len(runs), len(papers), db_path,
    )

    # Copy config last (entrypoint checks config existence to set DEMO_MODE)
    if config_src.exists():
        config_dst.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(config_src, config_dst)
        log.info("Demo config written to %s", config_dst)
|