File size: 3,149 Bytes
8b98d83
 
 
 
 
 
 
 
 
 
 
 
28ea021
8b98d83
 
 
 
 
 
 
 
28ea021
 
8b98d83
28ea021
 
 
8b98d83
 
28ea021
8b98d83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28ea021
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
"""Demo data loader — bootstraps a pre-scored DB from bundled JSON."""

import json
import logging
import shutil
from pathlib import Path

# Module-level logger, named after this module.
log = logging.getLogger(__name__)


def load_demo():
    """Load bundled demo data into a fresh database and copy the demo config.

    Reads ``data/demo-data.json`` (resolved against the current working
    directory), initialises the database with ``src.db.init_db``, inserts the
    JSON's ``runs`` and ``papers`` entries, issues the ``'rebuild'`` command
    on ``papers_fts``, and finally copies ``data/demo-config.yaml`` to
    ``CONFIG_PATH`` when that source file exists.

    The function only logs and returns when the demo JSON is missing, or when
    either ``CONFIG_PATH`` or ``DB_PATH`` already exists.

    Returns:
        None.

    Raises:
        json.JSONDecodeError: If the demo JSON cannot be parsed. This is
            raised before the database is created, so a later call can retry.
        KeyError: If a ``runs`` entry lacks one of the required keys.
    """
    # Imported lazily, as in the rest of this function, so importing this
    # module does not pull in the project configuration.
    from src.config import CONFIG_PATH, DB_PATH

    json_path = Path("data/demo-data.json")
    config_src = Path("data/demo-config.yaml")

    if not json_path.exists():
        log.warning("Demo data not found at %s", json_path)
        return

    config_dst = CONFIG_PATH
    db_path = DB_PATH

    # Skip if already set up
    if config_dst.exists() or db_path.exists():
        log.info("Config or DB already exists — skipping demo load")
        return

    # Parse the demo data BEFORE touching the database: if the file is
    # unreadable or malformed we must not leave an empty DB behind, because
    # the guard above would then skip every subsequent demo load.
    # The encoding is explicit so decoding does not depend on the platform's
    # locale default.
    data = json.loads(json_path.read_text(encoding="utf-8"))
    runs = data.get("runs", [])
    papers = data.get("papers", [])

    # Load DB first, then copy config
    from src.db import init_db, get_conn
    init_db()

    with get_conn() as conn:
        # Run fields are accessed with [] — a missing key raises KeyError.
        for r in runs:
            conn.execute(
                """INSERT OR IGNORE INTO runs (id, domain, started_at, finished_at,
                   date_start, date_end, paper_count, status)
                   VALUES (?, ?, ?, ?, ?, ?, ?, ?)""",
                (r["id"], r["domain"], r["started_at"], r["finished_at"],
                 r["date_start"], r["date_end"], r["paper_count"], r["status"]),
            )

        # Paper fields are read with .get(), so absent keys are bound as None.
        for p in papers:
            conn.execute(
                """INSERT INTO papers (run_id, domain, arxiv_id, entry_id, title,
                   authors, abstract, published, categories, pdf_url, arxiv_url,
                   comment, source, github_repo, github_stars, hf_upvotes,
                   hf_models, hf_datasets, hf_spaces, score_axis_1, score_axis_2,
                   score_axis_3, composite, summary, reasoning, code_url,
                   s2_tldr, s2_paper_id, topics)
                   VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)""",
                (p.get("run_id"), p.get("domain"), p.get("arxiv_id"), p.get("entry_id"),
                 p.get("title"), p.get("authors"), p.get("abstract"), p.get("published"),
                 p.get("categories"), p.get("pdf_url"), p.get("arxiv_url"),
                 p.get("comment"), p.get("source"), p.get("github_repo"),
                 p.get("github_stars"), p.get("hf_upvotes"), p.get("hf_models"),
                 p.get("hf_datasets"), p.get("hf_spaces"), p.get("score_axis_1"),
                 p.get("score_axis_2"), p.get("score_axis_3"), p.get("composite"),
                 p.get("summary"), p.get("reasoning"), p.get("code_url"),
                 p.get("s2_tldr"), p.get("s2_paper_id"), p.get("topics")),
            )

        # Special FTS command: rebuild the papers_fts index after the bulk
        # insert. NOTE(review): assumes papers_fts is an FTS table created by
        # init_db() — confirm against src.db.
        conn.execute("INSERT INTO papers_fts(papers_fts) VALUES('rebuild')")

    log.info("Demo data loaded: %d runs, %d papers into %s", len(runs), len(papers), db_path)

    # Copy config last (entrypoint checks config existence to set DEMO_MODE)
    if config_src.exists():
        config_dst.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(config_src, config_dst)
        log.info("Demo config written to %s", config_dst)