File size: 2,870 Bytes
e86fcc2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
"""Task 6: Migrate all non-canonical topic slugs to the 11 canonical topics.
Units whose topic maps to nothing and isn't already canonical are soft-deleted.
Uses a _topic_backup column as rollback source.
"""
import argparse
import sqlite3
import sys

# Put the local "backend" directory first on the import path so the `app`
# package imported below can be resolved. The path is relative, so this
# presumably assumes the script is run from the directory that contains
# backend/ — TODO confirm.
sys.path.insert(0, "backend")
from app.math_wiki.taxonomy import CANONICAL_TOPICS, TOPIC_MAP

# SQLite database file opened by main(). The path is relative, so it is
# resolved against the current working directory.
DB_PATH = "math_wiki.db"


def main(dry_run: bool) -> None:
    """Migrate non-canonical topics in ``wiki_units`` to canonical ones.

    Every active row (``deleted=0``) is classified by its ``topic``:

    * topic already in ``CANONICAL_TOPICS``: left untouched;
    * topic has a truthy entry in ``TOPIC_MAP``: rewritten to that entry;
    * anything else: soft-deleted (``deleted=1``).

    The plan is printed first.  Unless ``dry_run`` is set, the current topic
    of every active row is then copied into ``_topic_backup`` (only where
    that column is still NULL, so an earlier backup is never overwritten)
    and the plan is applied in the same transaction.  Finally the distinct
    topics of the remaining active rows are printed and checked against
    ``CANONICAL_TOPICS``.

    Args:
        dry_run: When true, only print the plan; neither the schema nor any
            row is modified.
    """
    conn = sqlite3.connect(DB_PATH)
    # try/finally so the connection is released even if a statement raises.
    try:
        conn.row_factory = sqlite3.Row

        # Snapshot current topics of active rows.
        rows = conn.execute(
            "SELECT id, topic FROM wiki_units WHERE deleted=0"
        ).fetchall()

        updates: list[tuple[str, str]] = []   # (new_topic, id)
        deletes: list[tuple[str]] = []        # (id,) — shaped for executemany
        counts: dict[str, int] = {}           # plan label -> number of rows

        for row in rows:
            topic = row["topic"]
            if topic in CANONICAL_TOPICS:
                continue
            canonical = TOPIC_MAP.get(topic)
            if canonical:
                updates.append((canonical, row["id"]))
                # Explicit separator: without it source and target slugs run
                # together and distinct mappings can share one label.
                label = f"{topic} -> {canonical}"
            else:
                deletes.append((row["id"],))
                label = f"DELETE:{topic}"
            counts[label] = counts.get(label, 0) + 1

        print("Topic migration plan:")
        for mapping, cnt in sorted(counts.items()):
            print(f"  {mapping}: {cnt} units")
        print(f"\nTotal updates: {len(updates)}, soft-deletes: {len(deletes)}")

        if dry_run:
            print("\nDRY RUN — no changes made.")
            return

        # Ensure the backup column exists.  Done only after the dry-run exit
        # so that a dry run really leaves the database untouched.
        existing_cols = {
            col[1] for col in conn.execute("PRAGMA table_info(wiki_units)")
        }
        if "_topic_backup" not in existing_cols:
            conn.execute("ALTER TABLE wiki_units ADD COLUMN _topic_backup TEXT")
            conn.commit()
            print("Added _topic_backup column.")

        # One transaction for backup + migration: the connection context
        # manager commits on success and rolls back if any statement raises,
        # so the table is never left half-migrated.
        with conn:
            conn.execute(
                "UPDATE wiki_units SET _topic_backup=topic "
                "WHERE _topic_backup IS NULL AND deleted=0"
            )
            conn.executemany(
                "UPDATE wiki_units SET topic=? WHERE id=?", updates
            )
            conn.executemany(
                "UPDATE wiki_units SET deleted=1 WHERE id=?", deletes
            )

        # Post-migration verification of what is left among active rows.
        final = conn.execute(
            "SELECT DISTINCT topic FROM wiki_units WHERE deleted=0 ORDER BY topic"
        ).fetchall()
        print("\nDistinct topics after migration:")
        for r in final:
            print(f"  {r[0]}")

        non_canonical = [r[0] for r in final if r[0] not in CANONICAL_TOPICS]
        if non_canonical:
            print(f"\nWARNING: non-canonical topics still present: {non_canonical}")
        else:
            print("\nAll topics are canonical.")
    finally:
        conn.close()


if __name__ == "__main__":
    # Script entry point: the only CLI option is --dry-run, which is passed
    # straight through to main().
    cli = argparse.ArgumentParser()
    cli.add_argument("--dry-run", action="store_true")
    options = cli.parse_args()
    main(options.dry_run)