Spaces:
Running
Running
| """Task 6: Migrate all non-canonical topic slugs to the 11 canonical topics. | |
| Units whose topic maps to nothing and isn't already canonical are soft-deleted. | |
| Uses a _topic_backup column as rollback source. | |
| """ | |
| import argparse | |
| import sqlite3 | |
| import sys | |
| sys.path.insert(0, "backend") | |
| from app.math_wiki.taxonomy import CANONICAL_TOPICS, TOPIC_MAP | |
| DB_PATH = "math_wiki.db" | |
def _plan_topic_migration(rows, canonical_topics, topic_map):
    """Classify snapshot rows into topic rewrites and soft-deletes.

    Args:
        rows: Rows exposing ``"id"`` and ``"topic"`` by key.
        canonical_topics: Container of topic slugs that need no change.
        topic_map: Mapping from a non-canonical slug to its canonical slug.

    Returns:
        ``(updates, deletes, counts)`` where ``updates`` is a list of
        ``(new_topic, id)`` tuples, ``deletes`` is a list of ids to
        soft-delete, and ``counts`` maps a human-readable label to the number
        of affected rows.
    """
    updates: list[tuple[str, str]] = []  # (new_topic, id) — SQL bind order
    deletes: list[str] = []
    counts: dict[str, int] = {}

    for row in rows:
        topic = row["topic"]
        if topic in canonical_topics:
            continue
        canonical = topic_map.get(topic)
        if canonical:
            updates.append((canonical, row["id"]))
            label = f"{topic} → {canonical}"
        else:
            # No mapping (or a falsy one): the unit is soft-deleted.
            deletes.append(row["id"])
            label = f"DELETE:{topic}"
        counts[label] = counts.get(label, 0) + 1

    return updates, deletes, counts


def main(
    dry_run: bool,
    *,
    db_path: "str | None" = None,
    canonical_topics=None,
    topic_map=None,
) -> None:
    """Rewrite non-canonical topics in ``wiki_units`` and soft-delete the rest.

    Non-deleted rows whose topic is already canonical are left alone. Rows
    whose topic has a truthy entry in the topic map get that canonical topic;
    all other rows are marked ``deleted=1``. Before any row is changed, the
    current topic of every non-deleted row is copied into ``_topic_backup``
    (only where that column is still NULL, so an earlier backup is never
    overwritten).

    Args:
        dry_run: When true, print the plan and return without touching the
            database contents or schema.
        db_path: SQLite file to operate on; defaults to the module-level
            ``DB_PATH``.
        canonical_topics: Container of canonical slugs; defaults to the
            module-level ``CANONICAL_TOPICS``.
        topic_map: Mapping of legacy slug to canonical slug; defaults to the
            module-level ``TOPIC_MAP``.
    """
    # Resolve defaults at call time so existing callers keep using the
    # module-level configuration.
    if db_path is None:
        db_path = DB_PATH
    if canonical_topics is None:
        canonical_topics = CANONICAL_TOPICS
    if topic_map is None:
        topic_map = TOPIC_MAP

    conn = sqlite3.connect(db_path)
    try:
        conn.row_factory = sqlite3.Row

        # Snapshot current topics of live (non-deleted) units.
        rows = conn.execute(
            "SELECT id, topic FROM wiki_units WHERE deleted=0"
        ).fetchall()
        updates, deletes, counts = _plan_topic_migration(
            rows, canonical_topics, topic_map
        )

        print("Topic migration plan:")
        for mapping, cnt in sorted(counts.items()):
            print(f"  {mapping}: {cnt} units")
        print(f"\nTotal updates: {len(updates)}, soft-deletes: {len(deletes)}")

        if dry_run:
            # Return before any DDL/DML so a dry run really changes nothing.
            print("\nDRY RUN — no changes made.")
            return

        # Ensure the backup column exists (only needed for a real run).
        existing_cols = {
            row[1]
            for row in conn.execute("PRAGMA table_info(wiki_units)").fetchall()
        }
        if "_topic_backup" not in existing_cols:
            conn.execute("ALTER TABLE wiki_units ADD COLUMN _topic_backup TEXT")
            conn.commit()
            print("Added _topic_backup column.")

        # Backup and mutation share one transaction: the context manager
        # commits on success and rolls everything back on an exception.
        with conn:
            conn.execute(
                "UPDATE wiki_units SET _topic_backup=topic "
                "WHERE _topic_backup IS NULL AND deleted=0"
            )
            conn.executemany(
                "UPDATE wiki_units SET topic=? WHERE id=?", updates
            )
            conn.executemany(
                "UPDATE wiki_units SET deleted=1 WHERE id=?",
                [(uid,) for uid in deletes],
            )

        final = conn.execute(
            "SELECT DISTINCT topic FROM wiki_units WHERE deleted=0 ORDER BY topic"
        ).fetchall()
        print("\nDistinct topics after migration:")
        for r in final:
            print(f"  {r[0]}")

        non_canonical = [r[0] for r in final if r[0] not in canonical_topics]
        if non_canonical:
            print(f"\nWARNING: non-canonical topics still present: {non_canonical}")
        else:
            print("\nAll topics are canonical.")
    finally:
        # Always release the connection, including on errors.
        conn.close()
if __name__ == "__main__":
    # Script entry point: the only option is --dry-run, which previews the
    # migration plan instead of applying it.
    cli = argparse.ArgumentParser()
    cli.add_argument("--dry-run", action="store_true")
    main(cli.parse_args().dry_run)