# Sibyl-Memory / sibyl-memory-cli / tests / test_migrate.py
# sibyllabs's picture
# release: sibyl-memory-cli 0.3.9 (guided `sibyl migrate` + Codex + Claude MCP fix)
# commit c5a913d
"""Tests for the `sibyl setup` guided-flow phases (migrate.py).
Exercises every DETERMINISTIC phase against a fake home: scan, backup (+byte verify
+ source-untouched), Codex wirer, extraction prompt, DB verify, heuristic lean,
and the confirmed-debloat safety gate (refuses without a backup).
"""
import os
from pathlib import Path
import pytest
from sibyl_memory_cli import migrate as M
from sibyl_memory_client import MemoryClient
# Sample CLAUDE.md body used as the "bloated" fixture: an identity section,
# a rules section, and a trailing section of accumulated memory notes
# (~ real-ish bloat). Each physical line of the file ends with a newline.
BLOATED_CLAUDE = (
    "# Project Atlas\n"
    "## Identity\n"
    "You are the Atlas build agent. Stay in scope.\n"
    "## Rules\n"
    "- never force-push\n"
    "- run tests before commit\n"
    "## Accumulated memory\n"
    "- user prefers tabs over spaces\n"
    "- API base is https://api.atlas.local\n"
    "- met with Jordan about the Q3 roadmap on 2026-04-02\n"
    "- the staging DB password rotates monthly\n"
    "- learned: the flaky test is test_pipeline::test_retry\n"
    "- project uses pnpm not npm\n"
)
def _fake_home(tmp_path: Path) -> Path:
    """Create a throwaway home directory populated with harness files.

    Layout written under ``tmp_path / "home"``:
      - ``myproj/CLAUDE.md``        (the bloated fixture text)
      - ``AGENTS.md``
      - ``.codex/config.toml``
      - ``.hermes/config.yaml`` and ``.hermes/memory/notes.md``

    Returns the path of the created home directory.
    """
    home = tmp_path / "home"
    project_dir = home / "myproj"
    codex_dir = home / ".codex"
    hermes_dir = home / ".hermes"
    hermes_memory_dir = hermes_dir / "memory"

    # Project directory with the bloated CLAUDE.md (also creates `home`).
    project_dir.mkdir(parents=True)
    (project_dir / "CLAUDE.md").write_text(BLOATED_CLAUDE, encoding="utf-8")

    # Home-level agents file.
    (home / "AGENTS.md").write_text(
        "# Agents\nuser likes concise answers\n", encoding="utf-8"
    )

    # Codex config.
    codex_dir.mkdir()
    (codex_dir / "config.toml").write_text('model = "o4"\n', encoding="utf-8")

    # Hermes config plus a memory directory holding one note.
    hermes_memory_dir.mkdir(parents=True)
    (hermes_dir / "config.yaml").write_text(
        "memory:\n provider: flatfile\n", encoding="utf-8"
    )
    (hermes_memory_dir / "notes.md").write_text(
        "remembered: deploy on fridays\n", encoding="utf-8"
    )
    return home
def test_scan_finds_files_across_harnesses(tmp_path):
    """The scan of the fake home reports files from every harness laid out there."""
    fake_home = _fake_home(tmp_path)
    hits = M.scan_memory_files(fake_home, cwd=fake_home / "myproj")
    rel_paths = {hit.rel for hit in hits}

    # The project-level CLAUDE.md shows up under some relative path.
    assert any("CLAUDE.md" in rel for rel in rel_paths)
    # Home-level files are reported by their exact relative paths.
    assert "AGENTS.md" in rel_paths
    assert ".codex/config.toml" in rel_paths
    assert ".hermes/config.yaml" in rel_paths
    # The hermes memory dir is captured as a directory entry.
    assert any(hit.is_dir and "memory" in hit.rel for hit in hits)
def test_backup_copies_verifies_and_leaves_sources_untouched(tmp_path):
    """Backing up the scanned files succeeds and does not modify the source.

    The source CLAUDE.md is snapshotted (bytes + modification time) before the
    backup runs and compared afterwards.
    """
    home = _fake_home(tmp_path)
    src = home / "myproj" / "CLAUDE.md"
    src_bytes = src.read_bytes()
    # Use the integer nanosecond timestamp: the float `st_mtime` can lose
    # sub-microsecond precision, which would weaken the "untouched" check.
    src_mtime_ns = src.stat().st_mtime_ns

    found = M.scan_memory_files(home, cwd=home / "myproj")
    res = M.run_backup(found, tmp_path / "backups")

    assert res.ok, res.error
    assert res.backup_dir.name.startswith("sibyl-migration-backup-")
    assert res.total_bytes > 0
    assert len(res.files) >= 4
    # At least one reported file exists under the backup directory.
    # NOTE(review): this does not pin CLAUDE.md specifically, nor compare the
    # copied bytes -- consider tightening once the shape of `res.files` is confirmed.
    assert any((res.backup_dir / r).exists() for r in res.files)

    # SOURCES UNTOUCHED
    assert src.read_bytes() == src_bytes
    assert src.stat().st_mtime_ns == src_mtime_ns
def test_codex_wirer_detect_and_instructions(tmp_path):
    """CodexWirer sees the fake config, reports it as not wired, and emits instructions."""
    fake_home = _fake_home(tmp_path)
    wirer = M.CodexWirer(config_path=fake_home / ".codex" / "config.toml")

    assert wirer.is_present()

    state = wirer.current_state()
    assert state["config_exists"]
    assert not state["wired_with_sibyl"]

    # Some instruction line must mention the MCP server table name.
    lines = wirer.instructions()
    assert any("mcp_servers.sibyl_memory" in line for line in lines)
def test_wire_instructions_cover_all_harnesses():
    """Every harness name, including an unknown one, yields a non-empty list."""
    harness_names = ("claude-code", "codex", "hermes", "something-else")
    for harness in harness_names:
        assert isinstance(M.wire_instructions(harness), list)
        assert M.wire_instructions(harness)

    # The claude-code instructions mention the `claude mcp add` command.
    claude_text = " ".join(M.wire_instructions("claude-code"))
    assert "claude mcp add" in claude_text
def test_extraction_prompt_reads_from_backup_only(tmp_path):
    """The extraction prompt carries the safety wording and the backup path."""
    backup_dir = tmp_path / "bk"
    prompt = M.extraction_prompt("claude-code", backup_dir)

    required_fragments = (
        "Read ONLY from the backup",
        "Do not edit, trim, or delete any live file",
        str(backup_dir),
    )
    for fragment in required_fragments:
        assert fragment in prompt
def test_db_baseline_and_verify_new(tmp_path):
    """Baseline is 0 before any client exists; three writes are reported as new."""
    db_file = tmp_path / ".sibyl-memory" / "memory.db"
    db_file.parent.mkdir(parents=True)

    # Only the parent directory exists at this point, so no DB rows yet.
    assert M.db_baseline(db_file) == 0

    client = MemoryClient.local(str(db_file), tenant_id="qa")
    starting_count = M.db_baseline(db_file)

    # One entity in each of three categories.
    new_entities = (
        ("facts", "api_base", {"value": "https://api.atlas.local"}),
        ("preferences", "indent", {"value": "tabs"}),
        ("relationships", "jordan", {"note": "Q3 roadmap"}),
    )
    for category, key, payload in new_entities:
        client.set_entity(category, key, payload)

    report = M.verify_new_entries(db_file, starting_count)
    assert report["ok"]
    assert report["new_total"] == 3
    assert {"facts", "preferences", "relationships"} <= set(report["by_category"])
def test_heuristic_lean_keepblock_and_first_section():
    """heuristic_lean honours an explicit keep-block, else trims later sections."""
    # Case 1: an explicit keep-block wins over everything around it.
    marked_text = "junk\n<!-- sibyl:keep -->\nCORE RULES\n<!-- /sibyl:keep -->\nmore junk\n"
    kept = M.heuristic_lean(marked_text)
    assert "CORE RULES" in kept
    assert "junk" not in kept

    # Case 2: no keep-block -- the first ## section stays, later ones are trimmed.
    trimmed = M.heuristic_lean(BLOATED_CLAUDE)
    assert "Identity" in trimmed
    assert "Accumulated memory" not in trimmed  # later section trimmed
    assert len(BLOATED_CLAUDE) > len(trimmed)
    assert "lives in Sibyl Memory" in trimmed  # pointer appended
def test_debloat_refuses_without_backup(tmp_path):
    """Without a backup, debloat_file reports a refusal and leaves the file alone."""
    target = tmp_path / "CLAUDE.md"
    target.write_text(BLOATED_CLAUDE, encoding="utf-8")

    result = M.debloat_file(target, "lean", backup_exists=False)

    assert not result["written"]
    assert "refused" in result["error"]
    # File content is untouched.
    assert target.read_text(encoding="utf-8") == BLOATED_CLAUDE
def test_debloat_trims_with_backup_and_dry_run(tmp_path):
    """With a backup, a dry run leaves the file alone and a real run writes the lean text."""
    target = tmp_path / "CLAUDE.md"
    target.write_text(BLOATED_CLAUDE, encoding="utf-8")
    lean_text = M.heuristic_lean(BLOATED_CLAUDE)

    # Dry run: nothing is written, but the reported sizes show a reduction.
    preview = M.debloat_file(target, lean_text, backup_exists=True, dry_run=True)
    assert not preview["written"]
    assert target.read_text(encoding="utf-8") == BLOATED_CLAUDE
    assert preview["after"] < preview["before"]

    # Real run: the file now holds the lean text and is smaller than before.
    applied = M.debloat_file(target, lean_text, backup_exists=True)
    assert applied["written"]
    assert target.read_text(encoding="utf-8") == lean_text
    assert target.stat().st_size < applied["before"]
def test_detect_state_snapshot(tmp_path):
    """detect_state on the fake home lists files, all three harnesses, and no DB entries."""
    fake_home = _fake_home(tmp_path)
    snapshot = M.detect_state(
        fake_home,
        cwd=fake_home / "myproj",
        db_path=fake_home / ".sibyl-memory" / "memory.db",
    )

    assert "files" in snapshot
    assert len(snapshot["files"]) >= 4
    assert set(snapshot["harnesses"]) == {"claude-code", "codex", "hermes"}
    # The DB path passed in was never created by the fixture.
    assert snapshot["db_entries"] == 0