# Source: Agora-Opt/src/debate_memory/debug_memory_builder.py
# Hosting-page residue kept for provenance: "SorrowTea's picture", "Upload 45 files", "96abbd8 verified"
"""Convert debug_memory.jsonl records into a searchable MemoryBank."""
from __future__ import annotations
import argparse
import glob
import hashlib
import json
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional
from .memory_bank import MemoryBank
# Two directory levels above the directory that contains this file.
PROJECT_ROOT = Path(__file__).resolve().parents[2]
# Directory named "debate_with_memory" located next to PROJECT_ROOT; it is
# searched for debug records as well (presumably an older project layout).
LEGACY_ROOT = PROJECT_ROOT.parent / "debate_with_memory"
def _default_inputs() -> List[str]:
    """Return the default file/glob patterns searched for debug records.

    The ``debug_memory.jsonl`` files directly under ``memory_storage`` of the
    current and the legacy root come first, followed by the copies below
    ``memory_storage/backups/*``. The patterns are returned as strings and
    are not expanded here.
    """
    storage_dirs = [root / "memory_storage" for root in (PROJECT_ROOT, LEGACY_ROOT)]
    live_files = [storage / "debug_memory.jsonl" for storage in storage_dirs]
    backup_globs = [
        storage / "backups" / "*" / "debug_memory.jsonl" for storage in storage_dirs
    ]
    return list(map(str, live_files + backup_globs))
def _stable_id(signature: str) -> int:
digest = hashlib.sha1(signature.encode("utf-8")).hexdigest()
return int(digest[:12], 16)
def _parse_timestamp(ts: Optional[str]) -> datetime:
if not ts:
return datetime.min
try:
return datetime.fromisoformat(ts)
except ValueError:
return datetime.min
def load_debug_records(input_globs: List[str]) -> Dict[str, Dict]:
    """Load debug records from JSONL files, keeping the newest per signature.

    Args:
        input_globs: File paths or glob patterns. Every pattern is expanded
            with ``glob.glob``; matches are de-duplicated and read in sorted
            order. Matches that are not regular files (e.g. directories) are
            ignored.

    Returns:
        Mapping from signature to the record dict with the latest timestamp
        for that signature. When timestamps tie, the record seen first wins.
        Each stored record gains a ``"_ts"`` key holding its parsed timestamp.

    Lines are skipped (best effort, without raising) when they are blank, are
    not valid JSON, do not decode to a JSON object, or lack a non-empty
    string ``"signature"``.
    """
    matched = set()
    for pattern in input_globs:
        matched.update(Path(hit) for hit in glob.glob(pattern))

    records: Dict[str, Dict] = {}
    for file_path in sorted(path for path in matched if path.is_file()):
        with file_path.open("r", encoding="utf-8") as fh:
            for raw_line in fh:
                line = raw_line.strip()
                if not line:
                    continue
                try:
                    record = json.loads(line)
                except json.JSONDecodeError:
                    # Deliberately tolerant: one corrupt line must not abort the load.
                    continue
                if not isinstance(record, dict):
                    # Valid JSON that is not an object (list, string, null, ...).
                    continue
                signature = record.get("signature")
                if not signature or not isinstance(signature, str):
                    continue
                ts = _parse_timestamp(record.get("timestamp"))
                existing = records.get(signature)
                if existing is None or ts > existing.get("_ts", datetime.min):
                    record["_ts"] = ts
                    records[signature] = record
    return records
def build_debug_memory(records: Dict[str, Dict], output_dir: Path, clear: bool) -> None:
    """Write every debug record into a ``MemoryBank`` rooted at ``output_dir``.

    Args:
        records: Mapping from signature to record dict. The keys read from a
            record are ``description``, ``error_text``, ``guidance``,
            ``status``, ``timestamp``, ``metadata`` and ``problem_id``.
        output_dir: Directory handed to ``MemoryBank(memory_dir=...)``.
        clear: When true and ``output_dir`` exists, every entry inside it is
            removed before the bank is created.

    Each record becomes one ``bank.add_case`` call whose ``problem_desc`` is a
    prompt built from description, error text and guidance, and whose
    ``solution_code`` is a plain-text note. A record whose ``add_case`` call
    (or ``problem_id`` conversion) fails is reported with ``print`` and
    skipped; the number of stored cases is printed at the end.
    """
    if clear and output_dir.exists():
        import shutil  # only needed on the clearing path

        for child in output_dir.iterdir():
            # rmtree refuses symlinks, so only real directories go through it;
            # files and links of any kind are unlinked.
            if child.is_dir() and not child.is_symlink():
                shutil.rmtree(child)
            else:
                child.unlink()

    bank = MemoryBank(memory_dir=str(output_dir))
    added = 0
    for signature, record in records.items():
        description = record.get("description")
        if description is None:
            description = "Unknown problem"
        error_text = record.get("error_text")
        if error_text is None:
            error_text = ""
        guidance = record.get("guidance", "")
        status = record.get("status", "")

        # Record-level metadata is merged last and therefore overrides the
        # three canonical keys; anything that is not a dict is ignored.
        extra_metadata = record.get("metadata")
        metadata = {
            "signature": signature,
            "status": status,
            "timestamp": record.get("timestamp"),
            **(extra_metadata if isinstance(extra_metadata, dict) else {}),
        }

        # NOTE(review): the "---" lines are treated as unconditional section
        # separators (header / error snippet / metadata) — confirm against the
        # original layout.
        note_lines = ["# Debug Memory Case", f"Signature: {signature}", f"Status: {status}"]
        if guidance:
            note_lines.append(f"Guidance: {guidance}")
        note_lines.append("---")
        if error_text:
            note_lines.append(f"Error snippet:\n{error_text}")
        note_lines.append("---")
        note_lines.append(f"Source metadata: {metadata}")

        prompt_desc = (
            f"{description}\n\n## Error Details\n```\n{error_text}\n```\n"
            f"## Guidance\n{guidance or 'N/A'}\n"
        )

        problem_id = record.get("problem_id")
        if problem_id is None:
            # No explicit ID: derive a repeatable one from the signature.
            problem_id = _stable_id(signature)
        try:
            bank.add_case(
                problem_id=int(problem_id),
                problem_desc=prompt_desc,
                solution_code="\n".join(note_lines),
                objective_value=0.0,
                is_correct=True,
                metadata=metadata,
            )
            added += 1
        except Exception as exc:  # noqa: BLE001
            # Best effort: one failing case must not abort the whole build.
            print(f"Failed to add debug case {signature}: {exc}")
    print(f"✅ Added {added} debug cases to {output_dir}")
def parse_args(argv: Optional[List[str]] = None):
    """Parse the command-line options of the debug memory builder.

    Args:
        argv: Argument strings to parse. ``None`` (the default) makes argparse
            read the process command line, exactly as before; passing a list
            allows programmatic use without touching ``sys.argv``.

    Returns:
        The ``argparse.Namespace`` with ``input`` (list of files/globs,
        defaulting to ``_default_inputs()``), ``output_dir`` (string path) and
        ``clear`` (bool, false unless ``--clear`` is given).
    """
    parser = argparse.ArgumentParser(description="Build debug memory bank from debug_memory.jsonl records")
    parser.add_argument(
        "--input",
        nargs="*",
        default=_default_inputs(),
        help="Input files/globs containing debug records",
    )
    parser.add_argument(
        "--output_dir",
        type=str,
        default=str(PROJECT_ROOT / "debug_case_memory"),
        help="Where to store the constructed memory bank",
    )
    parser.add_argument(
        "--clear",
        action="store_true",
        help="Remove existing output_dir contents before rebuilding",
    )
    return parser.parse_args(argv)
def main():
    """Command-line entry point: load debug records and rebuild the memory bank."""
    cli = parse_args()
    unique_records = load_debug_records(cli.input)
    print(f"Loaded {len(unique_records)} unique debug signatures")
    target_dir = Path(cli.output_dir)
    build_debug_memory(unique_records, target_dir, clear=cli.clear)


# Run the builder only when the module is executed as a script.
if __name__ == "__main__":
    main()