#!/usr/bin/env python3 """Phase 1: Extract macro definitions and call sites from all libraries. Usage: python3 scripts/extract_all.py [--tier tier1] [--library alexandria] """ from __future__ import annotations import json import sys from datetime import datetime, timezone from pathlib import Path sys.path.insert(0, str(Path(__file__).parent.parent / "src")) from cl_macros.ext.call_site_scanner import CallSiteScanner from cl_macros.ext.defmacro_parser import DefmacroParser from cl_macros.ext.library_index import LibraryIndex from cl_macros.ext.source_fetcher import SourceFetcher EXTRACTIONS_DIR = Path("data/extractions") def extract_library(lib, fetcher, parser, scanner, output_dir): """Extract macros and call sites from one library.""" print(f"\n{'='*60}") print(f"Extracting from: {lib.name} ({lib.tier})") print(f" {lib.description}") try: root = fetcher.fetch(lib) print(f" source: {root}") except Exception as e: print(f" SKIPPED: {e}") return 0, 0 files = fetcher.get_all_lisp_files(lib) print(f" lisp files: {len(files)}") # Extract macro definitions macro_defs = [] for f in files: macro_defs.extend(parser.extract_file(f)) if parser.errors: print(f" parser errors: {len(parser.errors)}") for err in parser.errors[:3]: print(f" {err}") print(f" macros extracted: {len(macro_defs)}") # Extract call sites for each macro scanner = CallSiteScanner() total_call_sites = 0 call_site_records = [] for mdef in macro_defs: sites = scanner.find_call_sites(mdef.macro_name, files, macro_defs) total_call_sites += len(sites) for site in sites: call_site_records.append({ "library": lib.name, "macro_name": site.macro_name, "source_file": site.source_file, "line_number": site.line_number, "call_form": site.call_form, "context_lines": site.context_lines, }) print(f" call sites found: {total_call_sites}") # Save extraction records ts = datetime.now(timezone.utc).isoformat() out_records = [] for mdef in macro_defs: out_records.append({ "id": f"{lib.name}-{mdef.macro_name}", "library_name": lib.name, "system_name": lib.systems[0] if lib.systems else lib.name, "source_file": str(mdef.source_file) if mdef.source_file else "", "macro_name": mdef.macro_name, "macro_definition": mdef.full_form, "form_type": mdef.form_type, "docstring": mdef.docstring, "args": mdef.args, "extracted_at": ts, "status": "extracted", }) # Write extractions output_dir.mkdir(parents=True, exist_ok=True) ext_path = output_dir / f"{lib.name}_extractions.jsonl" with open(ext_path, "w") as f: for rec in out_records: f.write(json.dumps(rec) + "\n") # Write call sites cs_path = output_dir / f"{lib.name}_call_sites.jsonl" with open(cs_path, "w") as f: for rec in call_site_records: f.write(json.dumps(rec) + "\n") return len(macro_defs), total_call_sites def main(): import argparse ap = argparse.ArgumentParser() ap.add_argument("--tier", default="tier1") ap.add_argument("--library", default=None) ap.add_argument("--output-dir", default=str(EXTRACTIONS_DIR)) args = ap.parse_args() output_dir = Path(args.output_dir) idx = LibraryIndex() fetcher = SourceFetcher() parser = DefmacroParser() if args.library: libs = [idx.get(args.library)] if libs[0] is None: print(f"Library '{args.library}' not found in index") sys.exit(1) else: libs = idx.list_libraries(args.tier) print(f"Extracting from {len(libs)} libraries (tier={args.tier})") print(f"Output: {output_dir}") total_macros = 0 total_sites = 0 successes = 0 failures = 0 for lib in libs: macros, sites = extract_library(lib, fetcher, parser, CallSiteScanner(), output_dir) total_macros += macros total_sites += sites if macros > 0: successes += 1 else: failures += 1 # Write manifest manifest = { "extracted_at": datetime.now(timezone.utc).isoformat(), "tier": args.tier, "libraries_total": len(libs), "libraries_successful": successes, "libraries_failed": failures, "macros_total": total_macros, "call_sites_total": total_sites, "output_dir": str(output_dir), } manifest_path = output_dir / "extraction_manifest.json" with open(manifest_path, "w") as f: json.dump(manifest, f, indent=2) print(f"\n{'='*60}") print(f"PHASE 1 COMPLETE") print(f" Libraries: {successes}/{len(libs)} successful") print(f" Macros extracted: {total_macros}") print(f" Call sites found: {total_sites}") print(f" Manifest: {manifest_path}") if __name__ == "__main__": main()