| |
| """Phase 1: Extract macro definitions and call sites from all libraries. |
| |
| Usage: python3 scripts/extract_all.py [--tier tier1] [--library alexandria] [--output-dir data/extractions] |
| """ |
|
|
| from __future__ import annotations |
|
|
| import json |
| import sys |
| from datetime import datetime, timezone |
| from pathlib import Path |
|
|
# Put the "src" directory that sits two levels above this file (next to this
# script's parent directory) at the front of the import path.
# NOTE(review): presumably this is where the `cl_macros` package lives, so the
# project imports work without installing it -- confirm against the repo layout.
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
|
|
| from cl_macros.ext.call_site_scanner import CallSiteScanner |
| from cl_macros.ext.defmacro_parser import DefmacroParser |
| from cl_macros.ext.library_index import LibraryIndex |
| from cl_macros.ext.source_fetcher import SourceFetcher |
|
|
# Default output location (relative to the current working directory) for the
# per-library JSONL files and the run manifest; used as the default value of
# the --output-dir command-line option.
EXTRACTIONS_DIR = Path("data") / "extractions"
|
|
|
|
def extract_library(lib, fetcher, parser, scanner, output_dir):
    """Extract macro definitions and their call sites from one library.

    Progress is reported on stdout. On success two JSON Lines files are
    (over)written in *output_dir*, which is created if missing:

    * ``<lib.name>_extractions.jsonl`` -- one record per macro definition
    * ``<lib.name>_call_sites.jsonl`` -- one record per call site

    Args:
        lib: Library descriptor; ``name``, ``tier``, ``description`` and
            ``systems`` are read from it.
        fetcher: Object providing ``fetch(lib)`` and
            ``get_all_lisp_files(lib)``.
        parser: Object providing ``extract_file(path)`` and an ``errors``
            sequence.
        scanner: Object providing
            ``find_call_sites(macro_name, files, macro_defs)``. If ``None``,
            a fresh ``CallSiteScanner`` is created.
        output_dir: ``pathlib.Path`` of the directory to write into.

    Returns:
        A ``(macro_count, call_site_count)`` tuple. ``(0, 0)`` is returned,
        and nothing is written, when fetching the library source fails.
    """
    print(f"\n{'='*60}")
    print(f"Extracting from: {lib.name} ({lib.tier})")
    print(f" {lib.description}")

    # Best effort: a library whose source cannot be fetched is reported and
    # skipped so that one bad library does not abort the whole run.
    try:
        root = fetcher.fetch(lib)
    except Exception as e:
        print(f" SKIPPED: {e}")
        return 0, 0
    print(f" source: {root}")

    files = fetcher.get_all_lisp_files(lib)
    print(f" lisp files: {len(files)}")

    # Collect every macro definition found in the library's files.
    macro_defs = []
    for path in files:
        macro_defs.extend(parser.extract_file(path))

    # NOTE(review): parser.errors is reported as-is. If one parser instance is
    # shared across libraries and never reset, this may include errors from
    # earlier libraries -- confirm against DefmacroParser.
    if parser.errors:
        print(f" parser errors: {len(parser.errors)}")
        for err in parser.errors[:3]:
            print(f" {err}")

    print(f" macros extracted: {len(macro_defs)}")

    # Use the scanner supplied by the caller. It used to be silently replaced
    # by a new CallSiteScanner(), which made the parameter meaningless.
    if scanner is None:
        scanner = CallSiteScanner()

    call_site_records = []
    for mdef in macro_defs:
        sites = scanner.find_call_sites(mdef.macro_name, files, macro_defs)
        call_site_records.extend(
            {
                "library": lib.name,
                "macro_name": site.macro_name,
                "source_file": site.source_file,
                "line_number": site.line_number,
                "call_form": site.call_form,
                "context_lines": site.context_lines,
            }
            for site in sites
        )
    total_call_sites = len(call_site_records)

    print(f" call sites found: {total_call_sites}")

    # One timestamp for the whole library so all of its records agree.
    ts = datetime.now(timezone.utc).isoformat()
    # Fall back to the library name when no system names are listed.
    system_name = lib.systems[0] if lib.systems else lib.name
    out_records = [
        {
            "id": f"{lib.name}-{mdef.macro_name}",
            "library_name": lib.name,
            "system_name": system_name,
            "source_file": str(mdef.source_file) if mdef.source_file else "",
            "macro_name": mdef.macro_name,
            "macro_definition": mdef.full_form,
            "form_type": mdef.form_type,
            "docstring": mdef.docstring,
            "args": mdef.args,
            "extracted_at": ts,
            "status": "extracted",
        }
        for mdef in macro_defs
    ]

    output_dir.mkdir(parents=True, exist_ok=True)
    _write_jsonl(output_dir / f"{lib.name}_extractions.jsonl", out_records)
    _write_jsonl(output_dir / f"{lib.name}_call_sites.jsonl", call_site_records)

    return len(macro_defs), total_call_sites


def _write_jsonl(path, records):
    """Write *records* to *path* as JSON Lines, one JSON object per line."""
    with open(path, "w", encoding="utf-8") as f:
        f.writelines(json.dumps(rec) + "\n" for rec in records)
|
|
|
|
def main():
    """Command-line entry point: run the extraction and write a manifest.

    Options:
        --tier        Tier passed to ``LibraryIndex.list_libraries`` when no
                      single library is requested (default ``tier1``).
        --library     Name of one library to extract; the process exits with
                      status 1 if the index returns ``None`` for it.
        --output-dir  Directory for all output files (default
                      ``EXTRACTIONS_DIR``).

    Each selected library is passed to ``extract_library``. A library counts
    as successful when at least one macro was extracted from it, otherwise as
    failed. The totals are written to ``extraction_manifest.json`` in the
    output directory and a summary is printed.
    """
    import argparse

    ap = argparse.ArgumentParser()
    ap.add_argument("--tier", default="tier1")
    ap.add_argument("--library", default=None)
    ap.add_argument("--output-dir", default=str(EXTRACTIONS_DIR))
    args = ap.parse_args()

    output_dir = Path(args.output_dir)
    idx = LibraryIndex()
    fetcher = SourceFetcher()
    parser = DefmacroParser()

    if args.library:
        requested = idx.get(args.library)
        if requested is None:
            print(f"Library '{args.library}' not found in index")
            sys.exit(1)
        libs = [requested]
    else:
        libs = idx.list_libraries(args.tier)

    print(f"Extracting from {len(libs)} libraries (tier={args.tier})")
    print(f"Output: {output_dir}")

    total_macros = 0
    total_sites = 0
    successes = 0
    failures = 0

    for lib in libs:
        # A fresh scanner is handed to every library.
        macros, sites = extract_library(lib, fetcher, parser, CallSiteScanner(), output_dir)
        total_macros += macros
        total_sites += sites
        # Zero macros covers both "fetch failed" and "nothing found".
        if macros > 0:
            successes += 1
        else:
            failures += 1

    manifest = {
        "extracted_at": datetime.now(timezone.utc).isoformat(),
        "tier": args.tier,
        "libraries_total": len(libs),
        "libraries_successful": successes,
        "libraries_failed": failures,
        "macros_total": total_macros,
        "call_sites_total": total_sites,
        "output_dir": str(output_dir),
    }
    # extract_library only creates the directory after a successful fetch, so
    # it may not exist yet when every library was skipped or none was selected.
    output_dir.mkdir(parents=True, exist_ok=True)
    manifest_path = output_dir / "extraction_manifest.json"
    with open(manifest_path, "w", encoding="utf-8") as f:
        json.dump(manifest, f, indent=2)

    print(f"\n{'='*60}")
    print("PHASE 1 COMPLETE")
    print(f" Libraries: {successes}/{len(libs)} successful")
    print(f" Macros extracted: {total_macros}")
    print(f" Call sites found: {total_sites}")
    print(f" Manifest: {manifest_path}")
|
|
|
# Run the extraction only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|
|