File size: 5,172 Bytes
d69fc90 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 | #!/usr/bin/env python3
"""Phase 1: Extract macro definitions and call sites from all libraries.
Usage: python3 scripts/extract_all.py [--tier tier1] [--library alexandria]
"""
from __future__ import annotations
import json
import sys
from datetime import datetime, timezone
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
from cl_macros.ext.call_site_scanner import CallSiteScanner
from cl_macros.ext.defmacro_parser import DefmacroParser
from cl_macros.ext.library_index import LibraryIndex
from cl_macros.ext.source_fetcher import SourceFetcher
EXTRACTIONS_DIR = Path("data/extractions")
def extract_library(lib, fetcher, parser, scanner, output_dir):
    """Extract macro definitions and call sites from one library.

    Args:
        lib: Library record from LibraryIndex (uses .name, .tier,
            .description, .systems).
        fetcher: SourceFetcher used to obtain the library source tree.
        parser: DefmacroParser that extracts macro definitions per file.
        scanner: CallSiteScanner used to locate call sites for each macro.
        output_dir: Path; per-library JSONL files are written here.

    Returns:
        Tuple ``(macro_count, call_site_count)``. Returns ``(0, 0)`` when
        fetching the library source fails (the library is skipped, not fatal).
    """
    print(f"\n{'='*60}")
    print(f"Extracting from: {lib.name} ({lib.tier})")
    print(f" {lib.description}")
    try:
        root = fetcher.fetch(lib)
        print(f" source: {root}")
    except Exception as e:
        # Best-effort pipeline: a library that cannot be fetched is reported
        # and skipped rather than aborting the whole run.
        print(f" SKIPPED: {e}")
        return 0, 0
    files = fetcher.get_all_lisp_files(lib)
    print(f" lisp files: {len(files)}")

    # Extract macro definitions from every Lisp file.
    macro_defs = []
    for f in files:
        macro_defs.extend(parser.extract_file(f))
    if parser.errors:
        # NOTE(review): the same parser instance is reused across libraries —
        # confirm .errors resets between files/libraries, else this count
        # is cumulative.
        print(f" parser errors: {len(parser.errors)}")
        for err in parser.errors[:3]:
            print(f" {err}")
    print(f" macros extracted: {len(macro_defs)}")

    # Find call sites for each macro.
    # Bug fix: a fresh CallSiteScanner() used to be constructed here,
    # shadowing (and silently discarding) the caller-supplied `scanner`
    # argument. The passed-in scanner is now actually used.
    total_call_sites = 0
    call_site_records = []
    for mdef in macro_defs:
        sites = scanner.find_call_sites(mdef.macro_name, files, macro_defs)
        total_call_sites += len(sites)
        for site in sites:
            call_site_records.append({
                "library": lib.name,
                "macro_name": site.macro_name,
                "source_file": site.source_file,
                "line_number": site.line_number,
                "call_form": site.call_form,
                "context_lines": site.context_lines,
            })
    print(f" call sites found: {total_call_sites}")

    # Build extraction records; one shared timestamp for the whole library.
    ts = datetime.now(timezone.utc).isoformat()
    out_records = []
    for mdef in macro_defs:
        out_records.append({
            "id": f"{lib.name}-{mdef.macro_name}",
            "library_name": lib.name,
            # Fall back to the library name when no ASDF systems are listed.
            "system_name": lib.systems[0] if lib.systems else lib.name,
            "source_file": str(mdef.source_file) if mdef.source_file else "",
            "macro_name": mdef.macro_name,
            "macro_definition": mdef.full_form,
            "form_type": mdef.form_type,
            "docstring": mdef.docstring,
            "args": mdef.args,
            "extracted_at": ts,
            "status": "extracted",
        })

    # Write extraction records (JSONL, one record per line).
    output_dir.mkdir(parents=True, exist_ok=True)
    ext_path = output_dir / f"{lib.name}_extractions.jsonl"
    with open(ext_path, "w", encoding="utf-8") as f:
        for rec in out_records:
            f.write(json.dumps(rec) + "\n")

    # Write call-site records.
    cs_path = output_dir / f"{lib.name}_call_sites.jsonl"
    with open(cs_path, "w", encoding="utf-8") as f:
        for rec in call_site_records:
            f.write(json.dumps(rec) + "\n")

    return len(macro_defs), total_call_sites
def main():
    """Command-line entry point for Phase 1 macro extraction."""
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument("--tier", default="tier1")
    cli.add_argument("--library", default=None)
    cli.add_argument("--output-dir", default=str(EXTRACTIONS_DIR))
    opts = cli.parse_args()

    out_dir = Path(opts.output_dir)
    index = LibraryIndex()
    source_fetcher = SourceFetcher()
    macro_parser = DefmacroParser()

    # Resolve the working set: a single named library, or a whole tier.
    if opts.library:
        target = index.get(opts.library)
        if target is None:
            print(f"Library '{opts.library}' not found in index")
            sys.exit(1)
        selected = [target]
    else:
        selected = index.list_libraries(opts.tier)

    print(f"Extracting from {len(selected)} libraries (tier={opts.tier})")
    print(f"Output: {out_dir}")

    # Run extraction per library, collecting (macro_count, site_count) pairs.
    per_lib = [
        extract_library(lib, source_fetcher, macro_parser, CallSiteScanner(), out_dir)
        for lib in selected
    ]
    total_macros = sum(macros for macros, _ in per_lib)
    total_sites = sum(sites for _, sites in per_lib)
    # A library counts as successful when at least one macro was extracted.
    successes = sum(1 for macros, _ in per_lib if macros > 0)
    failures = len(per_lib) - successes

    # Write the run manifest summarizing the whole phase.
    manifest = {
        "extracted_at": datetime.now(timezone.utc).isoformat(),
        "tier": opts.tier,
        "libraries_total": len(selected),
        "libraries_successful": successes,
        "libraries_failed": failures,
        "macros_total": total_macros,
        "call_sites_total": total_sites,
        "output_dir": str(out_dir),
    }
    manifest_path = out_dir / "extraction_manifest.json"
    with open(manifest_path, "w") as fh:
        json.dump(manifest, fh, indent=2)

    print(f"\n{'='*60}")
    print(f"PHASE 1 COMPLETE")
    print(f" Libraries: {successes}/{len(selected)} successful")
    print(f" Macros extracted: {total_macros}")
    print(f" Call sites found: {total_sites}")
    print(f" Manifest: {manifest_path}")
# Run the extraction only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|