Spaces:
Running
Running
File size: 3,695 Bytes
5520f8e | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 | #!/usr/bin/env python3
"""Scan the lutar-lean corpus for theorems/lemmas and classify each as
PROVEN / AXIOM / SORRY with file:line. Honest by construction:
- AXIOM: a line whose initial token (after optional private modifier) is `axiom`.
- SORRY: a theorem/lemma/def whose proof body contains a non-comment `sorry`
token before the next top-level declaration.
- PROVEN: a theorem/lemma with no `sorry` in its body and not an axiom.
This is a static classifier (no kernel run). When the kernel build is GREEN,
PROVEN means the elaborator accepted the proof; SORRY entries are the open
obligations the kernel admits via the `sorry` axiom; AXIOM entries are explicit
postulates. The classification is conservative: any ambiguity is reported as the
weaker status (SORRY > AXIOM > PROVEN in honesty ordering is not assumed; we just
flag the literal tokens present).
"""
from __future__ import annotations
import json, os, re, sys
DECL_RE = re.compile(
r"^(?P<indent>\s*)(?:@\[[^\]]*\]\s*)?(?:private\s+)?(?:noncomputable\s+)?(?:private\s+)?"
r"(?P<kind>theorem|lemma|def|abbrev|instance)\s+(?P<name>[A-Za-z_][A-Za-z0-9_'.]*)"
)
AXIOM_RE = re.compile(r"^\s*(?:private\s+)?axiom\s+(?P<name>[A-Za-z_][A-Za-z0-9_'.]*)")
SORRY_RE = re.compile(r"\bsorry\b")
COMMENT_LINE_RE = re.compile(r"^\s*--")
def iter_lean_files(root: str):
for base in ("Lutar", "TH8"):
d = os.path.join(root, base)
if os.path.isdir(d):
for dp, _dirs, files in os.walk(d):
for fn in files:
if fn.endswith(".lean"):
yield os.path.join(dp, fn)
for top in ("Main.lean", "MainRef.lean", "Lutar.lean", "RefVectors.lean"):
p = os.path.join(root, top)
if os.path.exists(p):
yield p
def scan(root: str) -> list[dict]:
out = []
for path in sorted(iter_lean_files(root)):
rel = os.path.relpath(path, root)
with open(path, "r", encoding="utf-8", errors="replace") as fh:
lines = fh.readlines()
# First pass: axioms
for i, line in enumerate(lines):
m = AXIOM_RE.match(line)
if m:
out.append({"name": m.group("name"), "kind": "axiom",
"status": "AXIOM", "file": rel, "line": i + 1})
# Second pass: decls + body sorry detection
decl_idxs = [i for i, ln in enumerate(lines) if DECL_RE.match(ln)]
for j, i in enumerate(decl_idxs):
m = DECL_RE.match(lines[i])
name = m.group("name")
kind = m.group("kind")
end = decl_idxs[j + 1] if j + 1 < len(decl_idxs) else len(lines)
body = lines[i:end]
has_sorry = any(SORRY_RE.search(ln) and not COMMENT_LINE_RE.match(ln)
for ln in body)
# only count theorem/lemma as "theorems"; def/abbrev/instance noted too
status = "SORRY" if has_sorry else "PROVEN"
out.append({"name": name, "kind": kind, "status": status,
"file": rel, "line": i + 1})
return out
def summarize(items: list[dict]) -> dict:
theorems = [x for x in items if x["kind"] in ("theorem", "lemma")]
return {
"total_declarations": len(items),
"theorems_and_lemmas": len(theorems),
"proven": sum(1 for x in theorems if x["status"] == "PROVEN"),
"sorry": sum(1 for x in items if x["status"] == "SORRY"),
"axiom": sum(1 for x in items if x["status"] == "AXIOM"),
}
if __name__ == "__main__":
root = sys.argv[1] if len(sys.argv) > 1 else "."
items = scan(root)
print(json.dumps({"summary": summarize(items), "items": items}, indent=2))
|