Spaces:
Running
Running
| import Lean | |
| import Mathlib.Lean.Expr.Basic | |
| import Mathlib.Lean.CoreM | |
| import Batteries.Lean.Util.Path | |
| import Batteries.Data.String.Matcher | |
| open Lean | |
| open Lean.Meta | |
| open Lean.Elab | |
| open Lean.Elab.Command | |
| /- | |
| This small file is modified from: https://github.com/semorrison/lean-training-data | |
| You can refer to the file structure there for more details. | |
| To run this, there is no need to download another 4GB of Mathlib files. | |
| Just prepare a Lean project with Mathlib installed. Suppose the project is in the folder LeanProj. | |
| Your Mathlib files should live in LeanProj/.lake/packages/mathlib. | |
| My LeanProj/lean-toolchain says I am using version: leanprover/lean4:v4.9.0-rc2 | |
| Now, put this extract.lean in LeanProj/scripts/extract.lean | |
| In LeanProj/lakefile.lean, below | |
| @[default_target] | |
| lean_lib «LeanProj» where | |
| add this: | |
| @[default_target] | |
| lean_exe extract where | |
| root := `scripts.extract | |
| supportInterpreter := true | |
| This will make the `lake exe extract` command available, | |
| and enable the correct `compile_time_search_path%` in the script. | |
| Now in the terminal, run: | |
| cd path-to-LeanProj | |
| mkdir data | |
| lake exe extract > data/theorems.txt | |
| The stdout (IO.println) will be directed to data/theorems.txt, | |
| The stderr (IO.eprintln) will be shown in the terminal. You will see the progress. It takes about 15 minutes. | |
| -/ | |
| /-- | |
| Helps print kinds of constants | |
| and distinguish between theorems, definitions, axioms, etc. | |
| -/ | |
| def Lean.ConstantInfo.kind : ConstantInfo → String | |
| | .axiomInfo _ => "axiom" | |
| | .defnInfo _ => "def" | |
| | .thmInfo _ => "theorem" | |
| | .opaqueInfo _ => "opaque" | |
| | .quotInfo _ => "quot" -- Not sure what this is! | |
| | .inductInfo _ => "inductive" | |
| | .ctorInfo _ => "constructor" | |
| | .recInfo _ => "recursor" | |
| def truncatePrint (s : String) : IO Unit := do | |
| IO.println (if s.length < 1000 then s else s.take 997 ++ " ...") | |
| def main : IO Unit := do | |
| IO.eprintln "\n\n" | |
| IO.eprintln "importing modules" | |
| searchPathRef.set compile_time_search_path% | |
| CoreM.withImportModules #[`Mathlib] (options := {entries := [ | |
| -- (`maxHeartbeats, .ofNat 0), | |
| (`smartUnfolding, .ofBool false), | |
| ]}) do | |
| IO.eprintln s!"simplification timeout: {← getMaxHeartbeats}" | |
| let env ← getEnv | |
| IO.eprintln "filtering out unwanted objects" | |
| let mut x := 0 | |
| let mut lst := #[] | |
| lst := lst ++ env.constants.map₁.toArray | |
| lst := lst ++ env.constants.map₂.toArray | |
| lst := lst.filter fun (name, info) => ( | |
| !name.isInternalOrNum -- s.startsWith "_" | .num | |
| && !name.isInaccessibleUserName -- s.contains '✝' || s == "_inaccessible" | |
| && !name.isImplementationDetail -- s.startsWith "__" | |
| && !(name.toString.findSubstr? ".proof_" |>.isSome) | |
| && !(name.toString.findSubstr? ".match_" |>.isSome) | |
| ) | |
| -- name.toString.startsWith "Nat.add" | |
| IO.eprintln s!"printing {lst.size} theorems" | |
| for (name, info) in lst do | |
| IO.println "----thm----" | |
| -- print module. | |
| -- eg. "import Mathlib.Data.Real.Basic" | |
| match (← findModuleOf? name) with | |
| | .none => IO.println "unknown" | |
| | .some mod => IO.println s!"import {mod}" | |
| IO.println "--thm--" | |
| -- print kine and decl name | |
| -- eg. "theorem Nat.add_comm" or "def Nat.add" | |
| IO.println s!"{info.kind} {name}" | |
| IO.println "--thm--" | |
| -- print decl type. | |
| -- eg. "∀ (n m : Nat), n + m = m + n" | |
| -- ppExpr is better than dbgToString | |
| let ppType := toString (← MetaM.run' do ppExpr info.type) | |
| truncatePrint ppType | |
| IO.println "--thm--" | |
| -- print doc string if exists. | |
| -- eg. O(|l| + |r|). Merge two lists using s as a switch. | |
| match (← findDocString? env name) with | |
| | .none => IO.println "" | |
| | .some doc => IO.println doc | |
| -- IO.println "--thm--" | |
| -- print decl value. These Lean.Expr are very large. | |
| -- match info.value? with | |
| -- | .none => IO.println "" | |
| -- | .some val => IO.println (toString (← MetaM.run' do ppExpr val)) | |
| -- IO.println "--thm--" | |
| x := x + 1 | |
| if x % 100 == 0 then | |
| IO.eprintln s!"printed {x} / {lst.size} theorems, {x.toFloat / lst.size.toFloat * 100}%" | |
| -- if x ≥ 2 then break | |
| IO.eprintln "done" | |