Spaces:

Ray5th
/

Lean4-helper

Running

p4r5kpftnp-cmd Claude Sonnet 4.6 committed on May 14

Commit

3ac681e

1 Parent(s): 4562b5e

Add LangChain/LangGraph RAG pipeline for retrieval-augmented proof generation

- src/mathlib_corpus.py: walks Mathlib4 .lean source files and extracts
theorem/lemma/def declarations as LangChain Documents
- src/retriever.py: MathLibRetriever — FAISS + BM25 hybrid retrieval via
EnsembleRetriever (60/40), reranked with CrossEncoder; index persisted to
data/mathlib_index/ for reuse across runs
- src/rag_chain.py: RAGProofChain — LangChain LCEL chain
(ChatPromptTemplate | OllamaLLM | StrOutputParser) that injects retrieved
lemma context into each proof-generation call
- src/langgraph_agent.py: LangGraphAgent — replaces the plain Python retry
loop with a proper state machine (verify → retrieve → generate → verify);
exposes the same solve_file() API
- src/proof_agent.py: thin backward-compatible wrapper around LangGraphAgent
- scripts/build_index.py: one-time offline script to build and save the FAISS
index from Mathlib4 source
- scripts/run_agent.py: updated to use LangGraphAgent directly
- requirements.txt: add faiss-cpu, rank-bm25, sentence-transformers,
langgraph, langchain-ollama

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (13) hide show

problems/simple_add.lean +4 -0
problems/test_problem.lean +4 -0
requirements.txt +9 -0
scripts/build_index.py +40 -0
scripts/run_agent.py +36 -0
src/langgraph_agent.py +173 -0
src/lean_verifier.py +70 -0
src/lmm_client.py +54 -0
src/mathlib_corpus.py +117 -0
src/proof_agent.py +11 -0
src/rag_chain.py +71 -0
src/retriever.py +126 -0
tests/test_lean_verifier.py +57 -0

problems/simple_add.lean ADDED Viewed

	@@ -0,0 +1,4 @@

+import Mathlib
+-- Problem stub for the proof agent: adding zero on the right leaves a natural number unchanged.
+-- The `sorry` below is the placeholder the agent is expected to replace.
+theorem add_zero_simple (n : ℕ) : n + 0 = n := by
+  sorry

problems/test_problem.lean ADDED Viewed

	@@ -0,0 +1,4 @@

+import Mathlib
+-- Problem stub for the proof agent: no naturals n, m with m ≠ 0 satisfy n^2 = 2 * m^2.
+-- The `sorry` below is the placeholder the agent is expected to replace.
+theorem square_root_two_irrational (n m : ℕ) (h : n^2 = 2 * m^2) (hm : m ≠ 0) : False := by
+  sorry

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+lean-interact
+ollama
+langchain
+langchain-community
+langchain-ollama
+# Provides HuggingFaceEmbeddings, which src/retriever.py imports from langchain_huggingface
+langchain-huggingface
+langgraph
+faiss-cpu
+rank-bm25
+sentence-transformers

scripts/build_index.py ADDED Viewed

	@@ -0,0 +1,40 @@

+#!/usr/bin/env python3
+"""
+One-time script to build and save the FAISS index from Mathlib4 source files
+(the BM25 index is not persisted; it is rebuilt from the saved documents on load).
+Run this before using the LangGraph agent for the first time:
+    python scripts/build_index.py
+Optional flags:
+    --mathlib-root PATH   Path to Mathlib4 source (auto-detected if omitted)
+    --max-files N         Limit to first N .lean files (useful for quick testing)
+    --index-dir PATH      Where to save the index (default: data/mathlib_index)
+"""
+import sys
+import os
+import argparse
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'src')))
+from retriever import MathLibRetriever
+def main():
+    parser = argparse.ArgumentParser(description="Build Mathlib FAISS index.")
+    parser.add_argument("--mathlib-root", default=None, help="Path to Mathlib4 source root")
+    parser.add_argument("--max-files", type=int, default=None, help="Limit number of .lean files processed")
+    parser.add_argument("--index-dir", default=None, help="Directory to save the index")
+    args = parser.parse_args()
+    retriever = MathLibRetriever(index_dir=args.index_dir)
+    if retriever.is_index_built() and not args.max_files:
+        print(f"Index already exists at {retriever.index_dir}. Delete it to rebuild.")
+        return
+    retriever.build(mathlib_root=args.mathlib_root, max_files=args.max_files)
+    print("Done. Index is ready for use.")
+if __name__ == "__main__":
+    main()

scripts/run_agent.py ADDED Viewed

	@@ -0,0 +1,36 @@

+#!/usr/bin/env python3
+import sys
+import os
+import argparse
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'src')))
+from langgraph_agent import LangGraphAgent
+def main():
+    parser = argparse.ArgumentParser(description="Run the LangGraph Lean Proof Agent on a file.")
+    parser.add_argument("file", help="Path to the .lean file to solve")
+    parser.add_argument("--model", default="qwen3-vl:4b", help="Ollama model name")
+    parser.add_argument("--retries", type=int, default=5, help="Max retries")
+    parser.add_argument("--index-dir", default=None, help="Path to pre-built FAISS index directory")
+    args = parser.parse_args()
+    print(f"Starting LangGraph Proof Agent with model: {args.model}")
+    agent = LangGraphAgent(
+        model_name=args.model,
+        max_retries=args.retries,
+        index_dir=args.index_dir,
+    )
+    success = agent.solve_file(args.file)
+    if success:
+        print("\nSuccess! The proof has been verified.")
+    else:
+        print("\nFailed to verify the proof.")
+if __name__ == "__main__":
+    main()

src/langgraph_agent.py ADDED Viewed

	@@ -0,0 +1,173 @@

+import os
+from typing import List, TypedDict
+from langgraph.graph import END, StateGraph
+from lean_verifier import LeanEnvironment
+from rag_chain import RAGProofChain
+from retriever import MathLibRetriever
+# ---------------------------------------------------------------------------
+# State
+# ---------------------------------------------------------------------------
+class ProofState(TypedDict):
+    """State dictionary handed from node to node in the proof-agent graph."""
+    file_path: str  # path of the .lean file; read by the verify node, overwritten by the generate node
+    lean_code: str  # file contents last read by verify or last written by generate
+    goals: List[str]  # goals returned by the most recent verification
+    errors: List[str]  # errors returned by the most recent verification
+    attempt: int  # incremented by the generate node on every call
+    max_retries: int  # limit compared against `attempt` by should_continue
+    status: str          # "pending" | "success" | "failed"
+    retrieved_lemmas: list  # value returned by the retriever in the retrieve node
+# ---------------------------------------------------------------------------
+# Nodes
+# ---------------------------------------------------------------------------
+def _read_file(path: str) -> str:
+    with open(path, "r") as f:
+        return f.read()
+def _write_file(path: str, code: str) -> None:
+    with open(path, "w") as f:
+        f.write(code)
+def _extract_lean_code(text: str) -> str:
+    if "```lean" in text:
+        return text.split("```lean")[1].split("```")[0].strip()
+    if "```" in text:
+        return text.split("```")[1].split("```")[0].strip()
+    return text.strip()
+def make_verify_node(lean_env: LeanEnvironment):
+    def verify_node(state: ProofState) -> ProofState:
+        print(f"\n--- Attempt {state['attempt'] + 1} / {state['max_retries']} ---")
+        code = _read_file(state["file_path"])
+        result = lean_env.verify_proof(code)
+        new_status = "success" if result["status"] == "success" else "pending"
+        if new_status == "success":
+            print("Proof verified successfully!")
+        else:
+            print(
+                f"Verification failed. "
+                f"Errors: {len(result['errors'])}, Goals: {len(result['goals'])}"
+            )
+        return {
+            **state,
+            "lean_code": code,
+            "errors": result["errors"],
+            "goals": result["goals"],
+            "status": new_status,
+        }
+    return verify_node
+def make_retrieve_node(retriever: MathLibRetriever):
+    def retrieve_node(state: ProofState) -> ProofState:
+        query = " ".join(state["goals"] + state["errors"])
+        print("Retrieving relevant Mathlib lemmas…")
+        lemmas = retriever.retrieve(query)
+        print(f"  Retrieved {len(lemmas)} lemma(s).")
+        return {**state, "retrieved_lemmas": lemmas}
+    return retrieve_node
+def make_generate_node(chain: RAGProofChain):
+    def generate_node(state: ProofState) -> ProofState:
+        print("Generating proof with LLM…")
+        raw = chain.generate(
+            lean_code=state["lean_code"],
+            goals=state["goals"],
+            errors=state["errors"],
+            retrieved_lemmas=state["retrieved_lemmas"],
+        )
+        new_code = _extract_lean_code(raw)
+        if not new_code or new_code.strip() == state["lean_code"].strip():
+            print("LLM produced no changes.")
+            return {**state, "attempt": state["attempt"] + 1, "status": "failed"}
+        _write_file(state["file_path"], new_code)
+        print("File updated.")
+        return {
+            **state,
+            "lean_code": new_code,
+            "attempt": state["attempt"] + 1,
+        }
+    return generate_node
+# ---------------------------------------------------------------------------
+# Router
+# ---------------------------------------------------------------------------
+def should_continue(state: ProofState) -> str:
+    if state["status"] == "success":
+        return END
+    if state["attempt"] >= state["max_retries"]:
+        return END
+    return "retrieve"
+# ---------------------------------------------------------------------------
+# Graph assembly
+# ---------------------------------------------------------------------------
+def build_graph(lean_env: LeanEnvironment, retriever: MathLibRetriever, chain: RAGProofChain):
+    g = StateGraph(ProofState)
+    g.add_node("verify", make_verify_node(lean_env))
+    g.add_node("retrieve", make_retrieve_node(retriever))
+    g.add_node("generate", make_generate_node(chain))
+    g.set_entry_point("verify")
+    g.add_conditional_edges("verify", should_continue, {"retrieve": "retrieve", END: END})
+    g.add_edge("retrieve", "generate")
+    g.add_edge("generate", "verify")
+    return g.compile()
+# ---------------------------------------------------------------------------
+# Public entry point
+# ---------------------------------------------------------------------------
+class LangGraphAgent:
+    def __init__(
+        self,
+        model_name: str = "qwen3-vl:4b",
+        max_retries: int = 5,
+        index_dir: str | None = None,
+    ):
+        self._lean_env = LeanEnvironment(use_mathlib=True)
+        self._retriever = MathLibRetriever(index_dir=index_dir)
+        self._chain = RAGProofChain(model_name=model_name)
+        self._graph = build_graph(self._lean_env, self._retriever, self._chain)
+        self._max_retries = max_retries
+    def solve_file(self, file_path: str) -> bool:
+        if not os.path.exists(file_path):
+            print(f"Error: {file_path} not found.")
+            return False
+        initial: ProofState = {
+            "file_path": file_path,
+            "lean_code": "",
+            "goals": [],
+            "errors": [],
+            "attempt": 0,
+            "max_retries": self._max_retries,
+            "status": "pending",
+            "retrieved_lemmas": [],
+        }
+        final = self._graph.invoke(initial)
+        return final["status"] == "success"

src/lean_verifier.py ADDED Viewed

	@@ -0,0 +1,70 @@

+from typing import Dict, Any, List
+from lean_interact import LeanREPLConfig, LeanServer, Command, TempRequireProject, LeanRequire
+class LeanEnvironment:
+    """
+    Manages the Lean REPL environment for verifying Lean 4 proofs.
+    """
+    def __init__(self, use_mathlib: bool = True, lean_version: str = "v4.8.0"):
+        """
+        Initializes the Lean environment.
+        Args:
+            use_mathlib (bool): If True, configures a TempRequireProject with Mathlib.
+                                This may take a while to build on the first run.
+            lean_version (str): The Lean 4 version to use. Default is v4.8.0.
+        """
+        self.lean_version = lean_version
+        self.use_mathlib = use_mathlib
+        if self.use_mathlib:
+            # We use TempRequireProject with mathlib as specified in lean_interact documentation
+            project = TempRequireProject(
+                lean_version=self.lean_version,
+                require="mathlib"
+            )
+            self.config = LeanREPLConfig(project=project)
+        else:
+            self.config = LeanREPLConfig(lean_version=self.lean_version)
+        self.server = LeanServer(self.config)
+    def verify_proof(self, lean_code: str) -> Dict[str, Any]:
+        """
+        Executes a block of Lean code and verifies if it is a correct proof.
+        Args:
+            lean_code (str): The full Lean 4 code string containing imports, theorem statement, and proof.
+        Returns:
+            dict: A dictionary containing the status, errors (if any), and goals (if open sorries remain).
+        """
+        response = self.server.run(Command(cmd=lean_code))
+        errors = []
+        goals = []
+        # Check for error or warning messages
+        if hasattr(response, 'messages') and response.messages:
+            for msg in response.messages:
+                if msg.severity in ['error', 'warning']:
+                    # E.g., 'declaration uses 'sorry'' is a warning, but we might want to capture it
+                    errors.append(msg.data)
+        # Check for open goals (sorries)
+        if hasattr(response, 'sorries') and response.sorries:
+            for sorry in response.sorries:
+                if sorry.goal:
+                    goals.append(sorry.goal)
+        is_success = len(errors) == 0 and len(goals) == 0
+        return {
+            "status": "success" if is_success else "failure",
+            "errors": errors,
+            "goals": goals,
+            "env": getattr(response, "env", None)
+        }

src/lmm_client.py ADDED Viewed

	@@ -0,0 +1,54 @@

+import ollama
+from typing import List, Dict, Any, Optional
+class LMMClient:
+    """
+    Client for interacting with local LMMs via Ollama.
+    Focuses on Qwen3-VL:4B for high-reasoning tasks.
+    """
+    def __init__(self, model_name: str = "qwen3-vl:4b"):
+        # Name of the Ollama model passed to every ollama.chat() call.
+        self.model_name = model_name
+    def chat(self, prompt: str, system_prompt: Optional[str] = None) -> str:
+        """
+        Sends a chat request to the model.
+        Args:
+            prompt: Content of the user message.
+            system_prompt: Optional system message placed before the user message.
+        Returns:
+            The `content` field of the response message.
+        """
+        messages = []
+        if system_prompt:
+            messages.append({'role': 'system', 'content': system_prompt})
+        messages.append({'role': 'user', 'content': prompt})
+        response = ollama.chat(
+            model=self.model_name,
+            messages=messages
+        )
+        return response['message']['content']
+    def generate_proof_steps(self, lean_code: str, goals: List[str], errors: List[str]) -> str:
+        """
+        Specific helper to generate proof steps based on current Lean state.
+        Args:
+            lean_code: Current Lean source, embedded in the prompt inside a lean code fence.
+            goals: Open proof goals, joined with newlines in the prompt.
+            errors: Lean error messages, joined with newlines in the prompt.
+        Returns:
+            The model reply as returned by chat().
+        """
+        system_prompt = (
+            "You are an expert Lean 4 proof assistant. "
+            "Your goal is to complete the proof by replacing 'sorry' with valid Lean 4 code. "
+            "Use Mathlib theorems where appropriate. "
+            "Respond ONLY with the corrected Lean code block."
+        )
+        # chr(10) is "\n"; it is used because a backslash cannot appear inside
+        # an f-string expression on older Python versions.
+        prompt = f"""
Current Lean Code:
```lean
{lean_code}
```
Current Proof Goals:
{chr(10).join(goals)}
Lean Errors:
{chr(10).join(errors)}
Please provide the corrected Lean code. Focus on solving the current goals and fixing the errors.
"""
+        return self.chat(prompt, system_prompt)

src/mathlib_corpus.py ADDED Viewed

	@@ -0,0 +1,117 @@

+import os
+import re
+import glob
+from pathlib import Path
+from typing import List, Optional
+from langchain_core.documents import Document
+# Regex to capture optional docstring + declaration line
+_DECL_PATTERN = re.compile(
+    r'(?:/--\s*(.*?)\s*-/)?\s*'               # optional /-- docstring -/
+    r'(?:@\[.*?\]\s*)*'                        # optional attributes
+    r'(theorem|lemma|def|noncomputable def)\s+'
+    r'(\S+)\s*'                                # declaration name
+    r'(.*?)\s*:=',                             # everything up to :=
+    re.DOTALL,
+)
+def _find_mathlib_root() -> Optional[str]:
+    """
+    Returns the path to the Mathlib4 source directory, searching common locations.
+    """
+    candidates = [
+        # Lake package cache (used by lean-interact's TempRequireProject)
+        os.path.expanduser("~/.elan/toolchains"),
+        os.path.expanduser("~/.cache/mathlib"),
+        # Nix / Homebrew Lean setups
+        "/usr/local/lib/lean",
+        "/opt/homebrew/lib/lean",
+    ]
+    # Also check for a .lake/packages directory next to the current file
+    here = Path(__file__).resolve().parent.parent
+    lake_pkg = here / ".lake" / "packages" / "mathlib" / "Mathlib"
+    if lake_pkg.exists():
+        return str(lake_pkg.parent)
+    for root in candidates:
+        if not os.path.isdir(root):
+            continue
+        # Walk up to 4 levels looking for a Mathlib directory
+        for dirpath, dirnames, _ in os.walk(root):
+            depth = dirpath.replace(root, "").count(os.sep)
+            if depth > 4:
+                dirnames.clear()
+                continue
+            if "Mathlib" in dirnames:
+                return dirpath
+    return None
+def _parse_lean_file(path: str) -> List[Document]:
+    """
+    Extracts theorem/lemma/def declarations from a single .lean file.
+    Returns a list of Documents with page_content = "<name> : <signature>".
+    """
+    try:
+        text = Path(path).read_text(encoding="utf-8", errors="ignore")
+    except OSError:
+        return []
+    docs = []
+    for match in _DECL_PATTERN.finditer(text):
+        docstring = (match.group(1) or "").strip()
+        kind = match.group(2)
+        name = match.group(3)
+        signature = re.sub(r'\s+', ' ', match.group(4)).strip()
+        content = f"{name} : {signature}"
+        if docstring:
+            content = f"{docstring}\n{content}"
+        line = text[: match.start()].count("\n") + 1
+        docs.append(Document(
+            page_content=content,
+            metadata={"kind": kind, "name": name, "file": path, "line": line},
+        ))
+    return docs
+class MathLibCorpus:
+    """
+    Extracts LangChain Documents from Mathlib4 source files on disk.
+    """
+    def __init__(self, mathlib_root: Optional[str] = None):
+        self.mathlib_root = mathlib_root or _find_mathlib_root()
+    def extract(self, max_files: Optional[int] = None) -> List[Document]:
+        """
+        Walks Mathlib source files and extracts declaration Documents.
+        Args:
+            max_files: If set, stop after processing this many .lean files
+                       (useful for quick tests).
+        Returns:
+            List of LangChain Documents, one per declaration found.
+        """
+        if not self.mathlib_root:
+            raise RuntimeError(
+                "Could not locate Mathlib4 source. "
+                "Pass mathlib_root explicitly or run `lake exe cache get` first."
+            )
+        pattern = os.path.join(self.mathlib_root, "**", "*.lean")
+        files = glob.glob(pattern, recursive=True)
+        if max_files:
+            files = files[:max_files]
+        docs: List[Document] = []
+        for path in files:
+            docs.extend(_parse_lean_file(path))
+        return docs

src/proof_agent.py ADDED Viewed

	@@ -0,0 +1,11 @@

+from langgraph_agent import LangGraphAgent
+class ProofAgent:
+    """Thin compatibility wrapper around LangGraphAgent."""
+    def __init__(self, model_name: str = "qwen3-vl:4b", max_retries: int = 5):
+        self._agent = LangGraphAgent(model_name=model_name, max_retries=max_retries)
+    def solve_file(self, file_path: str) -> bool:
+        return self._agent.solve_file(file_path)

src/rag_chain.py ADDED Viewed

	@@ -0,0 +1,71 @@

+from typing import List
+from langchain_core.documents import Document
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_ollama import OllamaLLM
+# System message: sets the assistant's role and asks for the answer as a
+# single fenced lean block.
+_SYSTEM = (
+    "You are an expert Lean 4 proof assistant with deep knowledge of Mathlib. "
+    "Your task is to complete the proof by replacing every `sorry` with valid Lean 4 tactic code. "
+    "Use only Mathlib theorems and tactics. "
+    "Respond ONLY with the corrected Lean code inside a single ```lean ... ``` block."
+)
+# Human message template. The placeholders {lean_code}, {goals}, {errors} and
+# {retrieved_lemmas} are the keys supplied by RAGProofChain.generate().
+_HUMAN = """\
+## Current Lean Code
```lean
{lean_code}
```
## Open Proof Goals
{goals}
## Lean Errors
{errors}
## Relevant Mathlib Lemmas
{retrieved_lemmas}
Provide the corrected Lean code that solves all goals and fixes all errors.
"""
+def _format_docs(docs: List[Document]) -> str:
+    if not docs:
+        return "(none retrieved)"
+    return "\n".join(
+        f"- `{d.metadata.get('name', '?')}`: {d.page_content}" for d in docs
+    )
+class RAGProofChain:
+    """
+    LangChain LCEL chain: retrieved context + proof state → corrected Lean code.
+    """
+    def __init__(self, model_name: str = "qwen3-vl:4b"):
+        prompt = ChatPromptTemplate.from_messages([
+            ("system", _SYSTEM),
+            ("human", _HUMAN),
+        ])
+        llm = OllamaLLM(model=model_name)
+        self._chain = prompt | llm | StrOutputParser()
+    def generate(
+        self,
+        lean_code: str,
+        goals: List[str],
+        errors: List[str],
+        retrieved_lemmas: List[Document],
+    ) -> str:
+        """
+        Generate corrected Lean code given the current proof state and retrieved lemmas.
+        """
+        return self._chain.invoke({
+            "lean_code": lean_code,
+            "goals": "\n".join(goals) or "(none)",
+            "errors": "\n".join(errors) or "(none)",
+            "retrieved_lemmas": _format_docs(retrieved_lemmas),
+        })

src/retriever.py ADDED Viewed

	@@ -0,0 +1,126 @@

+import os
+from pathlib import Path
+from typing import List, Optional
+from langchain_community.retrievers import BM25Retriever
+from langchain_community.vectorstores import FAISS
+from langchain_community.cross_encoders import HuggingFaceCrossEncoder
+from langchain.retrievers import EnsembleRetriever
+from langchain.retrievers.document_compressors import CrossEncoderReranker
+from langchain.retrievers import ContextualCompressionRetriever
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain_core.documents import Document
+from mathlib_corpus import MathLibCorpus
+_DEFAULT_INDEX_DIR = Path(__file__).resolve().parent.parent / "data" / "mathlib_index"
+_EMBED_MODEL = "all-MiniLM-L6-v2"
+_RERANK_MODEL = "cross-encoder/ms-marco-MiniLM-L-6-v2"
+class MathLibRetriever:
+    """
+    Hybrid FAISS + BM25 retriever with CrossEncoder reranking over Mathlib lemmas.
+    On first use, call build() to create and persist the index.
+    Subsequent runs load from disk automatically.
+    """
+    def __init__(
+        self,
+        index_dir: Optional[str] = None,
+        top_k: int = 20,
+        rerank_top_k: int = 5,
+    ):
+        self.index_dir = Path(index_dir) if index_dir else _DEFAULT_INDEX_DIR
+        self.top_k = top_k
+        self.rerank_top_k = rerank_top_k
+        self._retriever = None
+    # ------------------------------------------------------------------
+    # Public API
+    # ------------------------------------------------------------------
+    def build(self, mathlib_root: Optional[str] = None, max_files: Optional[int] = None) -> None:
+        """
+        Extract Mathlib documents, build FAISS + BM25 indices, and persist to disk.
+        Call this once (via scripts/build_index.py) before first use.
+        """
+        print("Extracting Mathlib corpus…")
+        corpus = MathLibCorpus(mathlib_root=mathlib_root)
+        docs = corpus.extract(max_files=max_files)
+        print(f"  {len(docs)} declarations extracted.")
+        embeddings = self._embeddings()
+        print("Building FAISS index…")
+        faiss_store = FAISS.from_documents(docs, embeddings)
+        self.index_dir.mkdir(parents=True, exist_ok=True)
+        faiss_store.save_local(str(self.index_dir))
+        print(f"  Index saved to {self.index_dir}")
+        self._retriever = self._build_retriever(faiss_store, docs)
+    def retrieve(self, query: str, k: Optional[int] = None) -> List[Document]:
+        """
+        Retrieve and rerank the most relevant Mathlib lemmas for a query.
+        Args:
+            query: Natural-language or Lean-syntax query (e.g., proof goals + errors).
+            k: Number of results to return after reranking (defaults to self.rerank_top_k).
+        Returns:
+            List of Documents ranked by relevance.
+        """
+        if self._retriever is None:
+            self._load()
+        results = self._retriever.invoke(query)
+        return results[: k or self.rerank_top_k]
+    def is_index_built(self) -> bool:
+        return (self.index_dir / "index.faiss").exists()
+    # ------------------------------------------------------------------
+    # Internal helpers
+    # ------------------------------------------------------------------
+    def _embeddings(self) -> HuggingFaceEmbeddings:
+        return HuggingFaceEmbeddings(model_name=_EMBED_MODEL)
+    def _load(self) -> None:
+        if not self.is_index_built():
+            raise RuntimeError(
+                f"No FAISS index found at {self.index_dir}. "
+                "Run `python scripts/build_index.py` first."
+            )
+        print("Loading FAISS index from disk…")
+        embeddings = self._embeddings()
+        faiss_store = FAISS.load_local(
+            str(self.index_dir),
+            embeddings,
+            allow_dangerous_deserialization=True,
+        )
+        # Re-build BM25 from FAISS docstore
+        docs = list(faiss_store.docstore._dict.values())
+        self._retriever = self._build_retriever(faiss_store, docs)
+    def _build_retriever(self, faiss_store: FAISS, docs: List[Document]):
+        faiss_retriever = faiss_store.as_retriever(
+            search_kwargs={"k": self.top_k}
+        )
+        bm25_retriever = BM25Retriever.from_documents(docs)
+        bm25_retriever.k = self.top_k
+        ensemble = EnsembleRetriever(
+            retrievers=[faiss_retriever, bm25_retriever],
+            weights=[0.6, 0.4],
+        )
+        cross_encoder = HuggingFaceCrossEncoder(model_name=_RERANK_MODEL)
+        reranker = CrossEncoderReranker(model=cross_encoder, top_n=self.rerank_top_k)
+        return ContextualCompressionRetriever(
+            base_compressor=reranker,
+            base_retriever=ensemble,
+        )

tests/test_lean_verifier.py ADDED Viewed

	@@ -0,0 +1,57 @@

+import unittest
+import sys
+import os
+# Add src to Python path
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'src')))
+from lean_verifier import LeanEnvironment
+class TestLeanVerifier(unittest.TestCase):
+    """Tests for LeanEnvironment.verify_proof using a Mathlib-enabled environment."""
+    @classmethod
+    def setUpClass(cls):
+        # We'll use Mathlib here since our goal is to verify it works with the MVP setup
+        # One environment is created for the whole class and shared by all tests.
+        cls.lean_env = LeanEnvironment(use_mathlib=True)
+    def test_correct_proof(self):
+        """A complete, valid proof reports success with no errors and no goals."""
+        lean_code = """
import Mathlib
theorem add_comm_test (n m : Nat) : n + m = m + n := by
  exact Nat.add_comm n m
"""
+        result = self.lean_env.verify_proof(lean_code)
+        self.assertEqual(result["status"], "success")
+        self.assertEqual(len(result["errors"]), 0)
+        self.assertEqual(len(result["goals"]), 0)
+    def test_incorrect_proof_type_mismatch(self):
+        """A proof term of the wrong type reports failure with a type-mismatch error."""
+        lean_code = """
import Mathlib
theorem add_comm_test (n m : Nat) : n + m = m + n := by
  exact n
"""
+        result = self.lean_env.verify_proof(lean_code)
+        self.assertEqual(result["status"], "failure")
+        self.assertTrue(any("type mismatch" in err or "application type mismatch" in err for err in result["errors"]),
+                        f"Expected type mismatch error, got: {result['errors']}")
+    def test_incomplete_proof_sorry(self):
+        """A proof left as `sorry` reports failure, the sorry message, and the open goal."""
+        lean_code = """
import Mathlib
theorem my_incomplete_thm (n : Nat) : n = 5 → n = 5 := by
  sorry
"""
+        result = self.lean_env.verify_proof(lean_code)
+        self.assertEqual(result["status"], "failure")
+        # Ensure it has an error indicating sorry
+        self.assertTrue(any("uses 'sorry'" in err for err in result["errors"]),
+                        f"Expected sorry warning/error, got: {result['errors']}")
+        # Ensure it outputs the goal
+        self.assertEqual(len(result["goals"]), 1)
+        self.assertIn("⊢ n = 5 → n = 5", result["goals"][0])
+if __name__ == '__main__':
+    unittest.main()