---
title: "Can LLMs Play the Game of Science?"
subtitle: "Evaluating scientific reasoning and metacognition using the card game Eleusis reveals distinct scientist personalities in large language models"
description: "A benchmark for evaluating LLM scientific reasoning using the card game Eleusis, testing iterative hypothesis formation, calibration, and strategic experimentation."
authors:
  - name: "David Louapre"
    url: "https://huggingface.co/dlouapre"
    affiliations: [1]
affiliations:
  - name: "Hugging Face"
    url: "https://huggingface.co"
published: "Feb. 09, 2026"
licence: >
  Diagrams and text are licensed under CC‑BY 4.0 with the source available on Hugging Face, unless noted otherwise.
tags:
  - LLM evaluation
  - scientific reasoning
  - benchmarks
  - calibration
tableOfContentsAutoCollapse: true
pdfProOnly: false
showPdf: true
---

import Introduction from "./chapters/eleusis/introduction.mdx";
import Benchmark from "./chapters/eleusis/benchmark.mdx";
import Results from "./chapters/eleusis/results.mdx";
import Discussion from "./chapters/eleusis/discussion.mdx";
import Appendix from "./chapters/eleusis/appendix.mdx";