Spaces:
Running
Running
"""Hugging Face Space entry point for Mindlock.

One process serves everything (the Off-Brand pattern):
  /        → the game: custom canvas front (FastAPI + static), the "open the skull" UI
  /about   → a Gradio block with how-to-play, links and the eligibility map

Backends, picked by env:
  MINDLOCK_FAKE=1              deterministic demo, no models
  (default, laptop)            Ollama — minicpm-v4.6 + nemotron-3-nano:4b
  MINDLOCK_BACKEND=llamacpp    llama.cpp runtime: llama-server subprocesses,
                               GGUF weights pulled from the Hub on first boot

    python app.py                            # laptop (Ollama)
    MINDLOCK_FAKE=1 python app.py            # no models
    MINDLOCK_BACKEND=llamacpp python app.py  # llama.cpp (the Space path)
"""
| import atexit | |
| import os | |
| import shutil | |
| import subprocess | |
| import sys | |
| import time | |
| import urllib.request | |
# Put the sibling ``src/`` directory first on the import path so the in-repo
# ``mindlock`` package resolves without being installed.
sys.path.insert(
    0,
    os.path.join(os.path.dirname(os.path.abspath(__file__)), "src"),
)
# --------------------------------------------------------------- llama.cpp boot (Space)
# Two single-model servers (sensors + voice), weights from the Hub. Started BEFORE the
# game session imports so health checks see them.
#
# Each entry is (env host var, port, repo, file). The repo and file can be overridden
# through the environment variables named in the lookups below.
_LLAMA = [
    (
        "MINDLOCK_LLAMA_HOST",
        8091,
        os.environ.get("MINDLOCK_GGUF_REPO", "openbmb/MiniCPM5-1B-GGUF"),
        os.environ.get("MINDLOCK_GGUF_FILE", "MiniCPM5-1B-Q4_K_M.gguf"),
    ),
    (
        "MINDLOCK_LLAMA_DLPFC_HOST",
        8092,
        os.environ.get("MINDLOCK_GGUF_DLPFC_REPO", "nvidia/NVIDIA-Nemotron-3-Nano-4B-GGUF"),
        os.environ.get("MINDLOCK_GGUF_DLPFC_FILE", "NVIDIA-Nemotron3-Nano-4B-Q4_K_M.gguf"),
    ),
]
def _prepend_ld_library_path(libdir: str) -> None:
    """Put *libdir* first on ``LD_LIBRARY_PATH`` exactly once.

    Child processes started afterwards inherit the variable, which is how the
    spawned llama-server finds the shared libraries shipped next to it. Empty
    entries are dropped on purpose: the dynamic loader treats an empty entry
    as the current working directory.
    """
    current = os.environ.get("LD_LIBRARY_PATH", "")
    others = [entry for entry in current.split(":") if entry and entry != libdir]
    os.environ["LD_LIBRARY_PATH"] = ":".join([libdir, *others])


def _fetch_llama_server() -> str:
    """Return the path of a usable ``llama-server`` binary, downloading it if needed.

    No system llama-server (the Space): pull the official prebuilt linux-x64 build
    from the latest llama.cpp GitHub release — a small prebuilt tarball beats a
    30-minute source build of the wheel.

    The archive is unpacked under ``~/.cache/mindlock/llama`` and the binary path
    is remembered in a ``.ready`` marker file. On every call — cached or fresh —
    the binary's directory is exported on ``LD_LIBRARY_PATH`` so the bundled
    ``.so`` files are found by the subprocess.

    Returns:
        Absolute path to the ``llama-server`` executable.

    Raises:
        RuntimeError: the latest release has no matching asset, or the archive
            does not contain ``llama-server``.
        OSError: network or filesystem failure (``urllib.error.URLError`` included).
    """
    import io
    import json as _json
    import tarfile

    cache = os.path.expanduser("~/.cache/mindlock/llama")
    marker = os.path.join(cache, ".ready")

    if os.path.exists(marker):
        with open(marker) as fh:
            cached = fh.read().strip()
        # Only trust the marker if the binary is still there; otherwise fall
        # through and rebuild the cache instead of handing back a dead path.
        if cached and os.path.isfile(cached):
            # The environment is per-process, so a warm cache still has to
            # export the library directory for this boot.
            _prepend_ld_library_path(os.path.dirname(cached))
            return cached

    os.makedirs(cache, exist_ok=True)

    with urllib.request.urlopen(
            "https://api.github.com/repos/ggml-org/llama.cpp/releases/latest",
            timeout=30) as resp:
        rel = _json.loads(resp.read())

    asset = next(
        (a for a in rel["assets"]
         if "bin-ubuntu-x64" in a["name"] and a["name"].endswith(".tar.gz")),
        None,
    )
    if asset is None:
        raise RuntimeError(
            "no bin-ubuntu-x64 .tar.gz asset in the latest llama.cpp release")

    with urllib.request.urlopen(asset["browser_download_url"], timeout=300) as resp:
        blob = resp.read()
    with tarfile.open(fileobj=io.BytesIO(blob), mode="r:gz") as archive:
        archive.extractall(cache)

    binpath = ""
    for root, _, files in os.walk(cache):  # binary + its .so live in build/bin
        for f in files:
            full = os.path.join(root, f)
            if f == "llama-server":
                os.chmod(full, 0o755)
                binpath = full
            elif f.endswith(".so"):
                os.chmod(full, 0o755)
    if not binpath:
        raise RuntimeError("llama-server not found inside the release archive")

    _prepend_ld_library_path(os.path.dirname(binpath))
    with open(marker, "w") as fh:
        fh.write(binpath)
    return binpath
def _llama_cmd(path: str, port: int) -> list[str]:
    """Build the argv that serves the model file ``path`` on ``port``.

    A ``llama-server`` found on PATH is preferred; only when there is none is
    the prebuilt binary fetched via ``_fetch_llama_server``.
    """
    binary = shutil.which("llama-server")
    if not binary:
        binary = _fetch_llama_server()
    argv = [binary, "-m", path]
    argv += ["--port", str(port)]
    argv += ["-c", "4096", "--no-warmup"]
    return argv
def _wait_until_healthy(port: int, proc: "subprocess.Popen", deadline: float) -> bool:
    """Poll ``/health`` on *port* until it answers, *proc* exits, or *deadline* passes.

    Returns True once the endpoint answers without an HTTP error, False if the
    process died or the (monotonic) deadline ran out first.
    """
    while time.monotonic() < deadline:
        if proc.poll() is not None:
            # The server already exited; waiting out the deadline cannot help.
            return False
        try:
            with urllib.request.urlopen(f"http://127.0.0.1:{port}/health", timeout=2):
                return True
        except Exception:  # noqa: BLE001 — not up yet (refused / error status / timeout)
            time.sleep(1.5)
    return False


def _boot_llama() -> None:
    """Start one llama-server per ``_LLAMA`` entry and wait for them to come up.

    For every entry the GGUF file is fetched with ``hf_hub_download``, a
    llama-server subprocess is spawned on the entry's port (terminated again at
    interpreter exit), and the entry's env var is pointed at
    ``http://127.0.0.1:<port>``.

    All servers share a single 300-second startup budget. A server that exits
    early or never becomes healthy is reported on stderr, but boot continues —
    startup stays best-effort, as before. Servers that are still running then
    get one tiny chat completion in a background thread so the first player
    turn does not pay the cold prompt-processing cost on top of its own.
    """
    import json as _json
    import threading

    from huggingface_hub import hf_hub_download

    started = []  # (port, Popen) in launch order
    for env_var, port, repo, fname in _LLAMA:
        path = hf_hub_download(repo_id=repo, filename=fname)
        proc = subprocess.Popen(
            _llama_cmd(path, port),
            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
        )
        atexit.register(proc.terminate)
        os.environ[env_var] = f"http://127.0.0.1:{port}"
        started.append((port, proc))

    # Monotonic clock: a wall-clock adjustment must not stretch or cut the wait.
    deadline = time.monotonic() + 300
    for port, proc in started:
        if not _wait_until_healthy(port, proc, deadline):
            code = proc.poll()
            reason = ("did not become healthy in time" if code is None
                      else f"exited with code {code}")
            print(f"[mindlock] llama-server on port {port} {reason}", file=sys.stderr)

    # warm each server with one real templated token in the background, so the first
    # player turn doesn't pay the cold prompt-processing cost on top of its own
    def _warm(port: int) -> None:
        try:
            req = urllib.request.Request(
                f"http://127.0.0.1:{port}/v1/chat/completions",
                data=_json.dumps({"model": "default", "max_tokens": 1,
                                  "messages": [{"role": "user", "content": "hi"}]}).encode(),
                headers={"Content-Type": "application/json"})
            with urllib.request.urlopen(req, timeout=120):
                pass
        except Exception:  # noqa: BLE001 — warmup is best-effort
            pass

    for port, proc in started:
        if proc.poll() is None:  # nothing to warm if the server is already gone
            threading.Thread(target=_warm, args=(port,), daemon=True).start()
# A Space has no Ollama, so llama.cpp becomes the default backend there — but only
# when MINDLOCK_BACKEND has not been set at all.
if os.environ.get("SPACE_ID"):
    os.environ.setdefault("MINDLOCK_BACKEND", "llamacpp")

# Fake mode never needs real model servers, whatever backend is selected.
if not os.environ.get("MINDLOCK_FAKE") and os.environ.get("MINDLOCK_BACKEND") == "llamacpp":
    _boot_llama()
| import gradio as gr # noqa: E402 | |
| from mindlock.game.server import app # noqa: E402 β the FastAPI game (custom front at /) | |
# Markdown shown on the /about page.
# NOTE(review): the emoji in front of "Watch a mind" could not be recovered from the
# mis-decoded source; 👀 is a best guess — confirm against the original file.
ABOUT_MD = """
# 🧠 MINDLOCK

**An escape room where every character is not one AI — but a hierarchy of tiny offline
language models: the departments of one mind.** You don't pick a lock. You change a mind.

**[▶ Play](/)** · every NPC runs a 6-region brain cascade (amygdala → hippocampus →
striatum → ACC → vmPFC → dlPFC) on small local models. Cruelty burns their finite
thinking tokens; empathy lets them heal. A mind that reaches zero is gone — with
everything it knew.

## How to play

- **WASD / arrows** — move · **E** — talk / use · **🧠 / /brain** — open the skull
- Each room: someone holds the key. Someone else knows what reaches them. Listen,
  learn the word that matters, say it like you mean it.
- **Story** — ten rooms of one man's memory. Find out who you are. *(~30 min)*
- **Endless** — procedurally generated minds, forever.
- **👀 Watch a mind** (menu) — a *real* recorded session replays instantly, zero model
  calls, so you can see the cascade, the token burn and a mind's death without waiting.

## The brain is real, not a metaphor

- **[The engine →](/docs/architecture)** — the value-network + dual-system grounding, the
  deterministic vmPFC, every mechanic tied to a citation (not the debunked triune brain).
- **[A recorded mind →](/docs/trace)** — one NPC's full six-region deliberation, two ways:
  reached with empathy, and burned down with cruelty until it dies.
- **Conviction** — each region shows how sharply it committed, read from the model's own
  token logits. A hosted chat API never exposes that; only a local mind can.

## Small models, doing the carrying

| Role | Model |
|---|---|
| Sensory regions (threat, memory, habit, cost) | MiniCPM (OpenBMB) |
| Voice — the words a mind says out loud | Nemotron 3 Nano 4B (NVIDIA) |
| Runtime | llama.cpp / Ollama — fully offline |

*Links: demo video · social post · GitHub repo — added at submission.*

---

🤗 *Built small for the **Hugging Face × Gradio** hackathon (Thousand Token Wood track).
Minds by **OpenBMB MiniCPM** and **NVIDIA Nemotron**, running on **llama.cpp** — fully offline.
Original soundtrack by **DjinAscet**.*
"""
# The /about page: one Gradio Blocks app that only renders ABOUT_MD. It is mounted
# onto the FastAPI game app so a single process serves both / and /about.
# (Page title fixed: the dash had been mis-decoded to "β".)
with gr.Blocks(title="Mindlock — about") as about:
    gr.Markdown(ABOUT_MD)

# mount_gradio_app returns the app to serve; rebind ``app`` so the module-level
# name is the one carrying the /about mount.
app = gr.mount_gradio_app(app, about, path="/about")
if __name__ == "__main__":
    import uvicorn

    # Port precedence: PORT, then MINDLOCK_PORT, then 7860.
    _port = os.environ.get("PORT", os.environ.get("MINDLOCK_PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=int(_port))