"""Dreadzone: a Backrooms-inspired text exploration game served through Gradio.

The module keeps a small per-session game state (grid position, sanity, turn
counter), rolls deterministic per-tile events from a seeded RNG, and asks a
local GGUF model (loaded through llama-cpp-python) to narrate each turn.
"""

import os
import random
import string
from functools import lru_cache
from hashlib import blake2b
from typing import Any

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from llama_cpp.llama_chat_format import Jinja2ChatFormatter

# --- Configuration (all overridable through environment variables) ----------
MODEL_REPO = os.getenv("MODEL_REPO", "unsloth/NVIDIA-Nemotron-3-Nano-4B-GGUF")
MODEL_FILE = os.getenv("MODEL_FILE", "NVIDIA-Nemotron-3-Nano-4B-Q5_K_M.gguf")
N_CTX = int(os.getenv("N_CTX", "2048"))
N_BATCH = int(os.getenv("N_BATCH", "128"))
N_THREADS = int(os.getenv("N_THREADS", str(max(1, (os.cpu_count() or 2) - 1))))
N_GPU_LAYERS = int(os.getenv("N_GPU_LAYERS", "-1"))
MAX_HISTORY_TURNS = int(os.getenv("MAX_HISTORY_TURNS", "6"))
ENABLE_THINKING = os.getenv("ENABLE_THINKING", "false").lower() in {"1", "true", "yes"}
GAME_SEED = os.getenv("GAME_SEED", "dreadzone")
# Upper bound on remembered sessions so SESSIONS cannot grow without limit.
MAX_SESSIONS = max(1, int(os.getenv("MAX_SESSIONS", "1024")))

# Per-session game state, keyed by the id returned from session_id().
SESSIONS: dict[str, dict[str, Any]] = {}

ZONE_PROFILES = [
    "stale yellow corridors, buzzing fluorescent panels, damp carpet",
    "concrete service halls with numbered doors and distant machinery",
    "abandoned office cubicles under a ceiling that sags like wet paper",
    "tile-lined maintenance rooms smelling of bleach and hot dust",
    "empty retail aisles where the shelves repeat without logic",
    "hotel corridors with patterned wallpaper and no visible stairs",
]

ENTITY_PRESSURES = [
    {
        "name": "acoustic mismatch",
        "hint": "the room tone drops out for half a second, then returns too loud",
        "instruction": "Use a subtle change in sound or silence; avoid saying anything is watching.",
    },
    {
        "name": "impossible maintenance",
        "hint": "a fresh wet-floor sign stands where the floor is bone dry",
        "instruction": "Use a mundane object that is newly wrong; do not make the threat visible.",
    },
    {
        "name": "light lag",
        "hint": "the fluorescent panels brighten one corridor behind the player",
        "instruction": "Use delayed light or shadow behavior; keep it ambiguous.",
    },
    {
        "name": "spatial edit",
        "hint": "a doorway seems to have been moved a few inches while no one looked",
        "instruction": "Use architecture changing by a small amount; avoid direct pursuit language.",
    },
    {
        "name": "borrowed voice",
        "hint": "a voice repeats the last word the player thought, not the last one spoken",
        "instruction": "Use a brief auditory intrusion; do not identify a speaker.",
    },
    {
        "name": "texture error",
        "hint": "the wallpaper pattern misaligns like a copied image pasted over itself",
        "instruction": "Use a visual flaw in the environment; make it quietly disturbing.",
    },
]

AMBIENT_NOISES = [
    "a duct clicks once, then again from somewhere much lower",
    "water taps behind a wall with no plumbing",
    "a fluorescent tube gives a soft electrical sigh",
    "carpet fibers whisper as if brushed by a passing sleeve",
    "something plastic crinkles two rooms away",
    "a door latch tests itself and goes still",
    "the ceiling emits a slow settling pop",
    "a distant intercom opens, carries no voice, and shuts",
    "one of the lights hums in a rhythm like breathing",
    "a rolling object crosses the floor where there is no object",
]

REVELATIONS = [
    "the player recognizes the hallway as a place they avoided thinking about for years",
    "the player realizes the exit signs have been using their own handwriting",
    "the player notices every room contains one object from a home they never returned to",
    "the player understands the entity is not chasing them, but correcting their path",
    "the player remembers choosing a door before the memory cuts out",
    "the player sees that the map in their head has always had this place at its center",
    "the player recognizes the buzzing as the sound that used to fill their childhood bedroom",
    "the player realizes the corridor is arranging itself around a thing they refuse to name",
]

ESCAPE_EVENTS = [
    "an exit door opens onto the player's own bedroom, but the wallpaper continues across the ceiling",
    "a fire escape stairwell appears, descending into warm daylight that smells like wet carpet",
    "a service elevator arrives with the player's name taped over every floor button",
    "a glass storefront shows an empty street outside, though the reflection does not match the player",
    "a loading dock door rises to reveal a night sky with no stars and a familiar parked car",
    "a stairwell landing contains a phone booth already ringing with the player's home number",
]

# Direction word -> (dx, dy) step on the grid.
DIRECTIONS = {
    "north": (0, 1),
    "n": (0, 1),
    "south": (0, -1),
    "s": (0, -1),
    "east": (1, 0),
    "e": (1, 0),
    "west": (-1, 0),
    "w": (-1, 0),
}

DEFAULT_SYSTEM_PROMPT = "You are the Dreadzone narration engine."


@lru_cache(maxsize=1)
def get_llm() -> Llama:
    """Download (if needed) and load the GGUF model; cached after the first call."""
    print(
        f"Loading model {MODEL_REPO}/{MODEL_FILE} "
        f"with n_gpu_layers={N_GPU_LAYERS}",
        flush=True,
    )
    model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
    print(f"Model file ready: {model_path}", flush=True)
    return Llama(
        model_path=model_path,
        n_ctx=N_CTX,
        n_batch=N_BATCH,
        n_threads=N_THREADS,
        n_gpu_layers=N_GPU_LAYERS,
        use_mmap=True,
        use_mlock=False,
        logits_all=False,
        verbose=False,
    )


def token_text(llm: Llama, token_id: int) -> str:
    """Return the text of ``token_id``, or ``""`` for a negative id."""
    if token_id < 0:
        return ""
    # Reaches into a private llama-cpp-python attribute; may break across versions.
    text = llm._model.token_get_text(token_id)  # noqa: SLF001
    return text.decode("utf-8", errors="ignore") if isinstance(text, bytes) else text


def render_prompt(llm: Llama, messages: list[dict[str, str]]) -> tuple[str, list[str]]:
    """Render ``messages`` with the chat template stored in the GGUF metadata.

    Returns:
        ``(prompt, stop)`` where ``stop`` is a list of non-empty stop strings.

    Raises:
        ValueError: if the model metadata has no ``tokenizer.chat_template``.
    """
    template = llm.metadata.get("tokenizer.chat_template")
    if not template:
        raise ValueError("GGUF does not include tokenizer.chat_template metadata.")

    eos_token = token_text(llm, llm.token_eos())
    bos_token = token_text(llm, llm.token_bos())
    formatter = Jinja2ChatFormatter(
        template=template,
        eos_token=eos_token,
        bos_token=bos_token,
    )
    formatted = formatter(
        messages=messages,
        enable_thinking=ENABLE_THINKING,
    )
    # ``stop`` may be a single value or a list; normalise and drop empty entries.
    stop = formatted.stop if isinstance(formatted.stop, list) else [formatted.stop]
    return formatted.prompt, [item for item in stop if item]


def trim_history(history: list[Any]) -> list[Any]:
    """Return at most the last ``MAX_HISTORY_TURNS * 2`` history items.

    A non-positive ``MAX_HISTORY_TURNS`` disables history entirely. Without
    the guard, ``history[-0:]`` would return the *whole* list.
    """
    limit = MAX_HISTORY_TURNS * 2
    if not history or limit <= 0:
        return []
    return history[-limit:]


def build_messages(
    message: str,
    history: list[Any],
    system_message: str,
) -> list[dict[str, str]]:
    """Assemble the chat message list: system, trimmed history, then ``message``.

    History items may be ``{"role", "content"}`` dicts or ``(user, assistant)``
    pairs; anything else, and any empty content, is skipped.
    """
    messages = [{"role": "system", "content": system_message}]
    for item in trim_history(history):
        if isinstance(item, dict):
            role = item.get("role")
            content = item.get("content")
            if role in {"user", "assistant"} and content:
                messages.append({"role": role, "content": content})
        elif isinstance(item, (list, tuple)) and len(item) >= 2:
            user_text, assistant_text = item[:2]
            if user_text:
                messages.append({"role": "user", "content": str(user_text)})
            if assistant_text:
                messages.append({"role": "assistant", "content": str(assistant_text)})
    messages.append({"role": "user", "content": message})
    return messages


def _fit_messages_to_context(
    llm: Llama,
    messages: list[dict[str, str]],
    max_tokens: int,
) -> list[dict[str, str]]:
    """Drop the oldest history messages until the rendered prompt fits the context.

    The first message (system) and the last one (current user prompt) are
    always kept. ``max_tokens`` of the context window is reserved for the reply.
    """
    budget = llm.n_ctx() - int(max_tokens)
    fitted = list(messages)
    while len(fitted) > 2:
        prompt, _ = render_prompt(llm, fitted)
        n_tokens = len(llm.tokenize(prompt.encode("utf-8"), add_bos=False, special=True))
        if n_tokens <= budget:
            break
        del fitted[1]  # oldest history entry, right after the system message
    return fitted


def session_id(request: gr.Request | None) -> str:
    """Return the request's session hash, or ``"local"`` when there is none."""
    if request and request.session_hash:
        return request.session_hash
    return "local"


def initial_state() -> dict[str, Any]:
    """Return a fresh game state at the origin."""
    return {
        "x": 0,
        "y": 0,
        "sanity": 84,
        "turn": 0,
        "last_event": "You came to yourself under a failing fluorescent light.",
    }


def get_state(request: gr.Request | None) -> dict[str, Any]:
    """Return the mutable game state for this request's session, creating it if new.

    When a new session would push ``SESSIONS`` past ``MAX_SESSIONS``, the
    oldest-created sessions are evicted first (dicts preserve insertion order).
    """
    sid = session_id(request)
    state = SESSIONS.get(sid)
    if state is None:
        while len(SESSIONS) >= MAX_SESSIONS:
            SESSIONS.pop(next(iter(SESSIONS)), None)
        state = SESSIONS[sid] = initial_state()
    return state


def tile_seed(x: int, y: int, turn: int = 0) -> int:
    """Derive a deterministic 64-bit integer from ``GAME_SEED``, position and turn."""
    digest = blake2b(
        f"{GAME_SEED}:{x}:{y}:{turn}".encode("utf-8"),
        digest_size=8,
    ).digest()
    return int.from_bytes(digest, "big")


def zone_profile(x: int, y: int) -> str:
    """Return the zone description for tile ``(x, y)``; stable for a given seed."""
    return ZONE_PROFILES[tile_seed(x, y) % len(ZONE_PROFILES)]


def parse_action(user_input: str) -> tuple[str, tuple[int, int]]:
    """Classify player input as a move or a generic interaction.

    The first word that names a direction wins. Commas and periods act as word
    separators, and other punctuation hugging a word (``"north!"``, ``"(east)"``)
    is stripped before the lookup.

    Returns:
        ``("move <word>", (dx, dy))`` for a movement, else ``("interact", (0, 0))``.
    """
    lowered = user_input.strip().lower()
    for raw_word in lowered.replace(",", " ").replace(".", " ").split():
        word = raw_word.strip(string.punctuation)
        if word in DIRECTIONS:
            return f"move {word}", DIRECTIONS[word]
    return "interact", (0, 0)


def apply_turn(state: dict[str, Any], user_input: str) -> dict[str, Any]:
    """Advance ``state`` by one turn in place and return this turn's event summary.

    Events are drawn from an RNG seeded by the new position and turn number.
    The order of the draws below determines the outcome for a given seed, so
    it must not be rearranged.
    """
    action, delta = parse_action(user_input)
    old_x, old_y = state["x"], state["y"]
    state["turn"] += 1
    state["x"] += delta[0]
    state["y"] += delta[1]

    rng = random.Random(tile_seed(state["x"], state["y"], state["turn"]))
    ambient_noise = rng.choice(AMBIENT_NOISES) if rng.randint(1, 6) == 1 else "none"
    base_loss = 1 if delta != (0, 0) else 0
    if rng.random() < 0.25:
        base_loss += 1

    # Encounter odds rise as sanity falls, clamped to the range [0.03, 0.45].
    encounter_chance = min(0.45, max(0.03, (100 - state["sanity"]) / 260))
    entity_present = rng.random() < encounter_chance
    pressure = rng.choice(ENTITY_PRESSURES) if entity_present else None
    revelation = rng.choice(REVELATIONS) if entity_present and rng.randint(1, 4) == 1 else "none"
    # An apparent escape can only show up from turn 8 onward.
    escape_chance = 0.015 if state["turn"] >= 8 else 0.0
    escape_event = rng.choice(ESCAPE_EVENTS) if rng.random() < escape_chance else "none"

    if entity_present:
        base_loss += rng.choice([2, 3, 4])
    if revelation != "none":
        base_loss += 1
    if escape_event != "none":
        base_loss += 2

    state["sanity"] = max(0, min(100, state["sanity"] - base_loss))
    state["last_event"] = (
        f"Moved from ({old_x}, {old_y}) to ({state['x']}, {state['y']})."
        if delta != (0, 0)
        else "Stayed in place and examined the immediate area."
    )

    return {
        "action": action,
        "moved": delta != (0, 0),
        "entity_present": entity_present,
        "encounter_hint": pressure["hint"] if pressure else "none",
        "encounter_mode": pressure["name"] if pressure else "none",
        "encounter_instruction": pressure["instruction"] if pressure else "none",
        "revelation": revelation,
        "escape_event": escape_event,
        "ambient_noise": ambient_noise,
        "sanity_loss": base_loss,
        "zone": zone_profile(state["x"], state["y"]),
    }


def game_prompt(
    state: dict[str, Any],
    turn: dict[str, Any],
    user_input: str,
    system_message: str,
) -> str:
    """Build the narration prompt for one turn from game state and turn events."""
    return f"""
{system_message}

[SYSTEM POLICY: DREADZONE SIMULATION ENGINE]
You narrate a Backrooms-inspired exploration game. Stay in-world.
Do not mention prompts, dice, hidden state, coordinates, percentages, or rules.

Hidden state:
- Coordinates: ({state["x"]}, {state["y"]})
- Sanity: {state["sanity"]}%
- Turn: {state["turn"]}
- Zone profile: {turn["zone"]}
- Player action: {user_input!r}
- Action classification: {turn["action"]}
- Entity encounter: {turn["encounter_hint"]}
- Encounter mode: {turn["encounter_mode"]}
- Encounter narration note: {turn["encounter_instruction"]}
- Psychological revelation: {turn["revelation"]}
- Possible escape: {turn["escape_event"]}
- Ambient unexplained noise: {turn["ambient_noise"]}
- Sanity erosion this turn: {turn["sanity_loss"]}
- Prior event: {state["last_event"]}

Narration task:
Write 2-5 sentences in second person.
Describe the immediate result of the action and the environment.
If an entity encounter is not "none", imply danger using the encounter narration note.
Do not repeat stock phrases like "it is watching" or "you are getting closer."
If psychological revelation is not "none", make it the emotional turn of the scene:
reveal it obliquely as a realization, memory, or pattern. Do not fully explain it.
If possible escape is not "none", treat it as a rare climax: show the apparent way out,
but make it unsettling and ambiguous rather than triumphant.
If ambient unexplained noise is not "none", weave it into the scene as a small
unsettling detail without making it the whole turn.
If sanity is low, make perception less reliable without naming sanity.
Do not invent inventory, explicit exits, UI commands, or system facts.
End with a subtle opening for the next action.
""".strip()


def opening_prompt(state: dict[str, Any]) -> str:
    """Build the prompt for the very first scene of a session."""
    return f"""
[SYSTEM POLICY: DREADZONE OPENING SCENE]
You narrate a Backrooms-inspired exploration game. Stay in-world.
Do not mention prompts, hidden state, coordinates, percentages, or rules.

Hidden state:
- Coordinates: ({state["x"]}, {state["y"]})
- Sanity: {state["sanity"]}%
- Zone profile: {zone_profile(state["x"], state["y"])}

Narration task:
Write 2-4 sentences in second person.
Begin in medias res: the player has just come to in the Dreadzone with no clear
memory of arrival.
Use concrete sensory detail and leave the player with an immediate opening for
their first action.
""".strip()


def run_completion(
    llm: Llama,
    messages: list[dict[str, str]],
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    """Stream the completion for ``messages``, yielding text pieces (possibly empty)."""
    prompt, stop = render_prompt(llm, messages)
    for chunk in llm.create_completion(
        prompt=prompt,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stop=stop,
        stream=True,
    ):
        yield chunk["choices"][0].get("text") or ""


def generate_opening(request: gr.Request):
    """Return the session's opening scene, generating and caching it on first use.

    Returns the same single-message history three times, once for each output
    component wired up in ``demo.load``.
    """
    state = get_state(request)
    if state.get("opening"):
        history = [{"role": "assistant", "content": state["opening"]}]
        return history, history, history

    print("Generating opening scene", flush=True)
    llm = get_llm()
    messages = [
        {"role": "system", "content": DEFAULT_SYSTEM_PROMPT},
        {"role": "user", "content": opening_prompt(state)},
    ]
    opening = "".join(
        run_completion(
            llm=llm,
            messages=messages,
            max_tokens=180,
            temperature=0.75,
            top_p=0.9,
        )
    )
    # Fall back to a fixed line if the model produced nothing usable.
    opening = opening.strip() or "You come to under a failing fluorescent light."
    state["opening"] = opening
    state["last_event"] = opening
    history = [{"role": "assistant", "content": opening}]
    return history, history, history


def history_with_opening(
    history: list[Any],
    state: dict[str, Any],
) -> list[Any]:
    """Return ``history``, or the stored opening scene when ``history`` is empty."""
    if history:
        return history
    opening = state.get("opening")
    if not opening:
        return history
    return [{"role": "assistant", "content": opening}]


def respond(
    message,
    history: list[dict[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    request: gr.Request,
):
    """Chat handler: advance the game one turn and stream the narration.

    Yields the accumulated response text after each non-empty piece. Blank
    input is ignored and does not consume a turn.
    """
    if not message or not message.strip():
        return

    print(f"Received chat request: {message[:120]!r}", flush=True)
    llm = get_llm()
    state = get_state(request)
    history = history_with_opening(history, state)
    turn = apply_turn(state, message)
    narration_prompt = game_prompt(
        state=state,
        turn=turn,
        user_input=message,
        system_message=system_message or DEFAULT_SYSTEM_PROMPT,
    )
    messages = build_messages(narration_prompt, history, system_message or DEFAULT_SYSTEM_PROMPT)
    # trim_history counts messages, not tokens; long narrations could still
    # overflow the context window, so shed the oldest history until it fits.
    messages = _fit_messages_to_context(llm, messages, max_tokens)

    response = ""
    for token in run_completion(
        llm=llm,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    ):
        if token:
            response += token
            yield response


# Kept at column 0 so the heading and paragraph sit on their own lines.
INTRO_MARKDOWN = """
# Dreadzone

Respond to the opening scene with an action like `look around`, `listen`, or `go north`.
The space tracks your location and fraying perception behind the scenes.
The first scene can take a moment while the local model wakes up.
"""

chat_window = gr.Chatbot()

chatbot = gr.ChatInterface(
    respond,
    chatbot=chat_window,
    additional_inputs=[
        gr.Textbox(value=DEFAULT_SYSTEM_PROMPT, label="Narration policy"),
        gr.Slider(minimum=32, maximum=768, value=220, step=16, label="Max new tokens"),
        gr.Slider(minimum=0.0, maximum=1.5, value=0.7, step=0.05, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.9,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)

with gr.Blocks() as demo:
    gr.Markdown(INTRO_MARKDOWN)
    chatbot.render()
    # Populate the chat with the opening scene when the page loads.
    demo.load(
        generate_opening,
        outputs=[chat_window, chatbot.chatbot_state, chatbot.chatbot_value],
    )


if __name__ == "__main__":
    demo.launch()