"""Model backends.
Three interchangeable backends behind one tiny interface:
backend.chat(system: str, user: str) -> str
- `transformers` : load the small model locally (default; GPU or CPU).
- `inference_api` : call the Hugging Face serverless Inference API (no GPU).
- `mock` : a deterministic fake that emits valid tagged output, so the
parser, engine and UI can be tested with no weights / network.
Pick with the MICRORPG_BACKEND env var. See README for all knobs.
"""
from __future__ import annotations
import os
import random
from typing import Protocol
# Model id used by the model-backed backends when the caller does not pass one.
# Override with MICRORPG_MODEL. A variable that is set but blank (e.g. `VAR=` in
# a .env file) is treated like an unset one instead of producing an empty id.
DEFAULT_MODEL = (
    os.environ.get("MICRORPG_MODEL", "").strip() or "Qwen/Qwen3-4B-Instruct-2507"
)

# Upper bound on generated tokens per reply (used as `max_new_tokens` locally and
# `max_tokens` for the Inference API). Override with MICRORPG_MAX_TOKENS. A blank
# value falls back to 512 rather than crashing the import inside int("").
MAX_NEW_TOKENS = int(os.environ.get("MICRORPG_MAX_TOKENS", "").strip() or "512")
class Backend(Protocol):
    """Structural interface shared by every model backend in this module.

    Any object exposing a ``name`` string and a ``chat(system, user)`` method
    that returns the reply text satisfies it; no inheritance is required.
    """

    # Short identifier of the backend implementation.
    name: str

    def chat(self, system: str, user: str) -> str:
        """Return the model's reply to ``user`` under the ``system`` prompt."""
# --------------------------------------------------------------------------- #
# transformers (local)
# --------------------------------------------------------------------------- #
class TransformersBackend:
    """Backend that runs a causal language model in-process via `transformers`.

    The base weights come from ``model_id``. When the ``MICRORPG_ADAPTER``
    environment variable is set, it is used as the path of a PEFT adapter:
    the tokenizer is loaded from that path and the adapter is applied on top
    of the base model.
    """

    name = "transformers"

    def __init__(self, model_id: str = DEFAULT_MODEL):
        """Load the tokenizer, the base model and (optionally) the adapter.

        Args:
            model_id: Model identifier passed to ``from_pretrained``.
        """
        # Heavy dependencies are imported here rather than at module import
        # time, so merely importing this module does not pull in torch.
        import torch
        from transformers import AutoModelForCausalLM, AutoTokenizer

        self.model_id = model_id
        adapter = os.environ.get("MICRORPG_ADAPTER")  # fine-tuned LoRA dir, optional

        # If an adapter is given, the tokenizer was saved alongside it (and may
        # carry the right chat template) — prefer it; otherwise load the base
        # tokenizer.
        self.tokenizer = AutoTokenizer.from_pretrained(adapter or model_id)

        # bfloat16 with automatic device placement on CUDA; float32 otherwise.
        has_cuda = torch.cuda.is_available()
        dtype = torch.bfloat16 if has_cuda else torch.float32
        self.model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=dtype,
            device_map="auto" if has_cuda else None,
        )

        if adapter:
            from peft import PeftModel

            self.model = PeftModel.from_pretrained(self.model, adapter)
            print(f"[llm] loaded fine-tuned adapter: {adapter}")

        # Keep a handle on the module so chat() can use it without re-importing.
        self._torch = torch

    def chat(self, system: str, user: str) -> str:
        """Generate one sampled reply for a system + user message pair.

        Args:
            system: System prompt text.
            user: User message text.

        Returns:
            The newly generated text only (prompt excluded), with special
            tokens removed and surrounding whitespace stripped.
        """
        messages = [
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ]

        # Ask for a dict so the attention mask travels with the input ids.
        # generate() cannot infer the mask when pad_token_id == eos_token_id,
        # and the dict form does not depend on the tokenizer returning a bare
        # tensor from apply_chat_template.
        encoded = self.tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        ).to(self.model.device)
        prompt_len = encoded["input_ids"].shape[-1]

        with self._torch.no_grad():
            out = self.model.generate(
                **encoded,
                max_new_tokens=MAX_NEW_TOKENS,
                do_sample=True,
                temperature=0.8,
                top_p=0.9,
                repetition_penalty=1.1,
                pad_token_id=self.tokenizer.eos_token_id,
            )

        # generate() returns prompt + continuation; decode only the new tokens.
        text = self.tokenizer.decode(out[0][prompt_len:], skip_special_tokens=True)
        return text.strip()
# --------------------------------------------------------------------------- #
# Hugging Face Inference API (serverless, no local GPU)
# --------------------------------------------------------------------------- #
class InferenceAPIBackend:
    """Backend that sends chat requests through `huggingface_hub.InferenceClient`.

    No model weights are loaded locally. The access token is read from
    ``HF_TOKEN``, falling back to ``HUGGING_FACE_HUB_TOKEN``; if neither is
    set the client is created with ``token=None``.
    """

    name = "inference_api"

    def __init__(self, model_id: str = DEFAULT_MODEL):
        """Create the inference client for ``model_id``.

        Args:
            model_id: Model identifier handed to ``InferenceClient``.
        """
        # Imported lazily so the dependency is only needed when this backend
        # is actually instantiated.
        from huggingface_hub import InferenceClient

        token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
        self.model_id = model_id
        self.client = InferenceClient(model=model_id, token=token)

    def chat(self, system: str, user: str) -> str:
        """Request one chat completion and return its text.

        Args:
            system: System prompt text.
            user: User message text.

        Returns:
            The first choice's message content with surrounding whitespace
            stripped, or an empty string when the response has no content.
        """
        resp = self.client.chat_completion(
            messages=[
                {"role": "system", "content": system},
                {"role": "user", "content": user},
            ],
            max_tokens=MAX_NEW_TOKENS,
            temperature=0.8,
            top_p=0.9,
        )
        # `content` is optional in the chat-completion response; calling
        # .strip() on None would raise AttributeError, so normalise it to ""
        # and keep the `-> str` contract.
        content = resp.choices[0].message.content
        return (content or "").strip()
# --------------------------------------------------------------------------- #
# mock (no weights, no network) — emits valid tagged output
# --------------------------------------------------------------------------- #
class MockBackend:
    """Stand-in model that needs neither weights nor network access.

    It inspects the user message for a location line and for combat/attack
    wording, then returns a canned turn made of a narrative, a state-change
    section and three numbered choices. Scene selection uses a fixed-seed
    random generator, so a fresh instance always yields the same sequence.
    """

    name = "mock"

    # (narrative template, state-change line) pairs used outside of combat.
    # Only templates containing "{loc}" make use of the parsed location.
    _SCENES = [
        (
            "A cold wind drags mist across {loc}. Something shifts in the dark ahead.",
            "ENEMY: Mist Wraith|hp=10|atk=3",
        ),
        (
            "You find a leather pouch half-buried in the mud. Coins glint inside.",
            "GOLD: +7",
        ),
        (
            "An old hermit beckons you toward a flickering lantern.",
            "NPC: Aldric|hermit|friendly|knows the old roads",
        ),
        (
            "A rusted chest yields a glimmer of steel.",
            "ITEM_ADD: Iron Shortsword",
        ),
        (
            "The path opens onto a ruined chapel, its bell long silent.",
            "LOCATION: The Ruined Chapel",
        ),
    ]

    def __init__(self, model_id: str = "mock"):
        """Store ``model_id`` and create the fixed-seed scene picker."""
        self._rng = random.Random(7)
        self.model_id = model_id

    def chat(self, system: str, user: str) -> str:
        """Build a canned reply for ``user``; ``system`` is not consulted.

        Returns:
            Narrative, state change(s) and choices, in that order, separated
            by blank lines.
        """
        lowered = user.lower()

        # Every "location:" line is collected; the last one wins, and a
        # default is used when the message has none.
        mentioned = [
            row.split(":", 1)[1].strip()
            for row in user.splitlines()
            if row.lower().startswith("location:")
        ]
        loc = mentioned[-1] if mentioned else "the crossroads"

        attack_words = ("attack", "strike", "hit", "swing", "stab")
        fighting = "in combat" in lowered and any(
            word in lowered for word in attack_words
        )

        if not fighting:
            # Exploration: draw the next scene from the seeded generator.
            scene, change = self._rng.choice(self._SCENES)
            narrative = scene.format(loc=loc)
            state = change
            choices = [
                "1. Investigate closely.",
                "2. Move on carefully.",
                "3. Call out.",
            ]
        else:
            # Combat-aware: the player attacks, the enemy is hurt and hits back.
            narrative = "You lunge forward and your blade bites home; the creature shrieks and claws back."
            state = "ENEMY_HP: -6\nHP: -3\nXP: +4"
            choices = [
                "1. Press the attack.",
                "2. Back away and guard.",
                "3. Try to flee.",
            ]

        # NOTE(review): the sections are delimited by blank lines only; no
        # explicit section markers are emitted here — confirm this is the
        # layout the downstream parser expects.
        options = "\n".join(choices)
        return f"\n{narrative}\n\n\n{state}\n\n\n{options}\n"
# --------------------------------------------------------------------------- #
# factory
# --------------------------------------------------------------------------- #
def build_backend(kind: str | None = None, model_id: str | None = None) -> Backend:
    """Instantiate the backend selected by ``kind`` or the environment.

    Args:
        kind: Backend selector. Accepted values (case-insensitive) are
            ``"mock"``; ``"inference_api"``, ``"api"`` or ``"inference"``;
            ``"transformers"`` or ``"local"``. When empty or ``None``, the
            ``MICRORPG_BACKEND`` environment variable is used, and when that
            is unset or blank too, ``"transformers"``.
        model_id: Model identifier for the model-backed backends; defaults to
            ``DEFAULT_MODEL``. The mock backend does not receive it.

    Returns:
        A freshly constructed backend instance.

    Raises:
        ValueError: If the selector matches none of the accepted values.
    """
    # os.environ.get(name, default) only applies the default when the variable
    # is unset, so a blank or whitespace-padded value used to end up as an
    # "Unknown backend" error. Strip first and treat blank as "not given".
    requested = (
        (kind or "").strip()
        or os.environ.get("MICRORPG_BACKEND", "").strip()
        or "transformers"
    )
    kind = requested.lower()
    model_id = model_id or DEFAULT_MODEL

    if kind == "mock":
        return MockBackend()
    if kind in ("inference_api", "api", "inference"):
        return InferenceAPIBackend(model_id)
    if kind in ("transformers", "local"):
        return TransformersBackend(model_id)
    raise ValueError(f"Unknown backend: {kind!r}")