| """Model backends. |
| |
| Three interchangeable backends behind one tiny interface: |
| |
| backend.chat(system: str, user: str) -> str |
| |
| - `transformers` : load the small model locally (default; GPU or CPU). |
| - `inference_api` : call the Hugging Face serverless Inference API (no GPU). |
| - `mock` : a deterministic fake that emits valid tagged output, so the |
| parser, engine and UI can be tested with no weights / network. |
| |
| Pick with the MICRORPG_BACKEND env var. See README for all knobs. |
| """ |
|
|
| from __future__ import annotations |
|
|
| import os |
| import random |
| from typing import Protocol |
|
|
|
|
# Model identifier used when a backend is built without an explicit one.
# Override with the MICRORPG_MODEL environment variable.
DEFAULT_MODEL = os.getenv("MICRORPG_MODEL", "Qwen/Qwen3-4B-Instruct-2507")

# Token budget requested for each reply. Override with MICRORPG_MAX_TOKENS;
# the value must parse as an integer.
MAX_NEW_TOKENS = int(os.getenv("MICRORPG_MAX_TOKENS", "512"))
|
|
|
|
class Backend(Protocol):
    """Structural interface shared by every model backend in this module.

    Any object exposing a ``name`` attribute and a matching ``chat`` method
    satisfies it; no inheritance is required.
    """

    # Short identifier of the backend implementation (e.g. "mock").
    name: str

    def chat(self, system: str, user: str) -> str:
        """Return the model's reply to one system + user message pair."""
        ...
|
|
|
|
| |
| |
| |
class TransformersBackend:
    """Run the model locally through Hugging Face ``transformers``.

    The base model is loaded from ``model_id``. When the MICRORPG_ADAPTER
    environment variable is set, a PEFT adapter is applied on top of the base
    model and the tokenizer is loaded from the adapter location instead.
    """

    name = "transformers"

    def __init__(self, model_id: str = DEFAULT_MODEL):
        """Load the tokenizer and model weights, plus the optional adapter.

        Args:
            model_id: Base model identifier passed to ``from_pretrained``.
        """
        # Imported here rather than at module level, so importing this module
        # does not pull in torch / transformers.
        import torch
        from transformers import AutoModelForCausalLM, AutoTokenizer

        self.model_id = model_id
        adapter = os.environ.get("MICRORPG_ADAPTER")

        # The tokenizer comes from the adapter location when one is configured.
        self.tokenizer = AutoTokenizer.from_pretrained(adapter or model_id)

        # bfloat16 with automatic device placement on CUDA, float32 otherwise.
        has_cuda = torch.cuda.is_available()
        self.model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.bfloat16 if has_cuda else torch.float32,
            device_map="auto" if has_cuda else None,
        )
        if adapter:
            from peft import PeftModel

            self.model = PeftModel.from_pretrained(self.model, adapter)
            print(f"[llm] loaded fine-tuned adapter: {adapter}")
        # Kept so chat() can open a no_grad context without re-importing torch.
        self._torch = torch

    def chat(self, system: str, user: str) -> str:
        """Generate one sampled reply for a system + user message pair.

        Args:
            system: System prompt text.
            user: User message text.

        Returns:
            The newly generated text only (prompt tokens removed, special
            tokens skipped), with surrounding whitespace stripped.
        """
        messages = [
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ]
        # return_dict=True yields input_ids together with the attention mask.
        # Passing both to generate() means the mask never has to be inferred,
        # which is ambiguous here because pad_token_id is set to the EOS id.
        encoded = self.tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt",
            return_dict=True,
        ).to(self.model.device)
        prompt_len = encoded["input_ids"].shape[-1]

        with self._torch.no_grad():
            out = self.model.generate(
                **encoded,
                max_new_tokens=MAX_NEW_TOKENS,
                do_sample=True,
                temperature=0.8,
                top_p=0.9,
                repetition_penalty=1.1,
                pad_token_id=self.tokenizer.eos_token_id,
            )
        # generate() returns prompt + continuation; keep only the continuation.
        text = self.tokenizer.decode(out[0][prompt_len:], skip_special_tokens=True)
        return text.strip()
|
|
|
|
| |
| |
| |
class InferenceAPIBackend:
    """Call a hosted model through ``huggingface_hub.InferenceClient``."""

    name = "inference_api"

    def __init__(self, model_id: str = DEFAULT_MODEL):
        """Create the client for ``model_id``.

        The access token is read from HF_TOKEN, falling back to
        HUGGING_FACE_HUB_TOKEN; if neither is set, ``None`` is passed on.

        Args:
            model_id: Model identifier handed to the client.
        """
        # Imported here rather than at module level, so importing this module
        # does not pull in huggingface_hub.
        from huggingface_hub import InferenceClient

        token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
        self.model_id = model_id
        self.client = InferenceClient(model=model_id, token=token)

    def chat(self, system: str, user: str) -> str:
        """Request one chat completion and return its text.

        Args:
            system: System prompt text.
            user: User message text.

        Returns:
            The first choice's message content, stripped of surrounding
            whitespace. An absent content field yields an empty string.
        """
        resp = self.client.chat_completion(
            messages=[
                {"role": "system", "content": system},
                {"role": "user", "content": user},
            ],
            max_tokens=MAX_NEW_TOKENS,
            temperature=0.8,
            top_p=0.9,
        )
        # `content` is optional in the response schema. Normalise a missing
        # reply to "" so the `-> str` contract holds instead of raising
        # AttributeError on None.strip().
        content = resp.choices[0].message.content
        return (content or "").strip()
|
|
|
|
| |
| |
| |
class MockBackend:
    """Stand-in model that needs neither weights nor network access.

    Every reply follows the ``<narrative>`` / ``<state>`` / ``<choices>``
    layout. An attack verb in a prompt that also mentions "in combat" gets a
    fixed combat exchange; anything else gets a canned scene drawn from a
    privately seeded RNG, so a fresh instance always yields the same sequence
    of scenes.
    """

    name = "mock"

    # (narrative template, state line) pairs. "{loc}" in a template is filled
    # with the location parsed from the prompt.
    _SCENES = [
        ("A cold wind drags mist across {loc}. Something shifts in the dark ahead.",
         "ENEMY: Mist Wraith|hp=10|atk=3"),
        ("You find a leather pouch half-buried in the mud. Coins glint inside.",
         "GOLD: +7"),
        ("An old hermit beckons you toward a flickering lantern.",
         "NPC: Aldric|hermit|friendly|knows the old roads"),
        ("A rusted chest yields a glimmer of steel.",
         "ITEM_ADD: Iron Shortsword"),
        ("The path opens onto a ruined chapel, its bell long silent.",
         "LOCATION: The Ruined Chapel"),
    ]

    def __init__(self, model_id: str = "mock"):
        """Store ``model_id`` and create the fixed-seed RNG used for scenes."""
        self._rng = random.Random(7)
        self.model_id = model_id

    def chat(self, system: str, user: str) -> str:
        """Build a well-formed tagged turn from the user message.

        Args:
            system: Accepted for interface compatibility; not inspected.
            user: Prompt text. It is scanned case-insensitively for
                "in combat", for an attack verb, and for "location:" lines.

        Returns:
            The narrative, state and choices sections joined by newlines.
        """
        lowered = user.lower()
        attack_words = ("attack", "strike", "hit", "swing", "stab")
        attacking = "in combat" in lowered and any(
            word in lowered for word in attack_words
        )

        if not attacking:
            # The last "location:" line in the prompt wins; default otherwise.
            place = "the crossroads"
            for row in user.splitlines():
                if row.lower().startswith("location:"):
                    place = row.partition(":")[2].strip()
            template, state = self._rng.choice(self._SCENES)
            narrative = template.format(loc=place)
            options = ["1. Investigate closely.", "2. Move on carefully.", "3. Call out."]
        else:
            # The combat exchange is fixed and does not touch the RNG.
            narrative = "You lunge forward and your blade bites home; the creature shrieks and claws back."
            state = "ENEMY_HP: -6\nHP: -3\nXP: +4"
            options = ["1. Press the attack.", "2. Back away and guard.", "3. Try to flee."]

        sections = [
            "<narrative>", narrative, "</narrative>",
            "<state>", state, "</state>",
            "<choices>", *options, "</choices>",
        ]
        return "\n".join(sections)
|
|
|
|
| |
| |
| |
def build_backend(kind: str | None = None, model_id: str | None = None) -> Backend:
    """Instantiate the backend selected by ``kind``.

    Args:
        kind: Backend name or alias; case and surrounding whitespace are
            ignored. When omitted or blank, the MICRORPG_BACKEND environment
            variable is used; if that is unset or blank too, "transformers"
            is used.
        model_id: Model identifier for the transformers and inference-API
            backends; defaults to DEFAULT_MODEL. The mock backend is built
            without it.

    Returns:
        A new backend instance.

    Raises:
        ValueError: If the resolved name matches no known backend.
    """
    # An empty value (e.g. `MICRORPG_BACKEND=` in an env file) counts as unset
    # rather than as an unknown backend name.
    requested = (kind or "").strip()
    if not requested:
        requested = os.environ.get("MICRORPG_BACKEND", "").strip()
    kind = (requested or "transformers").lower()
    model_id = model_id or DEFAULT_MODEL

    if kind == "mock":
        return MockBackend()
    if kind in ("inference_api", "api", "inference"):
        return InferenceAPIBackend(model_id)
    if kind in ("transformers", "local"):
        return TransformersBackend(model_id)
    raise ValueError(f"Unknown backend: {kind!r}")
|
|