"""Model backends.

Three interchangeable backends behind one tiny interface:

    backend.chat(system: str, user: str) -> str

- `transformers`  : load the small model locally (default; GPU or CPU).
- `inference_api` : call the Hugging Face serverless Inference API (no GPU).
- `mock`          : a deterministic fake that emits valid tagged output, so the
                    parser, engine and UI can be tested with no weights / network.

Pick with the MICRORPG_BACKEND env var. See README for all knobs.
"""

from __future__ import annotations

import os
import random
from typing import Protocol

# Both knobs are read once, at import time.
DEFAULT_MODEL = os.environ.get("MICRORPG_MODEL", "Qwen/Qwen3-4B-Instruct-2507")
MAX_NEW_TOKENS = int(os.environ.get("MICRORPG_MAX_TOKENS", "512"))


class Backend(Protocol):
    """Structural interface every backend satisfies: a `name` and a `chat` method."""

    name: str

    def chat(self, system: str, user: str) -> str: ...


# --------------------------------------------------------------------------- #
# transformers (local)
# --------------------------------------------------------------------------- #
class TransformersBackend:
    """Run the model in-process through `transformers`.

    Heavy dependencies (`torch`, `transformers`, and `peft` when an adapter is
    configured) are imported inside `__init__`, so importing this module stays
    cheap for the other backends.
    """

    name = "transformers"

    def __init__(self, model_id: str = DEFAULT_MODEL):
        """Load the tokenizer and model, plus an optional LoRA adapter.

        Args:
            model_id: Base model identifier passed to `from_pretrained`.
        """
        import torch
        from transformers import AutoModelForCausalLM, AutoTokenizer

        self.model_id = model_id
        adapter = os.environ.get("MICRORPG_ADAPTER")  # fine-tuned LoRA dir, optional
        # If an adapter is given, the tokenizer was saved alongside it (and may carry
        # the right chat template) — prefer it; otherwise load the base tokenizer.
        self.tokenizer = AutoTokenizer.from_pretrained(adapter or model_id)

        # bfloat16 with automatic device placement on CUDA; float32 otherwise.
        has_cuda = torch.cuda.is_available()
        dtype = torch.bfloat16 if has_cuda else torch.float32
        self.model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=dtype,
            device_map="auto" if has_cuda else None,
        )
        if adapter:
            from peft import PeftModel

            self.model = PeftModel.from_pretrained(self.model, adapter)
            print(f"[llm] loaded fine-tuned adapter: {adapter}")
        # Keep a handle on torch so `chat` does not need to re-import it.
        self._torch = torch

    def chat(self, system: str, user: str) -> str:
        """Generate one sampled reply for a system + user message pair.

        Args:
            system: System prompt text.
            user: User message text.

        Returns:
            The newly generated text only (prompt tokens removed), stripped of
            surrounding whitespace.
        """
        messages = [
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ]
        # Ask for a dict so the attention mask comes back with the ids.
        # pad_token_id is set to the EOS id below, so the mask cannot be
        # inferred from the ids alone and is passed to generate() explicitly.
        encoded = self.tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt",
            return_dict=True,
        ).to(self.model.device)
        input_ids = encoded["input_ids"]
        prompt_len = input_ids.shape[-1]

        with self._torch.no_grad():
            out = self.model.generate(
                input_ids=input_ids,
                attention_mask=encoded["attention_mask"],
                max_new_tokens=MAX_NEW_TOKENS,
                do_sample=True,
                temperature=0.8,
                top_p=0.9,
                repetition_penalty=1.1,
                pad_token_id=self.tokenizer.eos_token_id,
            )
        # generate() returns prompt + continuation; decode the continuation only.
        text = self.tokenizer.decode(out[0][prompt_len:], skip_special_tokens=True)
        return text.strip()


# --------------------------------------------------------------------------- #
# Hugging Face Inference API (serverless, no local GPU)
# --------------------------------------------------------------------------- #
class InferenceAPIBackend:
    """Call a hosted model through `huggingface_hub.InferenceClient`."""

    name = "inference_api"

    def __init__(self, model_id: str = DEFAULT_MODEL):
        """Create the client.

        The token is read from HF_TOKEN, falling back to HUGGING_FACE_HUB_TOKEN;
        if neither is set, `None` is passed to the client.

        Args:
            model_id: Model identifier passed to `InferenceClient`.
        """
        from huggingface_hub import InferenceClient

        token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
        self.model_id = model_id
        self.client = InferenceClient(model=model_id, token=token)

    def chat(self, system: str, user: str) -> str:
        """Send one system + user exchange and return the reply text.

        Args:
            system: System prompt text.
            user: User message text.

        Returns:
            The first choice's message content, stripped; an empty string when
            the response carries no content.
        """
        resp = self.client.chat_completion(
            messages=[
                {"role": "system", "content": system},
                {"role": "user", "content": user},
            ],
            max_tokens=MAX_NEW_TOKENS,
            temperature=0.8,
            top_p=0.9,
        )
        content = resp.choices[0].message.content
        # Guard against a None content: calling .strip() on it would raise.
        return (content or "").strip()


# --------------------------------------------------------------------------- #
# mock (no weights, no network) — emits valid tagged output
# --------------------------------------------------------------------------- #
class MockBackend:
    """Deterministic-ish fake model. It reads the action out of the user message
    and produces a plausible tagged turn so the rest of the stack can be exercised
    end-to-end without any model. Not smart — just well-formed."""

    name = "mock"

    # (narrative template, state-change line) pairs used outside of combat.
    # Only the first template uses the {loc} placeholder.
    _SCENES = [
        ("A cold wind drags mist across {loc}. Something shifts in the dark ahead.",
         "ENEMY: Mist Wraith|hp=10|atk=3"),
        ("You find a leather pouch half-buried in the mud. Coins glint inside.",
         "GOLD: +7"),
        ("An old hermit beckons you toward a flickering lantern.",
         "NPC: Aldric|hermit|friendly|knows the old roads"),
        ("A rusted chest yields a glimmer of steel.",
         "ITEM_ADD: Iron Shortsword"),
        ("The path opens onto a ruined chapel, its bell long silent.",
         "LOCATION: The Ruined Chapel"),
    ]

    def __init__(self, model_id: str = "mock"):
        """Store the model id and seed a private RNG.

        The fixed seed means a fresh instance always produces the same
        sequence of scenes.
        """
        self.model_id = model_id
        self._rng = random.Random(7)

    def chat(self, system: str, user: str) -> str:
        """Return a canned turn: narrative, state change, then three choices.

        Args:
            system: Ignored by the mock.
            user: Prompt text. It is scanned for a line starting with
                "location:" (case-insensitive) and for combat keywords.

        Returns:
            The narrative, the state lines and the numbered choices, separated
            by blank lines.
        """
        action = user.lower()

        # The last line starting with "location:" wins; otherwise use the default.
        loc = "the crossroads"
        for line in user.splitlines():
            if line.lower().startswith("location:"):
                loc = line.split(":", 1)[1].strip()

        # Combat-aware: if the player attacks, hurt the enemy and take a hit back.
        if "in combat" in action and any(
            w in action for w in ("attack", "strike", "hit", "swing", "stab")
        ):
            narrative = "You lunge forward and your blade bites home; the creature shrieks and claws back."
            state = "ENEMY_HP: -6\nHP: -3\nXP: +4"
            choices = ["1. Press the attack.", "2. Back away and guard.", "3. Try to flee."]
        else:
            scene, change = self._rng.choice(self._SCENES)
            narrative = scene.format(loc=loc)
            state = change
            choices = ["1. Investigate closely.", "2. Move on carefully.", "3. Call out."]

        # NOTE(review): the class docstring promises "tagged" output, but no tag
        # markers appear in these literals — confirm against the parser whether
        # markup was lost from this file. The strings are left untouched here.
        return (
            f"\n{narrative}\n\n"
            f"\n{state}\n\n"
            f"\n" + "\n".join(choices) + "\n"
        )


# --------------------------------------------------------------------------- #
# factory
# --------------------------------------------------------------------------- #
def build_backend(kind: str | None = None, model_id: str | None = None) -> Backend:
    """Construct the backend selected by `kind` or the MICRORPG_BACKEND env var.

    Args:
        kind: Backend name. "mock"; "inference_api" / "api" / "inference";
            "transformers" / "local". Matching is case-insensitive and ignores
            surrounding whitespace. When empty or None, MICRORPG_BACKEND is
            used, and when that is unset or blank, "transformers".
        model_id: Model identifier for the non-mock backends; defaults to
            DEFAULT_MODEL. The mock backend ignores it.

    Returns:
        A new backend instance.

    Raises:
        ValueError: If the resolved name matches no known backend.
    """
    # A blank argument or blank env var counts as "not set", so it falls through
    # to the next source instead of failing as the unknown backend ''.
    requested = (
        (kind or "").strip()
        or os.environ.get("MICRORPG_BACKEND", "").strip()
        or "transformers"
    )
    kind = requested.lower()

    if kind == "mock":
        return MockBackend()
    if kind in ("inference_api", "api", "inference"):
        return InferenceAPIBackend(model_id or DEFAULT_MODEL)
    if kind in ("transformers", "local"):
        return TransformersBackend(model_id or DEFAULT_MODEL)
    raise ValueError(f"Unknown backend: {kind!r}")