Spaces:

build-small-hackathon
/

micro-rpg-engine

Running

File size: 6,915 Bytes

7fe39f3

"""Model backends.

Three interchangeable backends behind one tiny interface:

    backend.chat(system: str, user: str) -> str

- `transformers`    : load the small model locally (default; GPU or CPU).
- `inference_api`   : call the Hugging Face serverless Inference API (no GPU).
- `mock`            : a deterministic fake that emits valid tagged output, so the
                      parser, engine and UI can be tested with no weights / network.

Pick with the MICRORPG_BACKEND env var. See README for all knobs.
"""

from __future__ import annotations

import os
import random
from typing import Protocol


# Model identifier the backends fall back to when none is passed explicitly;
# overridable through the MICRORPG_MODEL environment variable.
DEFAULT_MODEL = os.getenv("MICRORPG_MODEL", "Qwen/Qwen3-4B-Instruct-2507")

# Upper bound on newly generated tokens per reply, taken from
# MICRORPG_MAX_TOKENS (parsed as an integer at import time).
MAX_NEW_TOKENS = int(os.getenv("MICRORPG_MAX_TOKENS", "512"))


class Backend(Protocol):
    """Structural interface shared by every model backend in this module.

    A backend carries a short ``name`` label and offers one operation,
    ``chat``, which maps a system prompt plus a user message to reply text.
    """

    name: str

    def chat(self, system: str, user: str) -> str:
        """Return the reply text for ``user`` under the ``system`` prompt."""
        ...


# --------------------------------------------------------------------------- #
# transformers (local)
# --------------------------------------------------------------------------- #
class TransformersBackend:
    """Backend that runs a causal language model in-process via `transformers`.

    The model is loaded once in the constructor; each `chat` call renders the
    system/user pair through the tokenizer's chat template, samples a reply
    and returns only the newly generated text.
    """

    name = "transformers"

    def __init__(self, model_id: str = DEFAULT_MODEL):
        """Load the tokenizer and model, plus an optional LoRA adapter.

        Args:
            model_id: Identifier or path passed to ``from_pretrained`` for the
                base model (and for the tokenizer when no adapter is set).

        The MICRORPG_ADAPTER environment variable, when set, names a
        fine-tuned adapter directory that is applied on top of the base model.
        """
        # Heavy dependencies are imported here rather than at module level, so
        # importing this module does not require torch/transformers.
        import torch
        from transformers import AutoModelForCausalLM, AutoTokenizer

        self.model_id = model_id
        adapter = os.environ.get("MICRORPG_ADAPTER")  # fine-tuned LoRA dir, optional

        # If an adapter is given, the tokenizer was saved alongside it (and may carry
        # the right chat template) — prefer it; otherwise load the base tokenizer.
        self.tokenizer = AutoTokenizer.from_pretrained(adapter or model_id)

        # bfloat16 + automatic device placement on CUDA; plain float32 otherwise.
        has_cuda = torch.cuda.is_available()
        dtype = torch.bfloat16 if has_cuda else torch.float32
        self.model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=dtype,
            device_map="auto" if has_cuda else None,
        )
        if adapter:
            from peft import PeftModel
            self.model = PeftModel.from_pretrained(self.model, adapter)
            print(f"[llm] loaded fine-tuned adapter: {adapter}")
        # Kept on the instance so `chat` can use torch without re-importing it.
        self._torch = torch

    def chat(self, system: str, user: str) -> str:
        """Generate one sampled reply for a system prompt and user message.

        Args:
            system: Content of the system message.
            user: Content of the user message.

        Returns:
            The decoded continuation only (prompt tokens removed, special
            tokens skipped), with surrounding whitespace stripped.
        """
        messages = [
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ]
        # return_dict=True yields the attention mask together with the ids.
        # Since pad_token_id is set to EOS below, generate() cannot reliably
        # infer the mask on its own, so it is passed explicitly.
        encoded = self.tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt",
            return_dict=True,
        ).to(self.model.device)
        input_ids = encoded["input_ids"]
        prompt_len = input_ids.shape[-1]

        with self._torch.no_grad():
            # Keyword arguments only: a PEFT-wrapped model may not accept the
            # prompt tensor positionally.
            out = self.model.generate(
                input_ids=input_ids,
                attention_mask=encoded["attention_mask"],
                max_new_tokens=MAX_NEW_TOKENS,
                do_sample=True,
                temperature=0.8,
                top_p=0.9,
                repetition_penalty=1.1,
                pad_token_id=self.tokenizer.eos_token_id,
            )
        # generate() returns prompt + continuation; keep only the continuation.
        text = self.tokenizer.decode(out[0][prompt_len:], skip_special_tokens=True)
        return text.strip()


# --------------------------------------------------------------------------- #
# Hugging Face Inference API (serverless, no local GPU)
# --------------------------------------------------------------------------- #
class InferenceAPIBackend:
    """Backend that delegates generation to a remote model through
    `huggingface_hub.InferenceClient`, so no local weights are loaded."""

    name = "inference_api"

    def __init__(self, model_id: str = DEFAULT_MODEL):
        """Create the inference client for ``model_id``.

        Args:
            model_id: Model identifier handed to ``InferenceClient``.

        The access token is read from HF_TOKEN, falling back to
        HUGGING_FACE_HUB_TOKEN; it is passed as ``None`` when neither is set.
        """
        # Imported here so the module can be imported without huggingface_hub.
        from huggingface_hub import InferenceClient

        token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
        self.model_id = model_id
        self.client = InferenceClient(model=model_id, token=token)

    def chat(self, system: str, user: str) -> str:
        """Request one chat completion and return its text.

        Args:
            system: Content of the system message.
            user: Content of the user message.

        Returns:
            The first choice's message content with surrounding whitespace
            stripped, or an empty string when the response carries no content.
        """
        resp = self.client.chat_completion(
            messages=[
                {"role": "system", "content": system},
                {"role": "user", "content": user},
            ],
            max_tokens=MAX_NEW_TOKENS,
            temperature=0.8,
            top_p=0.9,
        )
        # `content` is optional in the chat-completion schema; calling
        # .strip() on None would raise AttributeError, so treat it as "".
        content = resp.choices[0].message.content
        return (content or "").strip()


# --------------------------------------------------------------------------- #
# mock (no weights, no network) — emits valid tagged output
# --------------------------------------------------------------------------- #
class MockBackend:
    """Stand-in model that needs neither weights nor network access.

    Every reply is a well-formed tagged turn (``<narrative>``, ``<state>``,
    ``<choices>``) derived from simple keyword checks on the user message, so
    the rest of the stack can be exercised end-to-end. Scene selection draws
    from a fixed-seed RNG owned by the instance. Not smart — just well-formed.
    """

    name = "mock"

    # (narrative template, state line) pairs used outside of combat. A
    # template may reference ``{loc}`` to mention the current location.
    _SCENES = [
        ("A cold wind drags mist across {loc}. Something shifts in the dark ahead.",
         "ENEMY: Mist Wraith|hp=10|atk=3"),
        ("You find a leather pouch half-buried in the mud. Coins glint inside.",
         "GOLD: +7"),
        ("An old hermit beckons you toward a flickering lantern.",
         "NPC: Aldric|hermit|friendly|knows the old roads"),
        ("A rusted chest yields a glimmer of steel.",
         "ITEM_ADD: Iron Shortsword"),
        ("The path opens onto a ruined chapel, its bell long silent.",
         "LOCATION: The Ruined Chapel"),
    ]

    def __init__(self, model_id: str = "mock"):
        """Record ``model_id`` and seed the private RNG with a fixed value."""
        self._rng = random.Random(7)
        self.model_id = model_id

    def chat(self, system: str, user: str) -> str:
        """Build a tagged turn from ``user``; ``system`` is not consulted.

        An attack verb together with the phrase "in combat" (both matched
        case-insensitively as substrings) yields a fixed combat exchange;
        anything else yields a randomly chosen scene.
        """
        lowered = user.lower()

        # The last line starting with "location:" wins; otherwise a default.
        where = next(
            (
                raw.partition(":")[2].strip()
                for raw in reversed(user.splitlines())
                if raw.lower().startswith("location:")
            ),
            "the crossroads",
        )

        attack_words = ("attack", "strike", "hit", "swing", "stab")
        attacking = "in combat" in lowered and any(
            word in lowered for word in attack_words
        )

        if not attacking:
            template, delta = self._rng.choice(self._SCENES)
            narrative = template.format(loc=where)
            state = delta
            options = ["1. Investigate closely.", "2. Move on carefully.", "3. Call out."]
        else:
            # Combat-aware: the player hurts the enemy and takes a hit back.
            narrative = "You lunge forward and your blade bites home; the creature shrieks and claws back."
            state = "ENEMY_HP: -6\nHP: -3\nXP: +4"
            options = ["1. Press the attack.", "2. Back away and guard.", "3. Try to flee."]

        pieces = [
            "<narrative>",
            narrative,
            "</narrative>",
            "<state>",
            state,
            "</state>",
            "<choices>",
            *options,
            "</choices>",
        ]
        return "\n".join(pieces)


# --------------------------------------------------------------------------- #
# factory
# --------------------------------------------------------------------------- #
def build_backend(kind: str | None = None, model_id: str | None = None) -> Backend:
    """Construct the backend selected by ``kind`` or the environment.

    Args:
        kind: Backend selector. Accepted values (case-insensitive, surrounding
            whitespace ignored): "mock"; "inference_api", "api", "inference";
            "transformers", "local". When missing or blank, the
            MICRORPG_BACKEND environment variable is used; when that is also
            unset or blank, "transformers" is used.
        model_id: Model identifier for the non-mock backends; defaults to
            ``DEFAULT_MODEL``. The mock backend does not receive it.

    Returns:
        A freshly constructed backend instance.

    Raises:
        ValueError: If the resolved selector is not one of the accepted values.
    """
    # Fall back with `or` instead of a .get() default so that a variable that
    # is set but empty (or padded with whitespace) resolves to the default
    # rather than failing as an unknown backend.
    requested = kind if kind and kind.strip() else os.environ.get("MICRORPG_BACKEND", "")
    kind = requested.strip().lower() or "transformers"
    model_id = model_id or DEFAULT_MODEL

    if kind == "mock":
        return MockBackend()
    if kind in ("inference_api", "api", "inference"):
        return InferenceAPIBackend(model_id)
    if kind in ("transformers", "local"):
        return TransformersBackend(model_id)
    raise ValueError(f"Unknown backend: {kind!r}")