| """LLM backends for the GPU Goblin agent loop. |
| |
| Two backends ship today, both Qwen, both speaking OpenAI-shape tool calls: |
| |
| * ``QwenHFBackend`` — Qwen via Hugging Face Inference Providers. |
| HF auto-routes to Together / Fireworks-AI / |
| Nebius / etc. Needs ``HF_TOKEN``. The default |
| and the path the public HF Space uses. |
| |
| * ``QwenVLLMBackend`` — Qwen self-hosted on YOUR MI300X via vLLM, |
| OpenAI-compatible at ``http://host:8000/v1``. |
| "All AMD silicon" path. Stand it up with |
| the lablab tutorial recipe, then point Goblin |
| at it with ``GOBLIN_AGENT_BACKEND=qwen-vllm``. |
| |
| Pick one with the env var ``GOBLIN_AGENT_BACKEND``: |
| |
| export GOBLIN_AGENT_BACKEND=qwen-hf # default — uses HF_TOKEN |
| export GOBLIN_AGENT_BACKEND=qwen-vllm # uses GOBLIN_QWEN_VLLM_URL |
| |
| Backend-specific knobs: |
| |
| # qwen-hf |
| HF_TOKEN # required |
| GOBLIN_QWEN_MODEL # default Qwen/Qwen2.5-7B-Instruct |
| GOBLIN_QWEN_PROVIDER # default auto |
| |
| # qwen-vllm |
| GOBLIN_QWEN_VLLM_URL # default http://localhost:8000/v1 |
| GOBLIN_QWEN_VLLM_MODEL # default Qwen/Qwen2.5-7B-Instruct |
| GOBLIN_QWEN_VLLM_KEY # optional auth header (vLLM ignores it) |
| """ |
|
|
| from __future__ import annotations |
|
|
| import os |
|
|
| from agent.backends.base import AgentTurn, Backend, ToolCall |
|
|
# Public surface of this package: the shared protocol types re-exported from
# ``agent.backends.base`` plus the two selection helpers defined in this module.
__all__ = [
    # protocol types
    "AgentTurn",
    "Backend",
    "ToolCall",
    # backend selection helpers
    "make_backend",
    "active_backend_name",
]
|
|
|
|
# Accepted spellings of ``GOBLIN_AGENT_BACKEND`` for each backend. The env
# value is stripped and lower-cased before it is looked up in these sets.
_VLLM_ALIASES = {
    "qwen-vllm",
    "qwen_vllm",
    "vllm",
    "local",
    "qwen-local",
}
_HF_ALIASES = {
    "qwen-hf",
    "qwen_hf",
    "qwen",
    "hf",
}
|
|
|
|
def active_backend_name() -> str:
    """Return the canonical id of the backend selected via the environment.

    Reads ``GOBLIN_AGENT_BACKEND``, strips surrounding whitespace and
    lower-cases it, then maps any known alias to its canonical id
    (``"qwen-vllm"`` or ``"qwen-hf"``). An unset, empty, or unrecognised
    value resolves to ``"qwen-hf"``.
    """
    configured = os.environ.get("GOBLIN_AGENT_BACKEND") or "qwen-hf"
    normalized = configured.strip().lower()

    # Checked in order; the first alias set containing the value wins.
    alias_table = (
        ("qwen-vllm", _VLLM_ALIASES),
        ("qwen-hf", _HF_ALIASES),
    )
    for canonical, aliases in alias_table:
        if normalized in aliases:
            return canonical

    # Unknown spellings deliberately fall back to the default backend.
    return "qwen-hf"
|
|
|
|
def make_backend(system_prompt: str, **kwargs) -> Backend:
    """Build the agent backend chosen by ``GOBLIN_AGENT_BACKEND``.

    Only the keyword arguments a backend understands are passed on; anything
    else in ``kwargs`` is ignored so callers can stay backend-agnostic:

    * both backends receive ``system_prompt``, ``model`` and ``max_tokens``
    * ``QwenVLLMBackend`` additionally receives ``base_url`` and ``api_key``
    * ``QwenHFBackend`` additionally receives ``provider``

    A missing option is forwarded as ``None``, except ``max_tokens`` which
    falls back to 2048.

    The backend module is imported inside the branch that needs it, so only
    the selected backend's module is imported by this call.
    """
    selected = active_backend_name()

    # Options every backend constructor accepts.
    shared_options = {
        "system_prompt": system_prompt,
        "model": kwargs.get("model"),
        "max_tokens": kwargs.get("max_tokens", 2048),
    }

    if selected == "qwen-vllm":
        from agent.backends.qwen_vllm import QwenVLLMBackend

        return QwenVLLMBackend(
            base_url=kwargs.get("base_url"),
            api_key=kwargs.get("api_key"),
            **shared_options,
        )

    # Every other canonical name resolves to the Hugging Face backend.
    from agent.backends.qwen_hf import QwenHFBackend

    return QwenHFBackend(
        provider=kwargs.get("provider"),
        **shared_options,
    )
|
|