gpu-goblin / agent /backends /__init__.py
sasukeUchiha123's picture
Upload agent/backends/__init__.py with huggingface_hub
5b450c1 verified
Raw
History Blame Contribute Delete
3.33 kB
"""LLM backends for the GPU Goblin agent loop.
Two backends ship today, both Qwen, both speaking OpenAI-shape tool calls:
* ``QwenHFBackend`` — Qwen via Hugging Face Inference Providers.
HF auto-routes to Together / Fireworks-AI /
Nebius / etc. Needs ``HF_TOKEN``. The default
and the path the public HF Space uses.
* ``QwenVLLMBackend`` — Qwen self-hosted on YOUR MI300X via vLLM,
OpenAI-compatible at ``http://host:8000/v1``.
"All AMD silicon" path. Stand it up with
the lablab tutorial recipe, then point Goblin
at it with ``GOBLIN_AGENT_BACKEND=qwen-vllm``.
Pick one with the env var ``GOBLIN_AGENT_BACKEND``:
export GOBLIN_AGENT_BACKEND=qwen-hf # default — uses HF_TOKEN
export GOBLIN_AGENT_BACKEND=qwen-vllm # uses GOBLIN_QWEN_VLLM_URL
Backend-specific knobs:
# qwen-hf
HF_TOKEN # required
GOBLIN_QWEN_MODEL # default Qwen/Qwen2.5-7B-Instruct
GOBLIN_QWEN_PROVIDER # default auto
# qwen-vllm
GOBLIN_QWEN_VLLM_URL # default http://localhost:8000/v1
GOBLIN_QWEN_VLLM_MODEL # default Qwen/Qwen2.5-7B-Instruct
GOBLIN_QWEN_VLLM_KEY # optional auth header (vLLM ignores it)
"""
from __future__ import annotations
import os
from agent.backends.base import AgentTurn, Backend, ToolCall
# Public names of this package: the three types re-exported from
# ``agent.backends.base`` plus the two helpers defined in this module.
__all__ = [
"AgentTurn",
"Backend",
"ToolCall",
"make_backend",
"active_backend_name",
]
# Accepted spellings of GOBLIN_AGENT_BACKEND, compared after the value has
# been stripped and lower-cased. Each set maps to one canonical backend id.
_VLLM_ALIASES = {"qwen-vllm", "qwen_vllm", "vllm", "local", "qwen-local"}
_HF_ALIASES = {"qwen-hf", "qwen_hf", "qwen", "hf"}


def active_backend_name() -> str:
    """Return the canonical id of the backend selected by the environment.

    Reads ``GOBLIN_AGENT_BACKEND``, strips surrounding whitespace and
    lower-cases it, then maps the result to ``"qwen-vllm"`` or ``"qwen-hf"``.

    An unset, empty or whitespace-only variable selects ``"qwen-hf"``.
    A value that matches neither alias set also selects ``"qwen-hf"``, but a
    warning is logged first so that a typo (for example ``qwen-vlm``) does
    not silently pick a different backend than the operator intended.

    Returns:
        Either ``"qwen-vllm"`` or ``"qwen-hf"``.
    """
    raw = (os.environ.get("GOBLIN_AGENT_BACKEND") or "qwen-hf").strip().lower()
    if raw in _VLLM_ALIASES:
        return "qwen-vllm"
    # ``raw`` is empty when the variable held only whitespace; treat that the
    # same as "unset" and stay quiet.
    if raw and raw not in _HF_ALIASES:
        import logging  # local import: only needed on this diagnostic path

        logging.getLogger(__name__).warning(
            "Unrecognised GOBLIN_AGENT_BACKEND=%r; falling back to 'qwen-hf'",
            raw,
        )
    return "qwen-hf"
def make_backend(system_prompt: str, **kwargs) -> Backend:
    """Build the backend that ``GOBLIN_AGENT_BACKEND`` currently selects.

    Recognised keyword options are ``model``, ``provider``, ``base_url``,
    ``api_key`` and ``max_tokens``. Each backend is handed only the options
    listed in its constructor call below; anything else in ``kwargs`` is
    ignored without error, so callers do not need to know which backend is
    active. Options that are not supplied are passed as ``None``, except
    ``max_tokens``, which defaults to 2048.

    The backend class is imported inside the branch that uses it, so this
    call imports only the module of the backend it actually constructs.
    """
    selected = active_backend_name()

    # Options shared by both backends.
    model_name = kwargs.get("model")
    token_limit = kwargs.get("max_tokens", 2048)

    if selected != "qwen-vllm":
        from agent.backends.qwen_hf import QwenHFBackend

        return QwenHFBackend(
            system_prompt=system_prompt,
            model=model_name,
            provider=kwargs.get("provider"),
            max_tokens=token_limit,
        )

    from agent.backends.qwen_vllm import QwenVLLMBackend

    return QwenVLLMBackend(
        system_prompt=system_prompt,
        model=model_name,
        base_url=kwargs.get("base_url"),
        api_key=kwargs.get("api_key"),
        max_tokens=token_limit,
    )