Spaces:

lablab-ai-amd-developer-hackathon
/

gpu-goblin

Sleeping

App Files Files Community

gpu-goblin / agent /backends /__init__.py

sasukeUchiha123

Upload agent/backends/__init__.py with huggingface_hub

5b450c1 verified about 2 months ago

Raw

History Blame Contribute Delete

3.33 kB

	"""LLM backends for the GPU Goblin agent loop.

	Two backends ship today, both Qwen, both speaking OpenAI-shape tool calls:

	* ``QwenHFBackend`` — Qwen via Hugging Face Inference Providers.
	  HF auto-routes to Together / Fireworks-AI / Nebius / etc.
	  Needs ``HF_TOKEN``. This is the default, and the path the
	  public HF Space uses.

	* ``QwenVLLMBackend`` — Qwen self-hosted on YOUR MI300X via vLLM,
	  OpenAI-compatible at ``http://host:8000/v1``. This is the
	  "All AMD silicon" path. Stand it up with the lablab tutorial
	  recipe, then point Goblin at it with
	  ``GOBLIN_AGENT_BACKEND=qwen-vllm``.

	Pick one with the env var ``GOBLIN_AGENT_BACKEND``:

	export GOBLIN_AGENT_BACKEND=qwen-hf # default — uses HF_TOKEN
	export GOBLIN_AGENT_BACKEND=qwen-vllm # uses GOBLIN_QWEN_VLLM_URL

	Backend-specific knobs:

	# qwen-hf
	HF_TOKEN # required
	GOBLIN_QWEN_MODEL # default Qwen/Qwen2.5-7B-Instruct
	GOBLIN_QWEN_PROVIDER # default auto

	# qwen-vllm
	GOBLIN_QWEN_VLLM_URL # default http://localhost:8000/v1
	GOBLIN_QWEN_VLLM_MODEL # default Qwen/Qwen2.5-7B-Instruct
	GOBLIN_QWEN_VLLM_KEY # optional auth header (vLLM ignores it unless launched with ``--api-key``)
	"""

	from __future__ import annotations

	import os

	from agent.backends.base import AgentTurn, Backend, ToolCall

	# Public surface of this package: these are the names bound by
	# ``from agent.backends import *``.
	__all__ = [
	    # Shared types imported above from ``agent.backends.base``.
	    "AgentTurn",
	    "Backend",
	    "ToolCall",
	    # Factory helpers defined further down in this module.
	    "make_backend",
	    "active_backend_name",
	]


	# Accepted spellings of ``GOBLIN_AGENT_BACKEND``. The env value is stripped
	# and lower-cased before lookup, so every entry here must be lower-case.

	# Spellings that select the self-hosted vLLM backend ("qwen-vllm").
	_VLLM_ALIASES = {
	    "local",
	    "qwen-local",
	    "qwen-vllm",
	    "qwen_vllm",
	    "vllm",
	}

	# Spellings that select the Hugging Face backend ("qwen-hf").
	_HF_ALIASES = {
	    "hf",
	    "qwen",
	    "qwen-hf",
	    "qwen_hf",
	}


	def active_backend_name() -> str:
	    """Return the canonical id of the backend chosen via the environment.

	    Reads ``GOBLIN_AGENT_BACKEND``, trims surrounding whitespace and
	    lower-cases it, then maps any known alias onto ``"qwen-vllm"`` or
	    ``"qwen-hf"``. An unset, empty, or unrecognised value resolves to
	    ``"qwen-hf"`` (the safe default).
	    """
	    requested = os.environ.get("GOBLIN_AGENT_BACKEND") or "qwen-hf"
	    requested = requested.strip().lower()

	    # Checked in order; the first alias set containing the value wins.
	    alias_table = (
	        ("qwen-vllm", _VLLM_ALIASES),
	        ("qwen-hf", _HF_ALIASES),
	    )
	    for canonical, aliases in alias_table:
	        if requested in aliases:
	            return canonical

	    # Deliberate fallback: unknown names select the default backend.
	    return "qwen-hf"


	def make_backend(system_prompt: str, **kwargs) -> Backend:
	    """Build the agent backend that ``GOBLIN_AGENT_BACKEND`` selects.

	    ``system_prompt`` is handed to the backend constructor unchanged.
	    Optional settings are read from ``kwargs``: ``model`` and
	    ``max_tokens`` (default ``2048``) go to either backend, ``base_url``
	    and ``api_key`` are read only for ``qwen-vllm``, and ``provider`` is
	    read only for ``qwen-hf``. A missing key is forwarded as ``None``;
	    any other key is ignored, so callers can stay backend-agnostic.

	    Returns a ``QwenVLLMBackend`` when the active backend is
	    ``qwen-vllm`` and a ``QwenHFBackend`` otherwise.

	    Each backend module is imported inside the branch that uses it, so
	    this call imports only the module of the backend actually selected.
	    """
	    use_vllm = active_backend_name() == "qwen-vllm"

	    # Settings shared by both constructors.
	    model_id = kwargs.get("model")
	    token_limit = kwargs.get("max_tokens", 2048)

	    if not use_vllm:
	        # Lazy import: only pulled in when the HF backend is selected.
	        from agent.backends.qwen_hf import QwenHFBackend

	        return QwenHFBackend(
	            system_prompt=system_prompt,
	            model=model_id,
	            provider=kwargs.get("provider"),
	            max_tokens=token_limit,
	        )

	    # Lazy import: only pulled in when the vLLM backend is selected.
	    from agent.backends.qwen_vllm import QwenVLLMBackend

	    return QwenVLLMBackend(
	        system_prompt=system_prompt,
	        model=model_id,
	        base_url=kwargs.get("base_url"),
	        api_key=kwargs.get("api_key"),
	        max_tokens=token_limit,
	    )