art87able committed on
Commit
ee4e9e6
·
verified ·
1 Parent(s): a5214ff

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. configs/endpoints.toml +7 -5
configs/endpoints.toml CHANGED
@@ -56,14 +56,16 @@ key = "EMPTY"
56
  type = "openai_chat_completions"
57
 
58
  # ---------------------------------------------------------------------------
59
- # prime (HOSTED FALLBACK) — Prime Intellect managed inference. Use if the local
60
- # vLLM is down or while waiting on venue compute. Costs PI credits per token (the
61
- # $50 pool covers Prime Inference + Sandboxes + On-Demand GPUs). PRIME_API_KEY is
62
- # read from the environment — never hard-code a key here.
 
 
63
  # ---------------------------------------------------------------------------
64
  [[endpoint]]
65
  endpoint_id = "prime"
66
- model = "poolside/Laguna-XS.2"
67
  url = "https://api.pinference.ai/api/v1"
68
  key = "PRIME_API_KEY"
69
  type = "openai_chat_completions"
 
56
  type = "openai_chat_completions"
57
 
58
  # ---------------------------------------------------------------------------
59
+ # prime (HOSTED) — Prime Intellect managed inference (free for Laguna during the
60
+ # hackathon). PRIME_API_KEY is read from the environment — never hard-code a key.
61
+ # NOTE: the pinference model id is LOWERCASE `poolside/laguna-xs.2` (the mixed-case
62
+ # `poolside/Laguna-XS.2` returns 404 model_not_found on pinference). To disable
63
+ # Laguna's reasoning/thinking on this endpoint, pass `reasoning_effort: "none"`
64
+ # (the raw chat_template_kwargs.enable_thinking flag is ignored here).
65
  # ---------------------------------------------------------------------------
66
  [[endpoint]]
67
  endpoint_id = "prime"
68
+ model = "poolside/laguna-xs.2"
69
  url = "https://api.pinference.ai/api/v1"
70
  key = "PRIME_API_KEY"
71
  type = "openai_chat_completions"