Upload folder using huggingface_hub
Browse files
- configs/endpoints.toml +7 -5
configs/endpoints.toml
CHANGED
|
@@ -56,14 +56,16 @@ key = "EMPTY"
|
|
| 56 |
type = "openai_chat_completions"
|
| 57 |
|
| 58 |
# ---------------------------------------------------------------------------
|
| 59 |
-
# prime (HOSTED
|
| 60 |
-
#
|
| 61 |
-
#
|
| 62 |
-
#
|
|
|
|
|
|
|
| 63 |
# ---------------------------------------------------------------------------
|
| 64 |
[[endpoint]]
|
| 65 |
endpoint_id = "prime"
|
| 66 |
-
model = "poolside/
|
| 67 |
url = "https://api.pinference.ai/api/v1"
|
| 68 |
key = "PRIME_API_KEY"
|
| 69 |
type = "openai_chat_completions"
|
|
|
|
| 56 |
type = "openai_chat_completions"
|
| 57 |
|
| 58 |
# ---------------------------------------------------------------------------
|
| 59 |
+
# prime (HOSTED) — Prime Intellect managed inference (free for Laguna during the
|
| 60 |
+
# hackathon). PRIME_API_KEY is read from the environment — never hard-code a key.
|
| 61 |
+
# NOTE: the pinference model id is LOWERCASE `poolside/laguna-xs.2` (the mixed-case
|
| 62 |
+
# `poolside/Laguna-XS.2` returns 404 model_not_found on pinference). To disable
|
| 63 |
+
# Laguna's reasoning/thinking on this endpoint, pass `reasoning_effort: "none"`
|
| 64 |
+
# (the raw `chat_template_kwargs.enable_thinking` flag is ignored here).
|
| 65 |
# ---------------------------------------------------------------------------
|
| 66 |
[[endpoint]]
|
| 67 |
endpoint_id = "prime"
|
| 68 |
+
model = "poolside/laguna-xs.2"
|
| 69 |
url = "https://api.pinference.ai/api/v1"
|
| 70 |
key = "PRIME_API_KEY"
|
| 71 |
type = "openai_chat_completions"
|