fix: add Groq as LLM fallback (NVIDIA → Groq → Local)
- config.py: add GROQ_CONFIG using GROQ_API_KEY env var
- api.py _get_llm: try Groq after NVIDIA fails, before Ollama
- Fix skip logic to work for all hosted backends (not just 'Cloud' in name)
- Don't pass NVIDIA-specific model_kwargs to Groq (avoids param rejection)
- Cleaner error message mentioning both NVIDIA_API_KEY and GROQ_API_KEY
- server/api.py +25 -14
- src/agentic/config.py +6 -0
server/api.py
CHANGED
|
@@ -91,46 +91,57 @@ STAGE_META: Dict[str, Dict[str, str]] = {
|
|
| 91 |
|
| 92 |
|
| 93 |
def _get_llm(byok_api_key: str = None):
|
| 94 |
-
"""
|
| 95 |
-
Priority: NVIDIA Nemotron →
|
| 96 |
|
| 97 |
If byok_api_key is provided (BYOK plan), it overrides the cloud config key.
|
| 98 |
"""
|
| 99 |
-
from agentic.config import CLOUD_CONFIG, LOCAL_CONFIG
|
| 100 |
from crewai import LLM
|
| 101 |
|
| 102 |
configs = [
|
| 103 |
-
("Cloud Compute Engine",
|
| 104 |
-
("
|
|
|
|
| 105 |
]
|
| 106 |
|
| 107 |
for name, cfg in configs:
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
|
|
|
| 111 |
continue
|
| 112 |
try:
|
| 113 |
extra = {}
|
| 114 |
if "glm5" in cfg["model"].lower():
|
| 115 |
extra = {"chat_template_kwargs": {"enable_thinking": True, "clear_thinking": False}}
|
| 116 |
|
| 117 |
-
|
| 118 |
model=cfg["model"],
|
| 119 |
-
base_url=cfg["base_url"],
|
| 120 |
api_key=key if key and key not in ("NA", "") else "mock-key",
|
| 121 |
temperature=0.60,
|
| 122 |
top_p=0.95,
|
| 123 |
max_completion_tokens=16384,
|
| 124 |
max_tokens=16384,
|
| 125 |
timeout=300,
|
| 126 |
-
extra_body=extra,
|
| 127 |
-
model_kwargs={"top_k": 20, "min_p": 0.0, "presence_penalty": 0, "repetition_penalty": 1}
|
| 128 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
return llm, name
|
| 130 |
except Exception:
|
| 131 |
continue
|
| 132 |
|
| 133 |
-
raise RuntimeError(
|
|
|
|
|
|
|
|
|
|
| 134 |
|
| 135 |
|
| 136 |
def _emit_event(job_id: str, event_type: str, state: str, message: str, step: int = 0, extra: dict = None):
|
|
@@ -944,7 +955,7 @@ async def trigger_build(req: BuildRequest, profile: dict = Depends(get_current_u
|
|
| 944 |
except RuntimeError as e:
|
| 945 |
raise HTTPException(
|
| 946 |
status_code=503,
|
| 947 |
-
detail=str(e)
|
| 948 |
)
|
| 949 |
|
| 950 |
# Sanitize design name — Verilog identifiers cannot start with a digit
|
|
|
|
| 91 |
|
| 92 |
|
| 93 |
def _get_llm(byok_api_key: "str | None" = None):
    """Build a crewai ``LLM``, trying backends in priority order.

    Priority: NVIDIA Nemotron → Groq LLaMA-3.3 → VeriReason Local (Ollama).

    Args:
        byok_api_key: Optional user-supplied key (BYOK plan). When given it
            overrides the configured key for hosted backends only — the local
            backend always uses its own configured key.

    Returns:
        Tuple of ``(llm, backend_name)`` for the first backend that
        constructs successfully.

    Raises:
        RuntimeError: When no backend could be constructed (no valid API key
            for any hosted backend and local construction failed too).
    """
    from agentic.config import CLOUD_CONFIG, GROQ_CONFIG, LOCAL_CONFIG
    from crewai import LLM

    # Hosted backends first; local Ollama is the last resort.
    configs = [
        ("Cloud Compute Engine", CLOUD_CONFIG),
        ("Groq Compute Engine", GROQ_CONFIG),
        ("Local Compute Engine", LOCAL_CONFIG),
    ]

    # Sentinel values that mean "no real key configured".
    placeholder_keys = ("", "mock-key", "NA")

    for name, cfg in configs:
        is_local = "Local" in name
        # BYOK overrides only hosted backends, never the local one.
        key = byok_api_key if (byok_api_key and not is_local) else cfg.get("api_key", "")
        # Skip hosted configs that have no valid API key configured
        if not is_local and (not key or key.strip() in placeholder_keys):
            continue
        try:
            extra = {}
            if "glm5" in cfg["model"].lower():
                # glm5 models take thinking-mode flags through the chat template.
                extra = {"chat_template_kwargs": {"enable_thinking": True, "clear_thinking": False}}

            llm_kwargs = {
                "model": cfg["model"],
                # Local Ollama ships a placeholder key, but litellm still
                # requires *some* api_key value — substitute "mock-key".
                "api_key": key if key and key not in ("NA", "") else "mock-key",
                "temperature": 0.60,
                "top_p": 0.95,
                "max_completion_tokens": 16384,
                "max_tokens": 16384,
                "timeout": 300,
            }
            if cfg.get("base_url"):
                llm_kwargs["base_url"] = cfg["base_url"]
            if extra:
                llm_kwargs["extra_body"] = extra
            # NVIDIA NIM / Ollama accept these extra sampling params; Groq does not
            # (it rejects unknown parameters), so omit them for Groq.
            if "Groq" not in name:
                llm_kwargs["model_kwargs"] = {"top_k": 20, "min_p": 0.0, "presence_penalty": 0, "repetition_penalty": 1}

            llm = LLM(**llm_kwargs)
            return llm, name
        except Exception:
            # Best-effort fallback: any construction failure moves on to the
            # next backend instead of aborting the request.
            continue

    raise RuntimeError(
        "No valid LLM backend found. "
        "Set NVIDIA_API_KEY or GROQ_API_KEY in HuggingFace Space secrets."
    )
|
| 145 |
|
| 146 |
|
| 147 |
def _emit_event(job_id: str, event_type: str, state: str, message: str, step: int = 0, extra: dict = None):
|
|
|
|
| 955 |
except RuntimeError as e:
|
| 956 |
raise HTTPException(
|
| 957 |
status_code=503,
|
| 958 |
+
detail=str(e),
|
| 959 |
)
|
| 960 |
|
| 961 |
# Sanitize design name — Verilog identifiers cannot start with a digit
|
src/agentic/config.py
CHANGED
|
@@ -26,6 +26,12 @@ LOCAL_CONFIG = {
|
|
| 26 |
"api_key": os.environ.get("LLM_API_KEY", "NA"),
|
| 27 |
}
|
| 28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
# Backward-compat alias used by parts of the codebase/docs
|
| 30 |
NVIDIA_CONFIG = CLOUD_CONFIG
|
| 31 |
|
|
|
|
| 26 |
"api_key": os.environ.get("LLM_API_KEY", "NA"),
|
| 27 |
}
|
| 28 |
|
| 29 |
+
# Groq hosted fallback (OpenAI-compatible). The "groq/" prefix on the model
# name is enough for litellm to route the request, so base_url stays empty.
GROQ_CONFIG = {
    "model": os.getenv("GROQ_MODEL", "groq/llama-3.3-70b-versatile"),
    "base_url": "",  # litellm resolves groq routing from the model prefix
    "api_key": os.getenv("GROQ_API_KEY", ""),
}
|
| 34 |
+
|
| 35 |
# Backward-compat alias used by parts of the codebase/docs
|
| 36 |
NVIDIA_CONFIG = CLOUD_CONFIG
|
| 37 |
|