vxkyyy committed on
Commit
1601347
·
1 Parent(s): 8b30743

fix: add Groq as LLM fallback (NVIDIA → Groq → Local)

Browse files

- config.py: add GROQ_CONFIG using GROQ_API_KEY env var
- api.py _get_llm: try Groq after NVIDIA fails, before Ollama
- Fix skip logic to work for all hosted backends (not just 'Cloud' in name)
- Don't pass NVIDIA-specific model_kwargs to Groq (avoids param rejection)
- Cleaner error message mentioning both NVIDIA_API_KEY and GROQ_API_KEY

Files changed (2) hide show
  1. server/api.py +25 -14
  2. src/agentic/config.py +6 -0
server/api.py CHANGED
@@ -91,46 +91,57 @@ STAGE_META: Dict[str, Dict[str, str]] = {
91
 
92
 
93
  def _get_llm(byok_api_key: str = None):
94
- """Mirrors CLI's get_llm() — tries cloud first, falls back to local.
95
- Priority: NVIDIA Nemotron → GLM5 Cloud → VeriReason Local
96
 
97
  If byok_api_key is provided (BYOK plan), it overrides the cloud config key.
98
  """
99
- from agentic.config import CLOUD_CONFIG, LOCAL_CONFIG
100
  from crewai import LLM
101
 
102
  configs = [
103
- ("Cloud Compute Engine", CLOUD_CONFIG),
104
- ("Local Compute Engine", LOCAL_CONFIG),
 
105
  ]
106
 
107
  for name, cfg in configs:
108
- key = byok_api_key if (byok_api_key and "Cloud" in name) else cfg.get("api_key", "")
109
- # Skip cloud configs with no valid key
110
- if "Cloud" in name and (not key or key.strip() in ("", "mock-key", "NA")):
 
111
  continue
112
  try:
113
  extra = {}
114
  if "glm5" in cfg["model"].lower():
115
  extra = {"chat_template_kwargs": {"enable_thinking": True, "clear_thinking": False}}
116
 
117
- llm = LLM(
118
  model=cfg["model"],
119
- base_url=cfg["base_url"],
120
  api_key=key if key and key not in ("NA", "") else "mock-key",
121
  temperature=0.60,
122
  top_p=0.95,
123
  max_completion_tokens=16384,
124
  max_tokens=16384,
125
  timeout=300,
126
- extra_body=extra,
127
- model_kwargs={"top_k": 20, "min_p": 0.0, "presence_penalty": 0, "repetition_penalty": 1}
128
  )
 
 
 
 
 
 
 
 
 
129
  return llm, name
130
  except Exception:
131
  continue
132
 
133
- raise RuntimeError("No valid LLM backend found. Check NVIDIA_API_KEY or local Ollama.")
 
 
 
134
 
135
 
136
  def _emit_event(job_id: str, event_type: str, state: str, message: str, step: int = 0, extra: dict = None):
@@ -944,7 +955,7 @@ async def trigger_build(req: BuildRequest, profile: dict = Depends(get_current_u
944
  except RuntimeError as e:
945
  raise HTTPException(
946
  status_code=503,
947
- detail=str(e) + " Set NVIDIA_API_KEY in HuggingFace Space secrets.",
948
  )
949
 
950
  # Sanitize design name — Verilog identifiers cannot start with a digit
 
91
 
92
 
93
  def _get_llm(byok_api_key: str = None):
94
+ """Tries cloud backends first, then local Ollama.
95
+ Priority: NVIDIA Nemotron → Groq LLaMA-3.3 → VeriReason Local
96
 
97
  If byok_api_key is provided (BYOK plan), it overrides the cloud config key.
98
  """
99
+ from agentic.config import CLOUD_CONFIG, GROQ_CONFIG, LOCAL_CONFIG
100
  from crewai import LLM
101
 
102
  configs = [
103
+ ("Cloud Compute Engine", CLOUD_CONFIG),
104
+ ("Groq Compute Engine", GROQ_CONFIG),
105
+ ("Local Compute Engine", LOCAL_CONFIG),
106
  ]
107
 
108
  for name, cfg in configs:
109
+ is_local = "Local" in name
110
+ key = byok_api_key if (byok_api_key and not is_local) else cfg.get("api_key", "")
111
+ # Skip hosted configs that have no valid API key configured
112
+ if not is_local and (not key or key.strip() in ("", "mock-key", "NA")):
113
  continue
114
  try:
115
  extra = {}
116
  if "glm5" in cfg["model"].lower():
117
  extra = {"chat_template_kwargs": {"enable_thinking": True, "clear_thinking": False}}
118
 
119
+ llm_kwargs: dict = dict(
120
  model=cfg["model"],
 
121
  api_key=key if key and key not in ("NA", "") else "mock-key",
122
  temperature=0.60,
123
  top_p=0.95,
124
  max_completion_tokens=16384,
125
  max_tokens=16384,
126
  timeout=300,
 
 
127
  )
128
+ if cfg.get("base_url"):
129
+ llm_kwargs["base_url"] = cfg["base_url"]
130
+ if extra:
131
+ llm_kwargs["extra_body"] = extra
132
+ # NVIDIA NIM / Ollama accept these extra sampling params; Groq does not
133
+ if "Groq" not in name:
134
+ llm_kwargs["model_kwargs"] = {"top_k": 20, "min_p": 0.0, "presence_penalty": 0, "repetition_penalty": 1}
135
+
136
+ llm = LLM(**llm_kwargs)
137
  return llm, name
138
  except Exception:
139
  continue
140
 
141
+ raise RuntimeError(
142
+ "No valid LLM backend found. "
143
+ "Set NVIDIA_API_KEY or GROQ_API_KEY in HuggingFace Space secrets."
144
+ )
145
 
146
 
147
  def _emit_event(job_id: str, event_type: str, state: str, message: str, step: int = 0, extra: dict = None):
 
955
  except RuntimeError as e:
956
  raise HTTPException(
957
  status_code=503,
958
+ detail=str(e),
959
  )
960
 
961
  # Sanitize design name — Verilog identifiers cannot start with a digit
src/agentic/config.py CHANGED
@@ -26,6 +26,12 @@ LOCAL_CONFIG = {
26
  "api_key": os.environ.get("LLM_API_KEY", "NA"),
27
  }
28
 
 
 
 
 
 
 
29
  # Backward-compat alias used by parts of the codebase/docs
30
  NVIDIA_CONFIG = CLOUD_CONFIG
31
 
 
26
  "api_key": os.environ.get("LLM_API_KEY", "NA"),
27
  }
28
 
29
+ GROQ_CONFIG = {
30
+ "model": os.environ.get("GROQ_MODEL", "groq/llama-3.3-70b-versatile"),
31
+ "base_url": "", # litellm resolves groq routing from the model prefix
32
+ "api_key": os.environ.get("GROQ_API_KEY", ""),
33
+ }
34
+
35
  # Backward-compat alias used by parts of the codebase/docs
36
  NVIDIA_CONFIG = CLOUD_CONFIG
37