maxxie114 Claude Sonnet 4.6 committed on
Commit
9ee3fcd
·
1 Parent(s): af6803d

Switch Oracle judge to OpenAI (gpt-5.4), support both backends

Browse files

- Replace Anthropic-only client with _build_llm_client() that tries
OPENAI_API_KEY first, then ANTHROPIC_API_KEY as fallback
- Default model changed to gpt-5.4
- Use chat.completions.create for OpenAI, messages.create for Anthropic

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. server/app.py +45 -25
server/app.py CHANGED
@@ -192,23 +192,32 @@ def _generic_scientist_system_prompt() -> str:
192
  )
193
 
194
  # ---------------------------------------------------------------------------
195
- # Oracle LLM judge — optional; requires ANTHROPIC_API_KEY
196
  # ---------------------------------------------------------------------------
197
 
198
  _ORACLE_ENABLED = os.environ.get("REPLICALAB_ORACLE_ENABLED", "1") == "1"
199
- _ORACLE_MODEL = os.environ.get("REPLICALAB_ORACLE_MODEL", "claude-haiku-4-5-20251001")
200
 
201
 
202
def _build_anthropic_client() -> Optional[Any]:
    """Return an Anthropic client built from ANTHROPIC_API_KEY, or None.

    None is returned when the environment variable is unset or empty, or
    when the ``anthropic`` package cannot be imported (a warning is logged
    in the latter case).
    """
    key = os.environ.get("ANTHROPIC_API_KEY")
    if key:
        try:
            # Imported lazily so the server still starts without the package.
            import anthropic  # type: ignore
            return anthropic.Anthropic(api_key=key)
        except ImportError:
            log.warning("anthropic package not installed — Oracle judge unavailable")
    return None
 
 
 
 
 
 
 
 
 
212
 
213
 
214
  def _generate_judge_verdict(
@@ -216,13 +225,14 @@ def _generate_judge_verdict(
216
  scenario_pack: Any,
217
  conversation_history: list,
218
  ) -> str:
219
- """Call Anthropic to produce Judge Aldric's comprehensive verdict."""
220
  if not _ORACLE_ENABLED:
221
- return "Deterministic scoring only. Set REPLICALAB_ORACLE_ENABLED=1 and ANTHROPIC_API_KEY for LLM verdicts."
222
 
223
- client = _build_anthropic_client()
224
- if client is None:
225
- return "No LLM API key configured (ANTHROPIC_API_KEY). Deterministic scoring applied."
 
226
 
227
  # Format final protocol
228
  if state.current_protocol:
@@ -296,14 +306,24 @@ def _generate_judge_verdict(
296
  )
297
 
298
  try:
299
- import anthropic # type: ignore
300
- response = client.messages.create(
301
- model=_ORACLE_MODEL,
302
- max_tokens=1024,
303
- system=system_prompt,
304
- messages=[{"role": "user", "content": user_prompt}],
305
- )
306
- return response.content[0].text
 
 
 
 
 
 
 
 
 
 
307
  except Exception:
308
  log.exception("Oracle verdict generation failed")
309
  return "Judge Aldric was unable to render a verdict due to an API error."
 
192
  )
193
 
194
  # ---------------------------------------------------------------------------
195
+ # Oracle LLM judge — optional; requires OPENAI_API_KEY or ANTHROPIC_API_KEY
196
  # ---------------------------------------------------------------------------
197
 
198
  _ORACLE_ENABLED = os.environ.get("REPLICALAB_ORACLE_ENABLED", "1") == "1"
199
+ _ORACLE_MODEL = os.environ.get("REPLICALAB_ORACLE_MODEL", "gpt-5.4")
200
 
201
 
202
def _build_llm_client() -> Optional[Any]:
    """Build the LLM client used by the Oracle judge.

    OPENAI_API_KEY is tried first. ANTHROPIC_API_KEY is the fallback, used
    when no OpenAI key is configured or when the ``openai`` package cannot
    be imported.

    Returns:
        A ``(client, backend)`` tuple, where ``backend`` is ``"openai"`` or
        ``"anthropic"``, or ``None`` when no backend is usable (no key is
        set, or the package for every configured key is missing).
    """
    # Strip the value so a blank or whitespace-only entry (e.g. an unfilled
    # .env template line) counts as "not configured" rather than blocking
    # the Anthropic fallback with an unusable key.
    openai_key = (os.environ.get("OPENAI_API_KEY") or "").strip()
    if openai_key:
        try:
            # Imported lazily so the server still starts without the package.
            import openai as _openai  # type: ignore
        except ImportError:
            # Not fatal on its own: the Anthropic backend below may still work.
            log.warning("openai package not installed — OpenAI backend unavailable for Oracle judge")
        else:
            return (_openai.OpenAI(api_key=openai_key), "openai")

    anthropic_key = (os.environ.get("ANTHROPIC_API_KEY") or "").strip()
    if anthropic_key:
        try:
            import anthropic as _anthropic  # type: ignore
        except ImportError:
            # Last fallback, so the judge really is unavailable here.
            log.warning("anthropic package not installed — Oracle judge unavailable")
        else:
            return (_anthropic.Anthropic(api_key=anthropic_key), "anthropic")

    return None
221
 
222
 
223
  def _generate_judge_verdict(
 
225
  scenario_pack: Any,
226
  conversation_history: list,
227
  ) -> str:
228
+ """Call an LLM to produce Judge Aldric's comprehensive verdict."""
229
  if not _ORACLE_ENABLED:
230
+ return "Deterministic scoring only. Set REPLICALAB_ORACLE_ENABLED=1 and OPENAI_API_KEY for LLM verdicts."
231
 
232
+ result = _build_llm_client()
233
+ if result is None:
234
+ return "No LLM API key configured (OPENAI_API_KEY or ANTHROPIC_API_KEY). Deterministic scoring applied."
235
+ client, backend = result
236
 
237
  # Format final protocol
238
  if state.current_protocol:
 
306
  )
307
 
308
  try:
309
+ if backend == "openai":
310
+ response = client.chat.completions.create(
311
+ model=_ORACLE_MODEL,
312
+ max_tokens=1024,
313
+ messages=[
314
+ {"role": "system", "content": system_prompt},
315
+ {"role": "user", "content": user_prompt},
316
+ ],
317
+ )
318
+ return response.choices[0].message.content
319
+ else: # anthropic
320
+ response = client.messages.create(
321
+ model=_ORACLE_MODEL,
322
+ max_tokens=1024,
323
+ system=system_prompt,
324
+ messages=[{"role": "user", "content": user_prompt}],
325
+ )
326
+ return response.content[0].text
327
  except Exception:
328
  log.exception("Oracle verdict generation failed")
329
  return "Judge Aldric was unable to render a verdict due to an API error."