SamaKool committed on
Commit
e6c33aa
·
1 Parent(s): a6c300d

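Updated inference.py: send the model-request failure debug message to stderr and clamp the step reward to the range [0.01, 0.99]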

Browse files
Files changed (1) hide show
  1. inference.py +3 -2
inference.py CHANGED
@@ -203,7 +203,7 @@ def get_model_message(client: OpenAI, step: int, features: list[list[float]]) ->
203
  content = (completion.choices[0].message.content or "").strip()
204
  return _parse_llm_decisions(content, len(features))
205
  except Exception as exc:
206
- print(f"[DEBUG] Model request failed: {exc}", flush=True)
207
  time.sleep(1)
208
 
209
  fallback_decisions = []
@@ -257,7 +257,8 @@ def main() -> None:
257
 
258
  obs = env.step(action)
259
 
260
- reward = float(obs.reward) if obs.reward is not None else 0.1
 
261
  done = obs.done
262
  error = None
263
 
 
203
  content = (completion.choices[0].message.content or "").strip()
204
  return _parse_llm_decisions(content, len(features))
205
  except Exception as exc:
206
+ print(f"[DEBUG] Model request failed: {exc}", file=sys.stderr, flush=True)
207
  time.sleep(1)
208
 
209
  fallback_decisions = []
 
257
 
258
  obs = env.step(action)
259
 
260
+ base_reward = float(obs.reward) if obs.reward is not None else 0.1
261
+ reward = float(max(0.01, min(0.99, base_reward)))
262
  done = obs.done
263
  error = None
264