Spaces:
Sleeping
Sleeping
updated inference.py
Browse files
- inference.py +3 -2
inference.py
CHANGED
|
@@ -203,7 +203,7 @@ def get_model_message(client: OpenAI, step: int, features: list[list[float]]) ->
|
|
| 203 |
content = (completion.choices[0].message.content or "").strip()
|
| 204 |
return _parse_llm_decisions(content, len(features))
|
| 205 |
except Exception as exc:
|
| 206 |
-
print(f"[DEBUG] Model request failed: {exc}", flush=True)
|
| 207 |
time.sleep(1)
|
| 208 |
|
| 209 |
fallback_decisions = []
|
|
@@ -257,7 +257,8 @@ def main() -> None:
|
|
| 257 |
|
| 258 |
obs = env.step(action)
|
| 259 |
|
| 260 |
-
|
|
|
|
| 261 |
done = obs.done
|
| 262 |
error = None
|
| 263 |
|
|
|
|
| 203 |
content = (completion.choices[0].message.content or "").strip()
|
| 204 |
return _parse_llm_decisions(content, len(features))
|
| 205 |
except Exception as exc:
|
| 206 |
+
print(f"[DEBUG] Model request failed: {exc}", file=sys.stderr, flush=True)
|
| 207 |
time.sleep(1)
|
| 208 |
|
| 209 |
fallback_decisions = []
|
|
|
|
| 257 |
|
| 258 |
obs = env.step(action)
|
| 259 |
|
| 260 |
+
base_reward = float(obs.reward) if obs.reward is not None else 0.1
|
| 261 |
+
reward = float(max(0.01, min(0.99, base_reward)))
|
| 262 |
done = obs.done
|
| 263 |
error = None
|
| 264 |
|