omm7 committed on
Commit
003dc9a
·
verified ·
1 Parent(s): 8d76ba4

Upload policy_utils.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. policy_utils.py +68 -0
policy_utils.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import time
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ from toxic_royale_env.policy_models import FramePacket, PolicyAction
9
+
10
+
11
def _outputs_root() -> Path:
    """Return the absolute ``outputs/policy_logs`` directory beside this module."""
    # The directory that contains this file is treated as the repository root.
    module_dir = Path(__file__).resolve().parent
    return module_dir.joinpath("outputs", "policy_logs")
14
+
15
+
16
def append_jsonl(path: Path, obj: dict[str, Any]) -> None:
    """
    Append ``obj`` to ``path`` as one JSON Lines record (one JSON document per line).

    The record is serialized *before* the filesystem is touched, so an
    unserializable ``obj`` raises without creating directories or leaving an
    empty log file behind.

    Args:
        path: Destination file. Missing parent directories are created.
        obj: Mapping to serialize. Non-ASCII characters are written verbatim
            (``ensure_ascii=False``) and the file is encoded as UTF-8.

    Raises:
        TypeError: If ``obj`` contains a value ``json.dumps`` cannot serialize.
        OSError: If the directory or file cannot be created or written.
    """
    # Serialize first: a failure here must not have filesystem side effects.
    line = json.dumps(obj, ensure_ascii=False) + "\n"
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("a", encoding="utf-8") as f:
        f.write(line)
20
+
21
+
22
def rule_policy(packet: FramePacket) -> PolicyAction:
    """
    Always-valid baseline policy.

    Card selection, in priority order:
      1. The first card in hand flagged ``is_playable is True``.
      2. If the player's elixir is known, the first card that is not flagged
         unplayable and whose known cost is at most the current elixir.
      3. The first card in hand whose playability is not explicitly ``False``
         (i.e. playability is unknown).

    If no card is selected (empty hand, no player, or every card is flagged
    unplayable), return a "wait" action with a "yawn" emote. Otherwise return
    a "play" action for the chosen card with a "laugh" emote, targeting
    "back_left" on even ``tick_id`` values and "back_right" on odd ones.
    """
    player = packet.player
    hand = (player.hand if player else []) or []
    elixir = float(player.elixir) if (player and player.elixir is not None) else None

    chosen = next((c for c in hand if c.is_playable is True), None)

    # Fallbacks only consider cards that are not explicitly marked unplayable;
    # playing a card known to be unplayable would not be a valid action.
    candidates = [c for c in hand if c.is_playable is not False]

    if chosen is None and elixir is not None:
        # Skip cards with an unknown cost rather than comparing None to a float.
        chosen = next(
            (c for c in candidates if c.cost is not None and c.cost <= elixir),
            None,
        )

    if chosen is None and candidates:
        chosen = candidates[0]

    if chosen is None:
        return PolicyAction(kind="wait", emote="yawn")

    # Alternate back zones by tick_id parity for minimal diversity.
    tick_id = packet.meta.tick_id
    zone = "back_left" if (tick_id % 2 == 0) else "back_right"

    # Lightweight BM: laugh when we can play; yawn when we can't (handled above).
    return PolicyAction(kind="play", card=chosen.card, zone=zone, emote="laugh")
55
+
56
+
57
def should_gate(packet: FramePacket, threshold: float = 0.70) -> bool:
    """
    Report whether the packet's overall detection quality is below ``threshold``.

    Returns ``False`` when the packet has no debug info, no detections-quality
    record, or no overall score; otherwise returns ``True`` only if the
    overall score, converted to ``float``, is strictly less than ``threshold``.
    """
    debug = packet.debug
    quality = debug.detections_quality if debug else None
    score = quality.overall if quality else None
    return score is not None and float(score) < threshold
64
+
65
+
66
def now_ms() -> int:
    """Return the current wall-clock time as whole milliseconds since the Unix epoch."""
    seconds = time.time()
    return int(seconds * 1000)
68
+