RFTSystems committed on
Commit
387797b
·
verified ·
1 Parent(s): 0b23bcd

Update drp/simulate.py

Browse files
Files changed (1) hide show
  1. drp/simulate.py +26 -11
drp/simulate.py CHANGED
@@ -1,13 +1,12 @@
1
  import os
2
  import random
3
  import uuid
4
- from typing import Any, Dict, List, Optional, Tuple
5
 
6
  from .bundle import write_bundle_zip
7
 
8
 
9
  def _env_fingerprint() -> Dict[str, Any]:
10
- # Keep lightweight; users can expand this in real exporters
11
  return {
12
  "python": os.environ.get("PYTHON_VERSION") or "unknown",
13
  "space": os.environ.get("SPACE_ID") or os.environ.get("HF_SPACE_ID") or "unknown",
@@ -22,6 +21,7 @@ def make_demo_bundle_zip(out_path: str, *, seed: int, chaos: float, label: str)
22
  """
23
  Creates a synthetic agent timeline with controlled randomness.
24
  'chaos' increases divergence probability.
 
25
  """
26
  rng = random.Random(seed)
27
  run_id = f"demo-{label}-{uuid.uuid4().hex[:8]}"
@@ -30,37 +30,50 @@ def make_demo_bundle_zip(out_path: str, *, seed: int, chaos: float, label: str)
30
 
31
  events: List[Dict[str, Any]] = []
32
  memory: Dict[str, Any] = {"goal": "reach_target", "notes": []}
 
 
 
 
 
 
 
33
 
34
  for i in range(40):
35
- # planning
36
  action = rng.choice(["scan", "move", "ask_tool", "write_memory"])
37
  if rng.random() < chaos:
38
  action = rng.choice(["scan", "move", "ask_tool", "write_memory", "panic"])
39
 
40
- events.append(_mk_event("plan_step", f"t{i}.plan", {"action": action, "score": rng.random()}))
 
41
 
42
  if action == "ask_tool":
43
  q = rng.choice(["price", "status", "latency", "risk"])
44
  events.append(_mk_event("tool_call", f"t{i}.tool_call", {"tool": "mock_api", "query": q}))
45
- # tool sometimes flakes
46
  if rng.random() < (0.15 + chaos * 0.2):
47
  events.append(_mk_event("tool_result", f"t{i}.tool_result", {"ok": False, "error": "timeout"}))
 
48
  else:
49
  val = rng.randint(1, 100)
50
  events.append(_mk_event("tool_result", f"t{i}.tool_result", {"ok": True, "value": val}))
 
51
  elif action == "write_memory":
52
  note = rng.choice(["cached", "retry", "validated", "unsafe", "needs_review"])
53
  memory["notes"].append(note)
54
  events.append(_mk_event("memory_write", f"t{i}.mem", {"write": {"notes": list(memory["notes"])}}))
 
55
  elif action == "panic":
56
  events.append(_mk_event("guardrail", f"t{i}.guardrail", {"blocked": True, "reason": "anomaly"}))
57
- events.append(_mk_event("state_snapshot", f"t{i}.state", {"memory": memory, "mode": "halt"}))
 
 
58
  break
59
  else:
60
- # move / scan influences a synthetic "state"
61
- events.append(_mk_event("state_snapshot", f"t{i}.state", {"x": rng.randint(0, 9), "y": rng.randint(0, 9), "memory": memory}))
 
 
 
62
 
63
- # llm sample (synthetic text)
64
  txt = rng.choice(
65
  [
66
  "Proceed with caution.",
@@ -88,6 +101,7 @@ def make_demo_bundle_zip(out_path: str, *, seed: int, chaos: float, label: str)
88
  model_id=model_id,
89
  env_fingerprint=_env_fingerprint(),
90
  events_payloads=events,
 
91
  )
92
 
93
 
@@ -101,8 +115,7 @@ def fork_patch_bundle(
101
  patch_payload_json: Optional[Dict[str, Any]] = None,
102
  ) -> str:
103
  """
104
- Simple “what-if” fork: take an existing bundle and patch a single event
105
- (kind/step/payload) then re-hash-chain and re-emit as a new run.
106
  """
107
  from .bundle import load_bundle, write_bundle_zip
108
 
@@ -136,4 +149,6 @@ def fork_patch_bundle(
136
  model_id=b.manifest.get("model_id", "unknown"),
137
  env_fingerprint=b.manifest.get("env", {}),
138
  events_payloads=payloads,
 
 
139
  )
 
1
  import os
2
  import random
3
  import uuid
4
+ from typing import Any, Dict, List, Optional
5
 
6
  from .bundle import write_bundle_zip
7
 
8
 
9
  def _env_fingerprint() -> Dict[str, Any]:
 
10
  return {
11
  "python": os.environ.get("PYTHON_VERSION") or "unknown",
12
  "space": os.environ.get("SPACE_ID") or os.environ.get("HF_SPACE_ID") or "unknown",
 
21
  """
22
  Creates a synthetic agent timeline with controlled randomness.
23
  'chaos' increases divergence probability.
24
+ Also includes reward_total in state snapshots to demonstrate reward delta.
25
  """
26
  rng = random.Random(seed)
27
  run_id = f"demo-{label}-{uuid.uuid4().hex[:8]}"
 
30
 
31
  events: List[Dict[str, Any]] = []
32
  memory: Dict[str, Any] = {"goal": "reach_target", "notes": []}
33
+ reward_total = 0.0
34
+
35
+ # Optional replay link template (demo)
36
+ replay = {
37
+ "base_url": "https://example.com/replay",
38
+ "pattern": "/?run_id={run_id}&i={i}",
39
+ }
40
 
41
  for i in range(40):
 
42
  action = rng.choice(["scan", "move", "ask_tool", "write_memory"])
43
  if rng.random() < chaos:
44
  action = rng.choice(["scan", "move", "ask_tool", "write_memory", "panic"])
45
 
46
+ score = rng.random()
47
+ events.append(_mk_event("plan_step", f"t{i}.plan", {"action": action, "score": score}))
48
 
49
  if action == "ask_tool":
50
  q = rng.choice(["price", "status", "latency", "risk"])
51
  events.append(_mk_event("tool_call", f"t{i}.tool_call", {"tool": "mock_api", "query": q}))
 
52
  if rng.random() < (0.15 + chaos * 0.2):
53
  events.append(_mk_event("tool_result", f"t{i}.tool_result", {"ok": False, "error": "timeout"}))
54
+ reward_total -= 0.5
55
  else:
56
  val = rng.randint(1, 100)
57
  events.append(_mk_event("tool_result", f"t{i}.tool_result", {"ok": True, "value": val}))
58
+ reward_total += 0.2
59
  elif action == "write_memory":
60
  note = rng.choice(["cached", "retry", "validated", "unsafe", "needs_review"])
61
  memory["notes"].append(note)
62
  events.append(_mk_event("memory_write", f"t{i}.mem", {"write": {"notes": list(memory["notes"])}}))
63
+ reward_total += 0.05
64
  elif action == "panic":
65
  events.append(_mk_event("guardrail", f"t{i}.guardrail", {"blocked": True, "reason": "anomaly"}))
66
+ events.append(
67
+ _mk_event("state_snapshot", f"t{i}.state", {"memory": memory, "mode": "halt", "reward_total": reward_total})
68
+ )
69
  break
70
  else:
71
+ # scan/move
72
+ x = rng.randint(0, 9)
73
+ y = rng.randint(0, 9)
74
+ reward_total += 0.01
75
+ events.append(_mk_event("state_snapshot", f"t{i}.state", {"x": x, "y": y, "memory": memory, "reward_total": reward_total}))
76
 
 
77
  txt = rng.choice(
78
  [
79
  "Proceed with caution.",
 
101
  model_id=model_id,
102
  env_fingerprint=_env_fingerprint(),
103
  events_payloads=events,
104
+ replay=replay,
105
  )
106
 
107
 
 
115
  patch_payload_json: Optional[Dict[str, Any]] = None,
116
  ) -> str:
117
  """
118
+ Counterfactual workflow: patch an event at index N, re-hash-chain into a new bundle.
 
119
  """
120
  from .bundle import load_bundle, write_bundle_zip
121
 
 
149
  model_id=b.manifest.get("model_id", "unknown"),
150
  env_fingerprint=b.manifest.get("env", {}),
151
  events_payloads=payloads,
152
+ replay=b.manifest.get("replay"),
153
+ run_url=b.manifest.get("run_url"),
154
  )