# ArbitrAgent/tests/test_reward_signals.py
# Author: AbeBhatti — negotiation bluff classifier + message cleaner
# Commit: 6858719
"""
test_reward_signals.py — Terminal test for reward signals and bluff detector.
Run: PYTHONPATH=. python tests/test_reward_signals.py
"""
import json
from envs.arbitragent_env import ArbitrAgentEnv
# Path to the self-play states the env samples negotiation scenarios from.
DATA_PATH = "training/data/selfplay_states.json"

# Case table: (name, action text, expect bluff signal high, expect outcome positive).
# Expanded into TEST_CASES dicts below.
_CASE_ROWS = [
    (
        "Call the bluff with coalition pressure",
        "That's a bluff. I have two other sellers lined up. Can you do $24?",
        True,
        False,
    ),
    (
        "Accept bluff at face value (bad move)",
        "Ok $30 sounds fair, I'll take it.",
        False,
        False,
    ),
    (
        "Good Diplomacy move",
        "F LYO - TYS, A TYR - VEN, A VEN - TRI",
        False,
        False,  # raw Diplomacy orders won't score positive in this env
    ),
    (
        "Irrelevant action",
        "I like pizza",
        False,
        False,
    ),
    (
        "Aggressive bluff call",
        "You're lying. I know you have no other buyers. $20 final, non-negotiable.",
        True,
        False,
    ),
    (
        "Coalition pressure with trade offer",
        "I have a better deal lined up, this isn't urgent for me. $22 or I walk.",
        True,
        False,
    ),
    (
        "Diplomatic negotiation",
        "Let's work together against Russia. I'll support your move if you support mine.",
        False,
        True,
    ),
    (
        "Neutral offer",
        "How about $28, I can pay cash today?",
        False,
        False,
    ),
]

TEST_CASES = [
    {
        "name": case_name,
        "action": case_action,
        "expect_bluff_high": wants_high_bluff,
        "expect_outcome_positive": wants_positive_outcome,
    }
    for case_name, case_action, wants_high_bluff, wants_positive_outcome in _CASE_ROWS
]

# A bluff score strictly above this counts as "high".
BLUFF_THRESHOLD = 0.35
# Outcome "positive" = above this; 0.35 so coalition-pressure (0.3) counts as non-positive for Test 1
OUTCOME_THRESHOLD = 0.35
def run_tests():
    """Run every TEST_CASES entry through ArbitrAgentEnv and score the signals.

    For each case: reset the env, take one step with the case's action text,
    then check two expectations from the case dict:

      * bluff signal:   ``info["bluff"]   > BLUFF_THRESHOLD``   iff ``expect_bluff_high``
      * outcome signal: ``info["outcome"] > OUTCOME_THRESHOLD`` iff ``expect_outcome_positive``

    Prints a per-case report, writes a JSON summary to
    ``tests/reward_signal_results.json``, and returns ``(passed, failed)``.
    """
    print("\n" + "=" * 70)
    print("ARBITRAGENT REWARD SIGNAL TEST SUITE")
    print("=" * 70)

    # Fixed seed so repeated runs exercise the same sampled scenarios.
    env = ArbitrAgentEnv(data_path=DATA_PATH, seed=42)

    passed = 0
    failed = 0
    results = []

    for i, tc in enumerate(TEST_CASES, start=1):
        env.reset()
        obs, reward, done, info = env.step(tc["action"])

        acc = info.get("accuracy", 0)
        out = info.get("outcome", 0)
        blf = info.get("bluff", 0)
        # Fall back to the raw step reward if the env doesn't report "total".
        total = info.get("total", reward)

        bluff_ok = (blf > BLUFF_THRESHOLD) == tc["expect_bluff_high"]
        outcome_ok = (out > OUTCOME_THRESHOLD) == tc["expect_outcome_positive"]
        passed_test = bluff_ok and outcome_ok

        status = "✅ PASS" if passed_test else "❌ FAIL"
        if passed_test:
            passed += 1
        else:
            failed += 1

        action_preview = tc["action"][:60] + ("..." if len(tc["action"]) > 60 else "")
        # BUG FIX: original f-string ran status and name together ("✅ PASSName");
        # add an explicit separator between them.
        print(f"\n[{i}] {status} — {tc['name']}")
        print(f"    Action: {action_preview}")
        print(f"    accuracy={acc:.3f} | outcome={out:.3f} | bluff={blf:.3f} | total={total:.3f}")
        if not bluff_ok:
            print(f"    ⚠ bluff signal wrong: got {blf:.3f}, expected {'high' if tc['expect_bluff_high'] else 'low'}")
        if not outcome_ok:
            print(f"    ⚠ outcome signal wrong: got {out:.3f}, expected {'positive' if tc['expect_outcome_positive'] else 'non-positive'}")

        results.append({
            "name": tc["name"],
            "action": tc["action"],
            "accuracy": acc,
            "outcome": out,
            "bluff": blf,
            "total": total,
            "passed": passed_test,
        })

    print("\n" + "=" * 70)
    print(f"RESULTS: {passed}/{len(TEST_CASES)} passed")
    print("=" * 70)

    # Persist per-case numbers so reward-signal changes can be diffed over time.
    with open("tests/reward_signal_results.json", "w") as f:
        json.dump(results, f, indent=2)
    print("Saved to tests/reward_signal_results.json")

    return passed, failed
if __name__ == "__main__":
    # Script entry point — see module docstring for the intended invocation.
    run_tests()