Spaces:

PalDPathak
/

Smart-Traffic-openenv

Sleeping

Aryansabasana

Cleanup: Remove all comments and docstrings from Python and config files

7a78f7e 2 months ago

10.8 kB


	import random
	import sys
	import os
	import re
	import ast
	import pathlib


	# Put this script's directory at the front of the module search path so the
	# project's `src` package resolves. The path is made absolute first:
	# os.path.dirname(__file__) can be '' or a relative path, which would then
	# be interpreted against whatever the working directory is at import time.
	sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
	from src.tasks import EasyTask, MediumTask, HardTask
	from src.agent import DeterministicAgent
	from src.models import State

	# Outcome of each audit test, keyed by test name. The sections below store
	# "PASS", "FAIL" or "MARGINAL"; the final summary prints them.
	results = {}

	def run_single(seed=None):
	    """Run the deterministic agent once on the Easy, Medium and Hard tasks.

	    Args:
	        seed: Base seed for the run. When ``None``, a seed in
	            ``[1000, 99999]`` is drawn from the operating system's entropy
	            source.

	    Returns:
	        A ``(seed, scores, metrics)`` tuple. ``scores`` maps each level name
	        to ``task.evaluate()`` and adds an ``"Overall"`` mean of the three.
	        ``metrics`` maps each level name to a dict with ``cleared``,
	        ``avg_wait`` and ``emg`` (read from the last step's ``info``) and
	        ``reward`` (cumulative reward rounded to 2 decimals).
	    """
	    if seed is None:
	        # This function reseeds the global generator just below, so drawing
	        # the "random" seed from that same generator would yield the same
	        # seeds on every execution once a fixed-seed call has happened.
	        # SystemRandom is unaffected by random.seed().
	        seed = random.SystemRandom().randint(1000, 99999)
	    random.seed(seed)

	    agent = DeterministicAgent()
	    tasks = {"Easy": EasyTask(), "Medium": MediumTask(), "Hard": HardTask()}
	    scores = {}
	    metrics = {}

	    for position, (level, task) in enumerate(tasks.items()):
	        # Each level gets its own seed, offset from the base by its position.
	        state = task.reset(seed=seed + position * 999)
	        total_reward = 0.0
	        steps = 0
	        done = False
	        while not done:
	            result = task.step(agent.get_action(state))
	            state = result.state
	            total_reward += result.reward
	            done = result.done
	            steps += 1
	            if steps > 500:
	                # Safety cap: stop even if the task never reports done.
	                break
	        scores[level] = task.evaluate()
	        metrics[level] = {
	            "cleared": result.info["total_cleared"],
	            "avg_wait": result.info["avg_waiting_time"],
	            "emg": result.info["emergencies_handled"],
	            "reward": round(total_reward, 2),
	        }

	    # The mean is computed over the three level scores before "Overall" is added.
	    scores["Overall"] = sum(scores.values()) / len(scores)
	    return seed, scores, metrics


	def sep(title, width=55):
	    """Print a section banner: a blank line, a rule, the title, a rule.

	    Args:
	        title: Text printed between the two rules, prefixed with one space.
	        width: Number of ``=`` characters in each rule. Defaults to 55.
	    """
	    rule = "=" * width
	    print(f"\n{rule}")
	    print(f" {title}")
	    print(rule)



	# TEST 1: two runs with the same seed must produce identical scores and metrics.
	sep("TEST 1: SEED REPRODUCIBILITY")
	seed = 42
	s1, sc1, m1 = run_single(seed)
	s2, sc2, m2 = run_single(seed)

	print(f"Seed: {seed}")
	# Plain "|" separators: "\|" is not a valid escape sequence, so it printed a
	# literal backslash and raises an invalid-escape warning on newer Pythons.
	for run_no, (run_scores, run_metrics) in enumerate(((sc1, m1), (sc2, m2)), 1):
	    print(f"\nRun {run_no}: Easy={run_scores['Easy']:.4f} | Medium={run_scores['Medium']:.4f} | Hard={run_scores['Hard']:.4f} | Overall={run_scores['Overall']:.4f}")
	    print(f" Cleared: {run_metrics['Easy']['cleared']} / {run_metrics['Medium']['cleared']} / {run_metrics['Hard']['cleared']}")

	if sc1 == sc2 and m1 == m2:
	    print("\n✅ TEST 1: SEED REPRODUCIBILITY → PASS")
	    results["Seed Reproducibility"] = "PASS"
	else:
	    print("\n❌ TEST 1: SEED REPRODUCIBILITY → FAIL (outputs differ with same seed)")
	    results["Seed Reproducibility"] = "FAIL"



	# TEST 2: several unseeded runs must use distinct seeds and must not all
	# produce the same (Easy, Medium, Hard) score triple.
	NUM_RANDOM_RUNS = 3  # single source for the run count used below
	sep(f"TEST 2: STOCHASTIC VARIABILITY ({NUM_RANDOM_RUNS} random runs)")
	runs = [run_single() for _ in range(NUM_RANDOM_RUNS)]
	print(f"\n{'Run':<5} {'Seed':<8} {'Easy':<8} {'Medium':<9} {'Hard':<8} {'Overall':<10} {'Hard Cleared'}")
	print("-" * 65)
	for i, (sd, sc, mx) in enumerate(runs, 1):
	    print(f" {i:<4} {sd:<8} {sc['Easy']:<8.4f} {sc['Medium']:<9.4f} {sc['Hard']:<8.4f} {sc['Overall']:<10.4f} {mx['Hard']['cleared']}")


	all_scores = [
	    (run_scores['Easy'], run_scores['Medium'], run_scores['Hard'])
	    for _, run_scores, _ in runs
	]
	# Every run must have drawn a different seed.
	unique_seeds = len({run_seed for run_seed, _, _ in runs}) == len(runs)
	# Triples are compared by their string form, as before.
	any_diff = len({str(triple) for triple in all_scores}) > 1

	if unique_seeds and any_diff:
	    print("\n✅ TEST 2: STOCHASTIC VARIABILITY → PASS (unique seeds + varying outputs)")
	    results["Stochastic Variability"] = "PASS"
	else:
	    print("\n❌ TEST 2: STOCHASTIC VARIABILITY → FAIL (same outputs == possible hardcoding)")
	    results["Stochastic Variability"] = "FAIL"



	# TEST 3: scan the project sources for literals that look like baked-in scores.
	sep("TEST 3: NO HARDCODED VALUES SCAN")
	SUSPECT_PATTERNS = [
	    r'\[0\.\d+,\s0\.\d+,\s0\.\d+',  # a list starting with three 0.x literals
	    r'return\s+0\.\d+\b',           # a function returning a bare 0.x literal
	]
	IGNORE_BLOCK_MARKER = '__name__'
	# Resolve the scanned files against this script's directory (the same anchor
	# used for sys.path) so the scan does not depend on the working directory.
	audit_root = pathlib.Path(__file__).resolve().parent
	files = sorted((audit_root / 'src').glob('*.py')) + [
	    audit_root / 'evaluate.py', audit_root / 'visualize.py'
	]
	all_ok = True
	for fp in files:
	    content = fp.read_text(encoding='utf-8')

	    # Only the text before the first "if __name__" guard is audited.
	    audit_content = content.partition(f'if {IGNORE_BLOCK_MARKER}')[0]
	    hits = [pat for pat in SUSPECT_PATTERNS if re.search(pat, audit_content)]
	    shown = fp.relative_to(audit_root)
	    if hits:
	        # Name the matching patterns so the warning can be acted on.
	        print(f" ⚠️ WARNING: {shown} ← suspicious pattern found: {', '.join(hits)}")
	        all_ok = False
	    else:
	        print(f" ✅ OK: {shown}")

	if all_ok:
	    print("\n✅ TEST 3: NO HARDCODED VALUES → PASS")
	    results["No Hardcoding"] = "PASS"
	else:
	    print("\n❌ TEST 3: NO HARDCODED VALUES → FAIL")
	    results["No Hardcoding"] = "FAIL"



	# TEST 4: the same task at a low arrival rate must not wait longer or score
	# worse than at a high arrival rate.
	sep("TEST 4: METRIC CONSISTENCY (Low vs High Traffic)")


	def _run_traffic_case(arrival_rate, seed=100, max_steps=500):
	    """Run the deterministic agent on an EasyTask at a fixed arrival rate.

	    Args:
	        arrival_rate: Value assigned to ``task.env.arrival_rate_base``.
	        seed: Seed passed to ``task.reset``.
	        max_steps: Safety cap on the episode length, matching the cap used
	            by ``run_single``, so a task that never reports done cannot hang.

	    Returns:
	        ``(score, total_cleared, avg_waiting_time)`` where the last two are
	        read from the final step's ``info``.
	    """
	    task = EasyTask()
	    # NOTE(review): the rate is set before reset(); confirm reset() does not
	    # restore the environment's default arrival rate.
	    task.env.arrival_rate_base = arrival_rate
	    state = task.reset(seed=seed)
	    agent = DeterministicAgent()
	    steps = 0
	    done = False
	    while not done:
	        r = task.step(agent.get_action(state))
	        state = r.state
	        done = r.done
	        steps += 1
	        if steps > max_steps:
	            break
	    return task.evaluate(), r.info["total_cleared"], r.info["avg_waiting_time"]


	low_score, low_cleared, low_wait = _run_traffic_case(0.2)
	high_score, high_cleared, high_wait = _run_traffic_case(6.0)

	# Plain "|" separators: "\|" is not a valid escape sequence and printed a
	# literal backslash.
	print(f"\n Low Traffic: Score={low_score:.4f} | Cleared={low_cleared} | Avg Wait={low_wait:.2f}")
	print(f" High Traffic: Score={high_score:.4f} | Cleared={high_cleared} | Avg Wait={high_wait:.2f}")
	print(f"\n Score Delta: {low_score - high_score:+.4f} | Wait Delta: {high_wait - low_wait:+.2f}")

	if low_wait <= high_wait and low_score >= high_score:
	    print("\n✅ TEST 4: METRIC CONSISTENCY → PASS (low traffic scores better than high traffic)")
	    results["Metric Logic"] = "PASS"
	else:
	    print("\n❌ TEST 4: METRIC CONSISTENCY → FAIL (higher traffic should not beat lower)")
	    results["Metric Logic"] = "FAIL"



	# TEST 5: the deterministic agent must beat a random policy on the same seed.
	sep("TEST 5: AGENT IMPACT (Real vs Random Policy)")


	_, real_scores, _ = run_single(seed=999)
	real_overall = real_scores["Overall"]


	class RandomAgent:
	    """Baseline policy that ignores the state and picks an action at random."""

	    def get_action(self, state):
	        """Return 1 or 2, chosen with the global ``random`` generator."""
	        # NOTE(review): only actions 1 and 2 are sampled; confirm that this
	        # is the intended action set for the baseline.
	        return random.choice([1, 2])


	def run_with_agent(agent_obj, seed=999):
	    """Return the mean ``task.evaluate()`` score of an agent over all tasks.

	    Args:
	        agent_obj: Any object exposing ``get_action(state)``.
	        seed: Base seed. It seeds the global ``random`` generator, and each
	            task is reset with the base seed offset by its position.

	    Returns:
	        The average of the Easy, Medium and Hard task scores.
	    """
	    random.seed(seed)
	    tasks = {"Easy": EasyTask(), "Medium": MediumTask(), "Hard": HardTask()}
	    total_score = 0
	    for position, task in enumerate(tasks.values()):
	        state = task.reset(seed=seed + position * 999)
	        steps = 0
	        done = False
	        while not done:
	            r = task.step(agent_obj.get_action(state))
	            state = r.state
	            done = r.done
	            steps += 1
	            if steps > 500:
	                # Safety cap: stop even if the task never reports done.
	                break
	        total_score += task.evaluate()
	    return total_score / len(tasks)


	random_overall = run_with_agent(RandomAgent(), seed=999)
	delta = real_overall - random_overall

	print(f"\n Real Agent Score: {real_overall:.4f}")
	print(f" Random Agent Score: {random_overall:.4f}")
	print(f" Improvement Delta: {delta:+.4f} ({delta*100:.1f}%)")

	# More than 0.02 counts as a clear win; a non-negative smaller gap is marginal.
	if delta > 0.02:
	    print("\n✅ TEST 5: AGENT IMPACT → PASS (Real agent significantly outperforms random)")
	    results["Agent Impact"] = "PASS"
	elif delta >= 0:
	    print("\n⚠️ TEST 5: AGENT IMPACT → MARGINAL (small gap, agent barely helps)")
	    results["Agent Impact"] = "MARGINAL"
	else:
	    print("\n❌ TEST 5: AGENT IMPACT → FAIL (random policy beats real agent — logic error)")
	    results["Agent Impact"] = "FAIL"



	# TEST 6: zero traffic, extreme traffic and the emergency (hard) task.
	sep("TEST 6: EXTREME SCENARIOS")


	def _run_extreme_case(task, seed, max_steps=500):
	    """Reset a task and drive it with a fresh DeterministicAgent until done.

	    Args:
	        task: A task object exposing ``reset``, ``step`` and ``evaluate``.
	        seed: Seed passed to ``task.reset``.
	        max_steps: Safety cap on the episode length, matching the cap used
	            by ``run_single``, so a task that never reports done cannot hang.

	    Returns:
	        ``(score, last_result)``: the value of ``task.evaluate()`` and the
	        result object returned by the final ``task.step`` call.
	    """
	    state = task.reset(seed=seed)
	    agent = DeterministicAgent()
	    steps = 0
	    done = False
	    while not done:
	        r = task.step(agent.get_action(state))
	        state = r.state
	        done = r.done
	        steps += 1
	        if steps > max_steps:
	            break
	    return task.evaluate(), r


	# Case A: no arrivals at all.
	zero_task = EasyTask()
	zero_task.env.arrival_rate_base = 0.0
	zero_score, _ = _run_extreme_case(zero_task, seed=7)

	# Case B: very high arrival rate.
	cong_task = EasyTask()
	cong_task.env.arrival_rate_base = 10.0
	cong_score, _ = _run_extreme_case(cong_task, seed=7)

	# Case C: the hard task. HardTask is already imported at the top of the
	# file, so no second import is needed here.
	emg_task = HardTask()
	emg_score, emg_result = _run_extreme_case(emg_task, seed=77)
	emg_handled = emg_result.info["emergencies_handled"]

	print(f"\n Case A (Zero traffic): Score = {zero_score:.4f} (expected ≈ 1.0)")
	print(f" Case B (Extreme traffic): Score = {cong_score:.4f} (expected < zero_score)")
	# Plain "|" separator: "\|" is not a valid escape sequence.
	print(f" Case C (Emergency task): Score = {emg_score:.4f} | Emergencies Handled = {emg_handled}")

	case_a = zero_score >= 0.85
	case_b = cong_score < zero_score
	case_c = emg_handled > 0

	if case_a and case_b and case_c:
	    print("\n✅ TEST 6: EXTREME SCENARIOS → PASS")
	    results["Extreme Cases"] = "PASS"
	else:
	    issues = []
	    if not case_a:
	        issues.append(f"Zero-traffic score {zero_score:.3f} unexpectedly low")
	    if not case_b:
	        issues.append(f"Congested score {cong_score:.3f} ≥ zero-traffic score {zero_score:.3f}")
	    if not case_c:
	        issues.append("No emergencies handled in hard task")
	    print(f"\n❌ TEST 6: EXTREME SCENARIOS → FAIL: {'; '.join(issues)}")
	    results["Extreme Cases"] = "FAIL"



	# TEST 7: graphs must be generated from live scores, and those scores must
	# vary with the seed.
	sep("TEST 7: GRAPH VALIDATION (Score ↔ Graph Consistency)")
	from visualize import generate_graph

	_, sc_a, _ = run_single(seed=111)
	_, sc_b, _ = run_single(seed=222)

	out_a = "audit_graph_A.png"
	out_b = "audit_graph_B.png"
	try:
	    generate_graph(sc_a, 111, output_path=out_a)
	    generate_graph(sc_b, 222, output_path=out_b)

	    # Check existence before measuring: os.path.getsize raises on a missing
	    # file, which previously crashed the audit instead of reporting FAIL.
	    graph_files_exist = os.path.exists(out_a) and os.path.exists(out_b)
	    size_a = os.path.getsize(out_a) if os.path.exists(out_a) else 0
	    size_b = os.path.getsize(out_b) if os.path.exists(out_b) else 0
	finally:
	    # Remove the temporary images even when graph generation fails.
	    for graph_path in (out_a, out_b):
	        if os.path.exists(graph_path):
	            os.remove(graph_path)

	# True when the Easy or Medium score differs between the two seeds.
	scores_differ = (
	    abs(sc_a['Easy'] - sc_b['Easy']) > 0.0001 or
	    abs(sc_a['Medium'] - sc_b['Medium']) > 0.0001
	)

	print(f"\n Seed 111: Easy={sc_a['Easy']:.4f} Medium={sc_a['Medium']:.4f} Hard={sc_a['Hard']:.4f}")
	print(f" Seed 222: Easy={sc_b['Easy']:.4f} Medium={sc_b['Medium']:.4f} Hard={sc_b['Hard']:.4f}")
	# Plain "|" separator: "\|" is not a valid escape sequence.
	print(f" Graph A size: {size_a} bytes | Graph B size: {size_b} bytes")
	print(f" Scores differ across seeds: {scores_differ}")
	print(f" Graph files generated: {graph_files_exist}")

	if graph_files_exist and scores_differ:
	    print("\n✅ TEST 7: GRAPH VALIDATION → PASS (graphs generated from live scores, vary with seed)")
	    results["Graph Accuracy"] = "PASS"
	else:
	    print("\n❌ TEST 7: GRAPH VALIDATION → FAIL")
	    results["Graph Accuracy"] = "FAIL"



	# Final report: one line per recorded test, then an overall verdict.
	sep("FINAL AUDIT SUMMARY")
	icon = {"PASS": "✅", "FAIL": "❌", "MARGINAL": "⚠️ "}
	for test, status in results.items():
	    marker = icon.get(status, '?')
	    print(f" {marker} {test}: {status}")

	recorded_statuses = set(results.values())
	any_fail = "FAIL" in recorded_statuses
	any_marginal = "MARGINAL" in recorded_statuses

	# A single failure outranks any number of marginal results.
	if any_fail:
	    verdict = " FINAL VERDICT: NEEDS FIXES ❌"
	elif any_marginal:
	    verdict = " FINAL VERDICT: MOSTLY TRUSTED ⚠️ (minor issues detected)"
	else:
	    verdict = " FINAL VERDICT: TRUSTED SYSTEM ✅"

	rule = '=' * 55
	print(f"\n{rule}")
	print(verdict)
	print(f"{rule}\n")