""" Validation script for OpenEnv compatibility. Run this before submitting: python validate.py """ import sys import traceback from pathlib import Path from reproagent.environment import ReproAgentEnv def validate_environment(): """Validate environment meets OpenEnv requirements.""" print("="*70) print("šŸ” VALIDATING REPROAGENT ENVIRONMENT") print("="*70) print() all_passed = True # Test 1: Import environment print("Test 1: Environment Import") try: from reproagent.environment import ReproAgentEnv print(" āœ… Environment imported successfully") except Exception as e: print(f" āŒ Failed to import environment: {e}") traceback.print_exc() all_passed = False return False # Test 2: Create environment print("\nTest 2: Environment Creation") try: env = ReproAgentEnv(difficulty="easy", max_steps=20, use_llm=False) print(" āœ… Environment created") except Exception as e: print(f" āŒ Failed to create environment: {e}") traceback.print_exc() all_passed = False return False # Test 3: Check spaces print("\nTest 3: Action/Observation Spaces") try: assert hasattr(env, 'action_space'), "Missing action_space" assert hasattr(env, 'observation_space'), "Missing observation_space" print(f" āœ… Action space: {env.action_space}") print(f" āœ… Observation space: {type(env.observation_space).__name__}") except Exception as e: print(f" āŒ Space validation failed: {e}") all_passed = False # Test 4: Reset print("\nTest 4: Reset") try: obs, info = env.reset() assert obs is not None, "Observation is None" assert isinstance(info, dict), "Info is not dict" print(" āœ… Reset successful") print(f" āœ… Observation keys: {list(obs.keys())}") print(f" āœ… Info keys: {list(info.keys())}") except Exception as e: print(f" āŒ Reset failed: {e}") traceback.print_exc() all_passed = False return False # Test 5: Observation space validation print("\nTest 5: Observation Space Validation") try: assert env.observation_space.contains(obs), "Observation not in space" print(" āœ… Observation matches observation_space") except Exception as e: print(f" āŒ Observation space mismatch: {e}") all_passed = False # Test 6: Action space validation print("\nTest 6: Action Space Validation") try: action = env.action_space.sample() assert env.action_space.contains(action), "Action not in space" print(f" āœ… Sampled action: {action}") print(f" āœ… Action is valid") except Exception as e: print(f" āŒ Action space validation failed: {e}") all_passed = False # Test 7: Step print("\nTest 7: Step") try: obs, reward, terminated, truncated, info = env.step(action) assert obs is not None, "Observation is None" assert isinstance(reward, (int, float)), "Reward is not numeric" assert isinstance(terminated, bool), "Terminated is not bool" assert isinstance(truncated, bool), "Truncated is not bool" assert isinstance(info, dict), "Info is not dict" print(" āœ… Step successful") print(f" āœ… Reward: {reward:.2f}") print(f" āœ… Terminated: {terminated}") print(f" āœ… Truncated: {truncated}") except Exception as e: print(f" āŒ Step failed: {e}") traceback.print_exc() all_passed = False return False # Test 8: Full episode print("\nTest 8: Full Episode") try: env.reset() total_reward = 0 steps = 0 for i in range(10): action = env.action_space.sample() obs, reward, terminated, truncated, info = env.step(action) total_reward += reward steps += 1 if terminated or truncated: break print(f" āœ… Episode completed") print(f" āœ… Steps: {steps}") print(f" āœ… Total reward: {total_reward:.2f}") except Exception as e: print(f" āŒ Episode failed: {e}") traceback.print_exc() all_passed = False # Test 9: Multiple episodes print("\nTest 9: Multiple Episodes") try: for episode in range(3): env.reset() for _ in range(5): action = env.action_space.sample() obs, reward, terminated, truncated, info = env.step(action) if terminated or truncated: break print(f" āœ… 3 episodes completed successfully") except Exception as e: print(f" āŒ Multiple episodes failed: {e}") traceback.print_exc() all_passed = False # Test 10: Render print("\nTest 10: Render") try: env.reset() output = env.render() print(" āœ… Render successful") except Exception as e: print(f" āš ļø Render failed (non-critical): {e}") # Test 11: Close print("\nTest 11: Close") try: env.close() print(" āœ… Close successful") except Exception as e: print(f" āš ļø Close failed (non-critical): {e}") # Summary print("\n" + "="*70) if all_passed: print("āœ… ALL VALIDATION TESTS PASSED!") print("="*70) print("\nšŸŽ‰ Environment is OpenEnv compatible!") print("āœ… Ready for submission") return True else: print("āŒ SOME TESTS FAILED") print("="*70) print("\nāš ļø Please fix errors before submission") return False def validate_agents(): """Validate agents can interact with environment.""" print("\n" + "="*70) print("šŸ¤– VALIDATING AGENTS") print("="*70) print() try: from reproagent.environment import ReproAgentEnv from agents.reasoning_agent import create_agent env = ReproAgentEnv(difficulty="easy", max_steps=10, use_llm=False) # Test reasoning agent print("Test: Reasoning Agent") agent = create_agent(env, "reasoning", use_llm=False) obs, info = env.reset() agent.reset() for i in range(5): action = agent.select_action(obs, info) obs, reward, terminated, truncated, info = env.step(action) if terminated or truncated: break print(" āœ… Reasoning agent works") # Test random agent print("\nTest: Random Agent") random_agent = create_agent(env, "random") obs, info = env.reset() random_agent.reset() for i in range(5): action = random_agent.select_action(obs, info) obs, reward, terminated, truncated, info = env.step(action) if terminated or truncated: break print(" āœ… Random agent works") print("\nāœ… All agents validated successfully") return True except Exception as e: print(f"\nāŒ Agent validation failed: {e}") traceback.print_exc() return False def validate_demo(): """Validate Gradio demo can be imported.""" print("\n" + "="*70) print("šŸŽØ VALIDATING DEMO") print("="*70) print() try: from server.app import create_demo print(" āœ… Demo imported successfully") print(" ā„¹ļø To test demo fully, run: python server/app.py") return True except Exception as e: print(f" āŒ Demo import failed: {e}") traceback.print_exc() return False def validate_graders(): """Validate grading system.""" print("\n" + "="*70) print("šŸ“Š VALIDATING GRADERS") print("="*70) print() try: from graders.graders import ReproductionGrader print(" āœ… Grader imported successfully") return True except Exception as e: print(f" āŒ Grader import failed: {e}") traceback.print_exc() return False def validate_openenv_yaml(): """Validate openenv.yaml exists.""" print("\n" + "="*70) print("šŸ“„ VALIDATING openenv.yaml") print("="*70) print() yaml_path = Path("openenv.yaml") if yaml_path.exists(): print(" āœ… openenv.yaml exists") try: import yaml with open(yaml_path) as f: config = yaml.safe_load(f) required_keys = ['name', 'environment', 'observation_space', 'action_space'] for key in required_keys: if key in config: print(f" āœ… Has '{key}'") else: print(f" āš ļø Missing '{key}'") return True except Exception as e: print(f" āš ļø Could not parse YAML: {e}") return True # Non-critical else: print(" āš ļø openenv.yaml not found (will need to create)") return True # Non-critical for now def main(): """Run all validation tests.""" print("\n" + "šŸš€"*35) print("REPROAGENT VALIDATION SUITE") print("šŸš€"*35 + "\n") results = { 'environment': validate_environment(), 'agents': validate_agents(), 'demo': validate_demo(), 'graders': validate_graders(), 'openenv_yaml': validate_openenv_yaml() } # Final summary print("\n" + "="*70) print("šŸ“Š VALIDATION SUMMARY") print("="*70) for component, passed in results.items(): status = "āœ… PASSED" if passed else "āŒ FAILED" print(f"{component.upper():<20} {status}") print("="*70) if all(results.values()): print("\nšŸŽ‰ ALL VALIDATIONS PASSED!") print("āœ… System is ready for deployment") return 0 else: print("\nāš ļø SOME VALIDATIONS FAILED") print("Please fix errors before proceeding") return 1 if __name__ == "__main__": sys.exit(main())