| |
| """ |
| Step-by-step integration testing for EV2 Service. |
| |
| This script runs small-scale evolution experiments to verify: |
| 1. Notification mechanism works |
| 2. Evolution results are identical with/without service |
| 3. Service doesn't interfere with evolution |
| """ |
|
|
| import sys |
| import json |
| import time |
| import shutil |
| import requests |
| from pathlib import Path |
|
|
| |
# Put the directory two levels above this file on the import path so the
# project-local ``shinka`` imports used by the evolution checks in this file
# can be found.  ``resolve()`` makes the path absolute first: when ``__file__``
# is a bare relative filename, ``.parent.parent`` would otherwise collapse to
# "." (the current working directory) instead of the intended directory.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
|
|
|
def test_service_health():
    """Check that the EV2 service is reachable and print its status.

    Sends ``GET /api/v1/status`` to ``http://localhost:8765`` with a
    2-second timeout and prints the ``status`` field plus the
    ``total_notifications`` / ``total_agent_runs`` counters found under
    ``statistics`` (each defaulting to 0 when absent).

    Returns:
        bool: True when the service answers with HTTP 200 and the JSON
        payload contains a ``status`` key; False for any other HTTP status,
        a refused connection, or any other exception (which is printed).
    """
    print("\n" + "=" * 60)
    print("TEST 1: Service Health Check")
    print("=" * 60)

    service_url = "http://localhost:8765"

    try:
        response = requests.get(f"{service_url}/api/v1/status", timeout=2)
        if response.status_code != 200:
            print(f"❌ Service returned HTTP {response.status_code}")
            return False

        status = response.json()
        stats = status.get('statistics', {})
        print("✅ Service is running")
        # A missing 'status' key raises KeyError, which the generic handler
        # below reports as a failed health check.
        print(f" Status: {status['status']}")
        print(f" Notifications: {stats.get('total_notifications', 0)}")
        print(f" Agent runs: {stats.get('total_agent_runs', 0)}")
        return True
    except requests.exceptions.ConnectionError:
        # Most common failure: nothing is listening on the port yet, so
        # tell the user how to start the service.
        print("❌ Service is not running")
        print(" Please start it with:")
        print(" uv run eval_agent/ev2_service_standalone.py --config eval_agent/ev2_service_config.yaml")
        return False
    except Exception as e:
        print(f"❌ Error: {e}")
        return False
|
|
|
|
def test_notification_mechanism():
    """Send one synthetic "generation complete" notification to the service.

    Posts a fixed JSON payload (generation 1, results dir
    ``/tmp/test_notification``, primary score 0.5) to
    ``/api/v1/notify/generation_complete`` on ``http://localhost:8765`` with
    a 2-second timeout and prints the JSON response.

    Returns:
        bool: True when the service answers with HTTP 200 and a JSON body;
        False for any other HTTP status or any exception (which is printed).
    """
    print("\n" + "=" * 60)
    print("TEST 2: Notification Mechanism")
    print("=" * 60)

    service_url = "http://localhost:8765"

    payload = {
        "generation": 1,
        "results_dir": "/tmp/test_notification",
        "primary_score": 0.5,
    }

    try:
        response = requests.post(
            f"{service_url}/api/v1/notify/generation_complete",
            json=payload,
            timeout=2,
        )
        if response.status_code != 200:
            print(f"❌ Notification failed: HTTP {response.status_code}")
            return False

        result = response.json()
        print("✅ Notification sent successfully")
        print(f" Response: {json.dumps(result, indent=2)}")
        return True
    except Exception as e:
        print(f"❌ Error sending notification: {e}")
        return False
|
|
|
|
def check_service_state():
    """Fetch the service status and print its statistics counters.

    Sends ``GET /api/v1/status`` to ``http://localhost:8765`` with a
    2-second timeout and prints the ``total_notifications``,
    ``total_agent_runs``, ``generations_tracked`` and
    ``last_agent_trigger_gen`` entries of the ``statistics`` object
    (missing entries are shown as 0, or -1 for the last trigger generation).

    Returns:
        The decoded JSON status payload when the service answers with
        HTTP 200, otherwise None (non-200 status or any exception, which is
        printed).
    """
    print("\n" + "=" * 60)
    print("TEST 3: Service State After Notifications")
    print("=" * 60)

    service_url = "http://localhost:8765"

    try:
        response = requests.get(f"{service_url}/api/v1/status", timeout=2)
        if response.status_code != 200:
            # Include the status code so the failure can be diagnosed, in
            # line with the health check's message.
            print(f"❌ Failed to get status: HTTP {response.status_code}")
            return None

        status = response.json()
        stats = status.get('statistics', {})
        print("✅ Service state retrieved")
        print(f" Total notifications: {stats.get('total_notifications', 0)}")
        print(f" Agent runs: {stats.get('total_agent_runs', 0)}")
        print(f" Generations tracked: {stats.get('generations_tracked', 0)}")
        print(f" Last trigger gen: {stats.get('last_agent_trigger_gen', -1)}")
        return status
    except Exception as e:
        print(f"❌ Error: {e}")
        return None
|
|
|
|
def run_mini_evolution_without_service():
    """Set up a baseline evolution runner with no eval service configured.

    Recreates ``/tmp/test_shinka_baseline`` from scratch, writes a dummy
    evaluator script and a trivial initial program into it, and constructs
    an ``EvolutionRunner`` with ``eval_service_url=None``.  Only the
    construction is exercised and timed; this function does not invoke any
    run method on the runner.

    Every step, including the project-local ``shinka`` imports, happens
    inside the ``try`` so that a missing dependency or filesystem error is
    reported as a failed check instead of aborting the whole script.

    Returns:
        bool: True when the runner was constructed successfully, False when
        any step raised (the exception and traceback are printed).
    """
    print("\n" + "=" * 60)
    print("TEST 4: Mini Evolution WITHOUT Service (Baseline)")
    print("=" * 60)

    results_dir = Path("/tmp/test_shinka_baseline")

    try:
        # Imported lazily: these are project modules that may be unavailable.
        from shinka.core import EvolutionRunner, EvolutionConfig
        from shinka.launch import LocalJobConfig
        from shinka.database import DatabaseConfig

        # Start from an empty results directory so earlier runs cannot leak in.
        if results_dir.exists():
            shutil.rmtree(results_dir)
        results_dir.mkdir(parents=True, exist_ok=True)

        print(f"📁 Results dir: {results_dir}")

        # Evaluator stub that prints the same fixed score on every call.
        eval_script = results_dir / "eval_dummy.py"
        eval_script.write_text("""
import json
import sys

# Dummy evaluator: always returns same score
result = {
    "correct": {"correct": True},
    "metrics": {
        "combined_score": 0.5,
        "public": {"dummy_metric": 0.5},
        "private": {}
    }
}

print(json.dumps(result))
sys.exit(0)
""", encoding="utf-8")

        # Minimal program used as the starting point.
        init_program = results_dir / "initial.py"
        init_program.write_text("""
def dummy_function():
    return "Hello, World!"
""", encoding="utf-8")

        evo_config = EvolutionConfig(
            num_generations=3,
            max_parallel_jobs=1,
            results_dir=str(results_dir),
            init_program_path=str(init_program),
            eval_service_url=None,
        )

        job_config = LocalJobConfig(
            eval_program_path=str(eval_script),
        )

        db_config = DatabaseConfig(
            db_path="evolution.db"
        )

        print("🚀 Starting evolution (3 generations)...")
        start_time = time.time()

        runner = EvolutionRunner(
            evo_config=evo_config,
            job_config=job_config,
            db_config=db_config,
            verbose=False
        )

        print("✅ Evolution runner initialized successfully")
        print(f" - eval_service_url: {runner.eval_service_url}")
        print(f" - results_dir: {runner.results_dir}")

        elapsed = time.time() - start_time
        print(f"⏱️ Initialization took {elapsed:.2f}s")

        return True

    except Exception as e:
        print(f"❌ Evolution failed: {e}")
        import traceback
        traceback.print_exc()
        return False
|
|
|
|
def run_mini_evolution_with_service():
    """Set up an evolution runner that is configured with the eval service.

    Recreates ``/tmp/test_shinka_with_service`` from scratch, writes a dummy
    evaluator script and a trivial initial program into it, constructs an
    ``EvolutionRunner`` with ``eval_service_url="http://localhost:8765"``,
    and verifies that the runner exposes that same URL.  Only the
    construction is exercised and timed; this function does not invoke any
    run method on the runner.

    Every step, including the project-local ``shinka`` imports, happens
    inside the ``try`` so that a missing dependency or filesystem error is
    reported as a failed check instead of aborting the whole script.

    Returns:
        bool: True when the runner was constructed and reports the expected
        service URL; False on a URL mismatch or when any step raised (the
        exception and traceback are printed).
    """
    print("\n" + "=" * 60)
    print("TEST 5: Mini Evolution WITH Service (Should be Identical)")
    print("=" * 60)

    service_url = "http://localhost:8765"
    results_dir = Path("/tmp/test_shinka_with_service")

    try:
        # Imported lazily: these are project modules that may be unavailable.
        from shinka.core import EvolutionRunner, EvolutionConfig
        from shinka.launch import LocalJobConfig
        from shinka.database import DatabaseConfig

        # Start from an empty results directory so earlier runs cannot leak in.
        if results_dir.exists():
            shutil.rmtree(results_dir)
        results_dir.mkdir(parents=True, exist_ok=True)

        print(f"📁 Results dir: {results_dir}")

        # Evaluator stub that prints the same fixed score on every call.
        eval_script = results_dir / "eval_dummy.py"
        eval_script.write_text("""
import json
import sys

result = {
    "correct": {"correct": True},
    "metrics": {
        "combined_score": 0.5,
        "public": {"dummy_metric": 0.5},
        "private": {}
    }
}

print(json.dumps(result))
sys.exit(0)
""", encoding="utf-8")

        # Minimal program used as the starting point.
        init_program = results_dir / "initial.py"
        init_program.write_text("""
def dummy_function():
    return "Hello, World!"
""", encoding="utf-8")

        evo_config = EvolutionConfig(
            num_generations=3,
            max_parallel_jobs=1,
            results_dir=str(results_dir),
            init_program_path=str(init_program),
            eval_service_url=service_url,
        )

        job_config = LocalJobConfig(
            eval_program_path=str(eval_script),
        )

        db_config = DatabaseConfig(
            db_path="evolution.db"
        )

        print("🚀 Starting evolution (3 generations)...")
        start_time = time.time()

        runner = EvolutionRunner(
            evo_config=evo_config,
            job_config=job_config,
            db_config=db_config,
            verbose=False
        )

        print("✅ Evolution runner initialized successfully")
        print(f" - eval_service_url: {runner.eval_service_url}")
        print(f" - results_dir: {runner.results_dir}")

        elapsed = time.time() - start_time
        print(f"⏱️ Initialization took {elapsed:.2f}s")

        # Explicit comparison instead of ``assert``: asserts are stripped
        # under ``python -O`` and would otherwise fail with an empty message.
        if runner.eval_service_url != service_url:
            print(
                "❌ Service URL mismatch: "
                f"expected {service_url}, got {runner.eval_service_url}"
            )
            return False
        print("✅ Service URL correctly configured")

        return True

    except Exception as e:
        print(f"❌ Evolution failed: {e}")
        import traceback
        traceback.print_exc()
        return False
|
|
|
|
def main():
    """Run every integration check in order and print a pass/fail summary.

    The service health check runs first; if the service is unreachable the
    remaining checks are skipped and start-up instructions are printed.
    Otherwise the notification check, the service-state check and the two
    runner-construction checks run, the service state is printed once more,
    and a summary table is shown.

    Returns:
        int: 0 when every check passed, 1 when the service is not running or
        at least one check failed.  The value is used as the process exit
        status when the file is executed as a script.
    """
    print("\n" + "=" * 70)
    print("🧪 EV2 SERVICE INTEGRATION - STEP BY STEP TESTING")
    print("=" * 70)

    results = []

    service_running = test_service_health()
    results.append(("Service Health", service_running))

    if not service_running:
        # Derive the directory to start the service from instead of printing
        # a machine-specific absolute path: two levels above this file, the
        # same directory this script adds to ``sys.path``.
        project_root = Path(__file__).resolve().parent.parent
        print("\n" + "=" * 70)
        print("⚠️ Service not running. Please start it first:")
        print(" Terminal 1:")
        print(f" cd {project_root}")
        print(" uv run eval_agent/ev2_service_standalone.py --config eval_agent/ev2_service_config.yaml")
        print("=" * 70)
        return 1

    notif_works = test_notification_mechanism()
    results.append(("Notification Mechanism", notif_works))

    state = check_service_state()
    results.append(("Service State Check", state is not None))

    baseline_works = run_mini_evolution_without_service()
    results.append(("Evolution WITHOUT Service", baseline_works))

    with_service_works = run_mini_evolution_with_service()
    results.append(("Evolution WITH Service", with_service_works))

    # Print the service counters once more after the runner checks; the
    # service is known to be running here, and the result is informational.
    check_service_state()

    print("\n" + "=" * 70)
    print("📊 TEST SUMMARY")
    print("=" * 70)

    for test_name, passed in results:
        status = "✅ PASS" if passed else "❌ FAIL"
        print(f" {status} {test_name}")

    all_passed = all(passed for _, passed in results)

    print("=" * 70)
    if all_passed:
        print("🎉 All tests passed! Integration is working correctly.")
        print("\nNext steps:")
        print(" 1. Run actual evolution experiment (without service first)")
        print(" 2. Run same experiment with service enabled")
        print(" 3. Compare results to verify they're identical")
    else:
        print("❌ Some tests failed. Please review errors above.")
    print("=" * 70)

    return 0 if all_passed else 1


if __name__ == "__main__":
    # Propagate the overall result as the process exit status.
    sys.exit(main())
|
|