#!/usr/bin/env python3
"""
TTRLVR + AZR integrated validation suite.

Single entry-point script that validates the whole system:
1. Environment validation
2. Unit tests
3. Mini integration test (1 problem, 2 rounds)
4. Disk space check
5. Performance / resource snapshot
6. Final validation report (JSON + HTML)
"""

import html
import json
import os
import shlex
import subprocess
import sys
import tempfile
import time
from datetime import datetime
from pathlib import Path

# Path setup. The root can be overridden with TTRLVR_ROOT; the default is the
# location the suite was originally written for.
PROJECT_ROOT = os.environ.get('TTRLVR_ROOT', '/home/ubuntu/RLVR/TestTime-RLVR-v2')
TEST_DIR = os.path.join(PROJECT_ROOT, 'test')
sys.path.append(PROJECT_ROOT)

# Launch child scripts with the interpreter that runs this suite, so they see
# the same environment even on hosts that have no bare ``python`` executable.
_PYTHON = shlex.quote(sys.executable or 'python')

# Stylesheet embedded in the HTML report (kept out of f-strings because of
# the literal braces).
_REPORT_CSS = """\
body { font-family: Arial, sans-serif; margin: 40px; }
.header { background-color: #f0f0f0; padding: 20px; border-radius: 5px; }
.summary { margin: 20px 0; }
.test-result { margin: 10px 0; padding: 10px; border-radius: 5px; }
.success { background-color: #d4edda; border: 1px solid #c3e6cb; }
.failure { background-color: #f8d7da; border: 1px solid #f5c6cb; }
"""


def _print_banner(title):
    """Print a section title framed by two 60-character rules."""
    print("\n" + "="*60)
    print(title)
    print("="*60)


def run_command(command, description, timeout=300, cwd=None):
    """Run a shell command and report the outcome on stdout.

    Args:
        command: Command line, executed through the shell.
        description: Human-readable label used in the progress messages.
        timeout: Seconds to wait before giving up on the command.
        cwd: Working directory for the command; defaults to ``PROJECT_ROOT``.

    Returns:
        A ``(success, stdout, stderr)`` tuple. ``success`` is True only when
        the command exits with status 0. On timeout the tuple is
        ``(False, "", "Timeout")``; if the command could not be run at all it
        is ``(False, "", <exception text>)``.
    """
    print(f"🔄 {description}")
    print(f" Command: {command}")

    start_time = time.time()
    try:
        result = subprocess.run(
            command,
            shell=True,
            capture_output=True,
            text=True,
            # Undecodable bytes in child output must not abort the check.
            errors="replace",
            timeout=timeout,
            cwd=PROJECT_ROOT if cwd is None else cwd,
        )
    except subprocess.TimeoutExpired:
        print(f"⏰ {description} timed out after {timeout}s")
        return False, "", "Timeout"
    except Exception as e:
        # Best-effort boundary: any failure to launch is reported, not raised.
        print(f"💥 {description} crashed: {e}")
        return False, "", str(e)

    duration = time.time() - start_time
    if result.returncode == 0:
        print(f"✅ {description} completed ({duration:.1f}s)")
        return True, result.stdout, result.stderr

    print(f"❌ {description} failed ({duration:.1f}s)")
    print(f" Error: {result.stderr}")
    return False, result.stdout, result.stderr


def run_environment_validation():
    """Run ``validate_environment.py`` from the test directory.

    Returns:
        True when the script exits with status 0.
    """
    _print_banner("1️⃣ 환경 검증")

    success, _, _ = run_command(
        f"{_PYTHON} validate_environment.py",
        "Environment validation",
        cwd=TEST_DIR,
    )
    return success


def run_unit_tests():
    """Run ``test_ttrlvr_azr_integration.py`` from the test directory.

    Returns:
        True when the script exits with status 0.
    """
    _print_banner("2️⃣ 단위 테스트")

    success, _, _ = run_command(
        f"{_PYTHON} test_ttrlvr_azr_integration.py",
        "Unit tests",
        cwd=TEST_DIR,
    )
    return success


def _print_training_summary(results_dir):
    """Print a short summary of the newest entry under *results_dir*, if any."""
    if not results_dir.exists():
        return

    latest_result = max(results_dir.glob("*"), key=os.path.getctime, default=None)
    if latest_result is None:
        return
    print(f"📁 Results saved to: {latest_result}")

    result_file = latest_result / "training_results.json"
    if not result_file.exists():
        return

    try:
        with open(result_file, 'r', encoding='utf-8') as f:
            results = json.load(f)
    except (OSError, ValueError) as e:
        # The summary is informational only; a bad file must not abort the suite.
        print(f"⚠️ Could not read {result_file}: {e}")
        return
    if not isinstance(results, dict):
        print(f"⚠️ Unexpected content in {result_file}")
        return

    print(f"📊 Test summary:")
    print(f" - Success: {results.get('success', False)}")
    print(f" - Completed rounds: {len(results.get('rounds') or {})}")
    print(f" - Final model: {results.get('final_model', 'N/A')}")


def run_mini_integration_test():
    """Run a short end-to-end training job (1 problem, 2 rounds).

    On success, the newest entry under ``results/ttrlvr_azr`` is inspected and
    a summary of its ``training_results.json`` is printed when available.

    Returns:
        True when the training script exits with status 0. The content of the
        results file does not influence the return value.
    """
    _print_banner("3️⃣ 미니 통합 테스트")

    success, _, _ = run_command(
        f"{_PYTHON} train_ttrlvr_azr.py --benchmark mbpp --problems 1 --rounds 2 --debug",
        "Mini integration test (1 problem, 2 rounds)",
        timeout=1800,  # 30 minutes
        cwd=TEST_DIR,
    )

    if success:
        print("✅ Mini integration test completed successfully")
        _print_training_summary(Path(TEST_DIR) / "results" / "ttrlvr_azr")

    return success


def _parse_df_use_percent(df_output):
    """Return the integer Use% of the first data row of ``df`` output, or None."""
    lines = df_output.strip().split('\n')
    if len(lines) < 2:
        return None
    fields = lines[1].split()
    if len(fields) < 5:
        return None
    used_percent = fields[4].rstrip('%')
    return int(used_percent) if used_percent.isdigit() else None


def check_disk_space():
    """Check disk usage of the directories the pipeline writes to.

    Returns:
        False if any existing path is more than 90% full, True otherwise.
        Missing paths and unparseable ``df`` output only produce a warning.
    """
    _print_banner("4️⃣ 디스크 공간 확인")

    paths_to_check = [
        "/home/ubuntu/RLVR",
        "/data",
        "/tmp",
    ]

    all_good = True
    for path in paths_to_check:
        if not os.path.exists(path):
            print(f"⚠️ Path not found: {path}")
            continue

        # -P keeps every filesystem on a single output line.
        success, stdout, _ = run_command(
            f"df -hP {shlex.quote(path)}", f"Disk usage for {path}"
        )
        if not success:
            continue

        used_percent = _parse_df_use_percent(stdout)
        if used_percent is None:
            print(f"⚠️ Could not determine disk usage for {path}")
        elif used_percent > 90:
            print(f"⚠️ Warning: {path} is {used_percent}% full")
            all_good = False
        else:
            print(f"✅ {path}: {used_percent}% used")

    return all_good


def run_performance_benchmark():
    """Print a snapshot of GPU memory, system memory and CPU usage.

    Returns:
        True when both the GPU query (``nvidia-smi``) and the memory query
        (``free``) succeed. The CPU query is informational only.
    """
    _print_banner("5️⃣ 성능 벤치마크")

    # GPU memory usage
    print("🖥️ GPU 메모리 상태:")
    gpu_success, gpu_output, _ = run_command(
        "nvidia-smi --query-gpu=memory.used,memory.total --format=csv,noheader,nounits",
        "GPU memory check",
    )
    if gpu_success:
        for i, line in enumerate(gpu_output.strip().split('\n')):
            if not line.strip():
                continue
            try:
                used, total = (int(value) for value in line.split(','))
                usage_percent = (used / total) * 100
            except (ValueError, ZeroDivisionError):
                # Non-numeric fields or a zero total: show the raw line.
                print(f" GPU {i}: {line}")
            else:
                print(f" GPU {i}: {used}MB / {total}MB ({usage_percent:.1f}%)")

    # System memory
    print("\n💾 시스템 메모리 상태:")
    mem_success, mem_output, _ = run_command("free -h", "System memory check")
    if mem_success:
        for line in mem_output.split('\n')[:2]:  # header + first data row only
            print(f" {line}")

    # CPU usage
    print("\n🖥️ CPU 상태:")
    cpu_success, cpu_output, _ = run_command(
        "top -bn1 | grep 'Cpu(s)' | head -1", "CPU usage check"
    )
    if cpu_success:
        print(f" {cpu_output.strip()}")

    return gpu_success and mem_success


def _render_html_report(report, generated_at):
    """Build the HTML document for *report*; all dynamic text is escaped."""
    summary = report['summary']
    overall = '✅ ALL TESTS PASSED' if summary['overall_success'] else '❌ SOME TESTS FAILED'

    parts = [
        "<!DOCTYPE html>",
        '<html lang="en">',
        "<head>",
        '<meta charset="utf-8">',
        "<title>TTRLVR + AZR Validation Report</title>",
        f"<style>\n{_REPORT_CSS}</style>",
        "</head>",
        "<body>",
        '<div class="header">',
        "<h1>TTRLVR + AZR Integration Validation Report</h1>",
        f"<p>Generated: {generated_at.strftime('%Y-%m-%d %H:%M:%S')}</p>",
        "</div>",
        '<div class="summary">',
        f"<h2>Overall Status: {overall}</h2>",
        f"<p>Tests: {summary['passed_tests']}/{summary['total_tests']} passed</p>",
        "</div>",
        "<h2>Test Results</h2>",
    ]

    for test_name, result in report['validation_results'].items():
        status = "success" if result['success'] else "failure"
        icon = "✅" if result['success'] else "❌"
        duration = html.escape(str(result.get('duration', 'N/A')))
        details = html.escape(str(result.get('details', 'No details available')))
        parts += [
            f'<div class="test-result {status}">',
            f"<h3>{icon} {html.escape(str(test_name))}</h3>",
            f"<p><strong>Duration:</strong> {duration}</p>",
            f"<p><strong>Details:</strong> {details}</p>",
            "</div>",
        ]

    if report['recommendations']:
        parts.append("<h2>Recommendations</h2>")
        parts.append("<ul>")
        parts += [f"<li>{html.escape(str(item))}</li>" for item in report['recommendations']]
        parts.append("</ul>")

    parts += ["</body>", "</html>", ""]
    return "\n".join(parts)


def generate_validation_report(results, output_dir="/tmp"):
    """Write the JSON and HTML validation reports and return the report dict.

    Args:
        results: Mapping of test name to a dict with at least a ``success``
            key; ``duration`` and ``details`` are shown when present.
        output_dir: Directory that receives the two report files.

    Returns:
        The report dict that was serialized to JSON, with the keys
        ``timestamp``, ``validation_results``, ``summary`` and
        ``recommendations``.
    """
    _print_banner("6️⃣ 검증 보고서 생성")

    # One clock reading, so file names and embedded timestamps always agree.
    now = datetime.now()
    timestamp = now.strftime('%Y%m%d_%H%M%S')
    report_file = str(Path(output_dir) / f"ttrlvr_azr_validation_report_{timestamp}.json")
    html_report = str(Path(output_dir) / f"ttrlvr_azr_validation_report_{timestamp}.html")

    report = {
        'timestamp': now.isoformat(),
        'validation_results': results,
        'summary': {
            'total_tests': len(results),
            'passed_tests': sum(1 for result in results.values() if result['success']),
            'overall_success': all(result['success'] for result in results.values()),
        },
        'recommendations': [],
    }

    html_content = _render_html_report(report, now)

    # Explicit UTF-8: the HTML contains emoji, which a non-UTF-8 locale
    # default encoding cannot represent.
    with open(report_file, 'w', encoding='utf-8') as f:
        json.dump(report, f, indent=2)

    with open(html_report, 'w', encoding='utf-8') as f:
        f.write(html_content)

    print(f"📄 JSON 보고서: {report_file}")
    print(f"🌐 HTML 보고서: {html_report}")

    return report


def _run_check(check, details):
    """Run *check*, time it, and return its result entry for the report."""
    test_start = time.time()
    success = check()
    return {
        'success': success,
        'duration': f"{time.time() - test_start:.1f}s",
        'details': details,
    }


def _skipped(details):
    """Return the result entry for a check that was not run."""
    return {'success': False, 'duration': '0s', 'details': details}


def main():
    """Run every validation stage in order and write the final report.

    Unit tests run only if environment validation passed, and the mini
    integration test only if the unit tests passed; the disk and resource
    checks always run.

    Returns:
        Process exit code: 0 when every stage succeeded, 1 otherwise.
    """
    print("🧪 TTRLVR + AZR 통합 검증 스위트 시작")
    print("=" * 60)
    print(f"시작 시간: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print("=" * 60)

    results = {}
    start_time = time.time()

    # 1. Environment validation
    results['environment_validation'] = _run_check(
        run_environment_validation,
        'Environment setup and dependencies check',
    )

    # 2. Unit tests (only when environment validation passed)
    if results['environment_validation']['success']:
        results['unit_tests'] = _run_check(
            run_unit_tests,
            'Component unit tests and integration tests',
        )
    else:
        results['unit_tests'] = _skipped('Skipped due to environment validation failure')

    # 3. Mini integration test (only when the previous stages passed)
    if results['unit_tests']['success']:
        results['mini_integration_test'] = _run_check(
            run_mini_integration_test,
            'End-to-end pipeline test with 1 problem, 2 rounds',
        )
    else:
        results['mini_integration_test'] = _skipped('Skipped due to previous test failures')

    # 4. Disk space check (always runs)
    results['disk_space_check'] = _run_check(
        check_disk_space,
        'Available disk space in critical directories',
    )

    # 5. Performance benchmark (always runs)
    results['performance_benchmark'] = _run_check(
        run_performance_benchmark,
        'System resource usage and performance metrics',
    )

    # 6. Report generation
    total_duration = time.time() - start_time
    print(f"\n⏱️ 총 실행 시간: {total_duration:.1f}초 ({total_duration/60:.1f}분)")

    report = generate_validation_report(results)

    # Final verdict
    _print_banner("🏁 검증 스위트 완료")

    passed = sum(1 for result in results.values() if result['success'])
    total = len(results)
    print(f"📊 최종 결과: {passed}/{total} 테스트 통과")

    if report['summary']['overall_success']:
        print("🎉 모든 검증 통과! TTRLVR + AZR 시스템 실행 준비 완료")
        return 0

    print("⚠️ 일부 검증 실패. 위의 결과를 확인하고 문제를 해결하세요.")
    return 1


if __name__ == '__main__':
    sys.exit(main())