File size: 12,583 Bytes

24c2665

#!/usr/bin/env python3
"""
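TTRLVR + AZR integrated validation suite

A single script that validates the whole system:
1. Environment validation
2. Unit tests
3. Mini integration test (1 problem, 2 rounds)
4. Disk space check
5. Performance benchmark
6. Final validation report generation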
"""

import html
import json
import os
import shutil
import subprocess
import sys
import tempfile
import time
from datetime import datetime
from pathlib import Path

# Path setup: add the project checkout to the module search path.
sys.path.append('/home/ubuntu/RLVR/TestTime-RLVR-v2')

def run_command(command, description, timeout=300,
                cwd='/home/ubuntu/RLVR/TestTime-RLVR-v2'):
    """Run a shell command, print its progress, and report the outcome.

    Args:
        command: Command line passed to the shell (``shell=True``), so pipes
            and ``&&`` chains work. Only pass trusted strings.
        description: Human-readable label used in the progress messages.
        timeout: Seconds to wait before the command is abandoned.
        cwd: Directory the command runs in. Defaults to the project
            checkout; pass ``None`` to inherit the current directory.

    Returns:
        A ``(success, stdout, stderr)`` tuple. ``success`` is True only when
        the command exits with status 0. A timeout yields
        ``(False, "", "Timeout")``; any other failure to run the command
        yields ``(False, "", str(exception))``.
    """

    print(f"🔄 {description}")
    print(f"   Command: {command}")

    start_time = time.time()

    try:
        result = subprocess.run(
            command,
            shell=True,
            capture_output=True,
            text=True,
            # Undecodable bytes in the child's output must not abort the
            # check; they are replaced instead of raising.
            errors="replace",
            timeout=timeout,
            cwd=cwd,
        )
    except subprocess.TimeoutExpired:
        print(f"⏰ {description} timed out after {timeout}s")
        return False, "", "Timeout"
    except Exception as e:
        # Deliberate catch-all: a validation step that cannot even start
        # (missing cwd, bad arguments, ...) is reported as a failed step
        # rather than aborting the whole suite.
        print(f"💥 {description} crashed: {e}")
        return False, "", str(e)

    duration = time.time() - start_time

    if result.returncode == 0:
        print(f"✅ {description} completed ({duration:.1f}s)")
        return True, result.stdout, result.stderr

    print(f"❌ {description} failed ({duration:.1f}s)")
    print(f"   Error: {result.stderr}")
    return False, result.stdout, result.stderr


def run_environment_validation():
    """Run the environment validation script.

    Prints the section banner, then executes ``validate_environment.py``
    from the project's ``test`` directory through ``run_command``.

    Returns:
        The success flag reported by ``run_command``.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("1️⃣ 환경 검증")
    print(rule)

    # Only the success flag is needed; captured output is discarded.
    ok, _stdout, _stderr = run_command(
        "cd /home/ubuntu/RLVR/TestTime-RLVR-v2/test && python validate_environment.py",
        "Environment validation",
    )
    return ok


def run_unit_tests():
    """Run the unit test script.

    Prints the section banner, then executes
    ``test_ttrlvr_azr_integration.py`` from the project's ``test``
    directory through ``run_command``.

    Returns:
        The success flag reported by ``run_command``.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("2️⃣ 단위 테스트")
    print(rule)

    # Only the success flag is needed; captured output is discarded.
    ok, _stdout, _stderr = run_command(
        "cd /home/ubuntu/RLVR/TestTime-RLVR-v2/test && python test_ttrlvr_azr_integration.py",
        "Unit tests",
    )
    return ok


def _print_latest_training_summary(results_dir):
    """Print a short summary of the newest entry under *results_dir*.

    Best-effort reporting only: a missing directory, a missing results file,
    or unreadable / malformed JSON never raises, so a successful test run is
    not turned into a crash by a bad results file.
    """
    if not results_dir.exists():
        return

    latest_result = max(results_dir.glob("*"), key=os.path.getctime, default=None)
    if latest_result is None:
        return

    print(f"📁 Results saved to: {latest_result}")

    result_file = latest_result / "training_results.json"
    if not result_file.exists():
        return

    try:
        with open(result_file, 'r', encoding='utf-8') as f:
            results = json.load(f)
    except (OSError, ValueError) as exc:
        # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
        print(f"⚠️ Could not read {result_file}: {exc}")
        return

    if not isinstance(results, dict):
        print(f"⚠️ Unexpected content in {result_file}: expected a JSON object")
        return

    print("📊 Test summary:")
    print(f"   - Success: {results.get('success', False)}")
    # "rounds" may be absent or null; both count as zero completed rounds.
    print(f"   - Completed rounds: {len(results.get('rounds') or {})}")
    print(f"   - Final model: {results.get('final_model', 'N/A')}")


def run_mini_integration_test():
    """Run the mini integration test (1 problem, 2 rounds).

    Executes ``train_ttrlvr_azr.py`` on one MBPP problem for two rounds in
    debug mode with a 30-minute timeout. On success, also prints a summary of
    the most recent results directory when one is available.

    Returns:
        The success flag reported by ``run_command``; the summary printing
        never affects it.
    """

    print("\n" + "="*60)
    print("3️⃣ 미니 통합 테스트")
    print("="*60)

    # Short end-to-end run.
    success, _stdout, _stderr = run_command(
        "cd /home/ubuntu/RLVR/TestTime-RLVR-v2/test && python train_ttrlvr_azr.py --benchmark mbpp --problems 1 --rounds 2 --debug",
        "Mini integration test (1 problem, 2 rounds)",
        timeout=1800  # 30 minutes
    )

    if success:
        print("✅ Mini integration test completed successfully")
        _print_latest_training_summary(
            Path("/home/ubuntu/RLVR/TestTime-RLVR-v2/test/results/ttrlvr_azr")
        )

    return success


def check_disk_space(paths=None, threshold=90):
    """Check how full the filesystems holding the given paths are.

    Usage is read with ``shutil.disk_usage`` rather than by parsing ``df``
    output, so no shell is involved and there is no text format to
    misinterpret.

    Args:
        paths: Iterable of paths to inspect. Defaults to the project
            directory, ``/data`` and ``/tmp``.
        threshold: Usage percentage above which a path counts as too full.

    Returns:
        False if any existing path is more than ``threshold`` percent full,
        otherwise True. Missing or unreadable paths only produce a warning.
    """

    print("\n" + "="*60)
    print("4️⃣ 디스크 공간 확인")
    print("="*60)

    if paths is None:
        # Directories that matter for a training run.
        paths = ("/home/ubuntu/RLVR", "/data", "/tmp")

    all_good = True

    for path in paths:
        if not os.path.exists(path):
            print(f"⚠️ Path not found: {path}")
            continue

        try:
            usage = shutil.disk_usage(path)
        except OSError as exc:
            print(f"⚠️ Could not read disk usage for {path}: {exc}")
            continue

        # Percentage of the space usable by this user (used + free), rounded
        # up, which is how df's Use% column is computed.
        usable = usage.used + usage.free
        used_percent = (usage.used * 100 + usable - 1) // usable if usable else 0

        if used_percent > threshold:
            print(f"⚠️ Warning: {path} is {used_percent}% full")
            all_good = False
        else:
            print(f"✅ {path}: {used_percent}% used")

    return all_good


def run_performance_benchmark():
    """Print a snapshot of GPU memory, system memory and CPU usage.

    Each reading comes from an external tool run through ``run_command``:
    ``nvidia-smi`` for GPU memory, ``free -h`` for system memory and ``top``
    for CPU usage.

    Returns:
        True when both the GPU query and the memory query succeeded. The CPU
        query is informational and does not affect the result.
    """

    print("\n" + "="*60)
    print("5️⃣ 성능 벤치마크")
    print("="*60)

    # GPU memory usage
    print("🖥️ GPU 메모리 상태:")
    gpu_success, gpu_output, _ = run_command(
        "nvidia-smi --query-gpu=memory.used,memory.total --format=csv,noheader,nounits",
        "GPU memory check",
    )

    if gpu_success:
        for i, line in enumerate(gpu_output.strip().split('\n')):
            if not line.strip():
                continue
            try:
                used, total = (int(field) for field in line.split(','))
                usage_percent = (used / total) * 100
            except (ValueError, ZeroDivisionError):
                # Non-numeric fields, an unexpected column count or a zero
                # total: fall back to showing the raw line.
                print(f"   GPU {i}: {line}")
            else:
                print(f"   GPU {i}: {used}MB / {total}MB ({usage_percent:.1f}%)")

    # System memory
    print("\n💾 시스템 메모리 상태:")
    mem_success, mem_output, _ = run_command("free -h", "System memory check")
    if mem_success:
        for line in mem_output.split('\n')[:2]:  # first two lines only
            print(f"   {line}")

    # CPU usage
    print("\n🖥️ CPU 상태:")
    cpu_success, cpu_output, _ = run_command("top -bn1 | grep 'Cpu(s)' | head -1", "CPU usage check")
    if cpu_success:
        print(f"   {cpu_output.strip()}")

    return gpu_success and mem_success


def generate_validation_report(results, output_dir="/tmp"):
    """Write JSON and HTML validation reports and return the report data.

    Args:
        results: Mapping of test name to a dict with a required ``'success'``
            flag and optional ``'duration'`` / ``'details'`` entries. The
            mapping must be JSON-serializable.
        output_dir: Directory that receives both report files.

    Returns:
        The report dict with the keys ``'timestamp'``,
        ``'validation_results'``, ``'summary'`` and ``'recommendations'``.

    Raises:
        KeyError: If an entry in ``results`` has no ``'success'`` key.
        OSError: If a report file cannot be written.
    """

    print("\n" + "="*60)
    print("6️⃣ 검증 보고서 생성")
    print("="*60)

    # One clock reading so file names, JSON and HTML all agree.
    now = datetime.now()
    stamp = now.strftime('%Y%m%d_%H%M%S')
    report_file = os.path.join(output_dir, f"ttrlvr_azr_validation_report_{stamp}.json")
    html_report = os.path.join(output_dir, f"ttrlvr_azr_validation_report_{stamp}.html")

    # Report data
    summary = {
        'total_tests': len(results),
        'passed_tests': sum(1 for result in results.values() if result['success']),
        'overall_success': all(result['success'] for result in results.values()),
    }
    report = {
        'timestamp': now.isoformat(),
        'validation_results': results,
        'summary': summary,
        'recommendations': [],
    }

    overall_class = 'success' if summary['overall_success'] else 'failure'
    overall_label = '✅ ALL TESTS PASSED' if summary['overall_success'] else '❌ SOME TESTS FAILED'

    # HTML report, assembled from parts and joined once at the end.
    page_parts = [f"""
<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>TTRLVR + AZR Validation Report</title>
    <style>
        body {{ font-family: Arial, sans-serif; margin: 40px; }}
        .header {{ background-color: #f0f0f0; padding: 20px; border-radius: 5px; }}
        .success {{ color: green; }}
        .failure {{ color: red; }}
        .test-section {{ margin: 20px 0; padding: 15px; border: 1px solid #ddd; border-radius: 5px; }}
        .recommendations {{ background-color: #fff3cd; padding: 15px; border-radius: 5px; }}
    </style>
</head>
<body>
    <div class="header">
        <h1>TTRLVR + AZR Integration Validation Report</h1>
        <p>Generated: {now.strftime('%Y-%m-%d %H:%M:%S')}</p>
        <p>Overall Status: <span class="{overall_class}">
            {overall_label}
        </span></p>
        <p>Tests: {summary['passed_tests']}/{summary['total_tests']} passed</p>
    </div>
    
    <h2>Test Results</h2>
"""]

    for test_name, result in results.items():
        status = "success" if result['success'] else "failure"
        icon = "✅" if result['success'] else "❌"
        # Escape everything interpolated so '<', '>' or '&' in a name or
        # detail string cannot break the markup.
        safe_name = html.escape(str(test_name))
        safe_duration = html.escape(str(result.get('duration', 'N/A')))
        safe_details = html.escape(str(result.get('details', 'No details available')))

        page_parts.append(f"""
    <div class="test-section">
        <h3 class="{status}">{icon} {safe_name}</h3>
        <p><strong>Duration:</strong> {safe_duration}</p>
        <p><strong>Details:</strong> {safe_details}</p>
    </div>
""")

    if report['recommendations']:
        page_parts.append("""
    <div class="recommendations">
        <h2>Recommendations</h2>
        <ul>
""")
        page_parts.extend(
            f"<li>{html.escape(str(rec))}</li>" for rec in report['recommendations']
        )
        page_parts.append("""
        </ul>
    </div>
""")

    page_parts.append("""
</body>
</html>
""")

    # Save both files. UTF-8 is explicit because the HTML contains emoji and
    # the locale's default encoding may not be able to represent them.
    with open(report_file, 'w', encoding='utf-8') as f:
        json.dump(report, f, indent=2)

    with open(html_report, 'w', encoding='utf-8') as f:
        f.write("".join(page_parts))

    print(f"📄 JSON 보고서: {report_file}")
    print(f"🌐 HTML 보고서: {html_report}")

    return report


def main():
    """Run every validation stage in order and return a process exit code.

    Stages: environment validation, unit tests (only if the environment is
    valid), mini integration test (only if the unit tests passed), disk space
    check and performance benchmark (both always run), then report
    generation.

    Returns:
        0 when the generated report marks every stage as successful,
        otherwise 1.
    """
    rule = "=" * 60

    def timed(stage, details):
        # Run one stage and package its outcome with the elapsed time.
        began = time.time()
        outcome = stage()
        return {
            'success': outcome,
            'duration': f"{time.time() - began:.1f}s",
            'details': details,
        }

    def skipped(details):
        # Placeholder entry for a stage that was not run.
        return {'success': False, 'duration': '0s', 'details': details}

    print("🧪 TTRLVR + AZR 통합 검증 스위트 시작")
    print(rule)
    print(f"시작 시간: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(rule)

    # Collected stage outcomes, in execution order.
    results = {}
    suite_began = time.time()

    # 1. Environment validation
    results['environment_validation'] = timed(
        run_environment_validation,
        'Environment setup and dependencies check',
    )

    # 2. Unit tests (only when the environment validation passed)
    if results['environment_validation']['success']:
        results['unit_tests'] = timed(
            run_unit_tests,
            'Component unit tests and integration tests',
        )
    else:
        results['unit_tests'] = skipped('Skipped due to environment validation failure')

    # 3. Mini integration test (only when the earlier stages passed)
    if results['unit_tests']['success']:
        results['mini_integration_test'] = timed(
            run_mini_integration_test,
            'End-to-end pipeline test with 1 problem, 2 rounds',
        )
    else:
        results['mini_integration_test'] = skipped('Skipped due to previous test failures')

    # 4. Disk space check (always runs)
    results['disk_space_check'] = timed(
        check_disk_space,
        'Available disk space in critical directories',
    )

    # 5. Performance benchmark (always runs)
    results['performance_benchmark'] = timed(
        run_performance_benchmark,
        'System resource usage and performance metrics',
    )

    # 6. Report generation
    elapsed = time.time() - suite_began
    print(f"\n⏱️ 총 실행 시간: {elapsed:.1f}초 ({elapsed/60:.1f}분)")

    report = generate_validation_report(results)

    # Final summary
    print("\n" + rule)
    print("🏁 검증 스위트 완료")
    print(rule)

    total = len(results)
    passed = sum(1 for entry in results.values() if entry['success'])
    print(f"📊 최종 결과: {passed}/{total} 테스트 통과")

    if not report['summary']['overall_success']:
        print("⚠️ 일부 검증 실패. 위의 결과를 확인하고 문제를 해결하세요.")
        return 1

    print("🎉 모든 검증 통과! TTRLVR + AZR 시스템 실행 준비 완료")
    return 0


if __name__ == '__main__':
    # Hand main()'s status code (0 = all passed, 1 = failure) to the shell.
    sys.exit(main())