#!/usr/bin/env python3
"""
Standalone test script for VeRL training.
Runs only Step 5 on existing data to verify that training works correctly.
"""
import os
import sys
sys.path.append('/home/ubuntu/RLVR/TestTime-RLVR-v2')
sys.path.append('/home/ubuntu/RLVR/TestTime-RLVR-v2/test')
sys.path.append('/home/ubuntu/RLVR/verl')

from utils.iterative_trainer import IterativeTrainer
from absolute_zero_reasoner.testtime.config import TestTimeConfig
from absolute_zero_reasoner.testtime.logger import TestTimeLogger
import torch

def _log_parquet_files(logger, data_dir):
    """Log how many entries *data_dir* holds and the size of each ``.parquet`` file.

    Args:
        logger: Object exposing ``log_info(str)``.
        data_dir: Path of an existing directory to list.
    """
    entries = os.listdir(data_dir)
    # The count covers every directory entry; only parquet files are itemized.
    logger.log_info(f"📄 Found {len(entries)} files:")
    for name in sorted(entries):
        if name.endswith('.parquet'):
            size_mb = os.path.getsize(os.path.join(data_dir, name)) / 1024 / 1024  # MB
            logger.log_info(f"   - {name}: {size_mb:.2f} MB")


def _log_gpu_memory_usage(logger, label):
    """Log the values of ``torch.cuda.memory_allocated``/``memory_reserved`` in GB.

    Args:
        logger: Object exposing ``log_info(str)``.
        label: Text placed in front of the figures (e.g. ``"Current usage"``).
    """
    allocated = torch.cuda.memory_allocated() / 1024**3
    reserved = torch.cuda.memory_reserved() / 1024**3
    logger.log_info(f"📊 {label} - Allocated: {allocated:.2f} GB, Reserved: {reserved:.2f} GB")


def _log_llm_responses(logger, data_dir):
    """Log up to five ``.jsonl`` files found in ``<data_dir>/llm_responses``.

    Emits a warning instead when that sub-directory does not exist.

    Args:
        logger: Object exposing ``log_info(str)`` and ``log_warning(str)``.
        data_dir: Directory expected to contain the ``llm_responses`` folder.
    """
    responses_dir = os.path.join(data_dir, "llm_responses")
    if not os.path.isdir(responses_dir):
        logger.log_warning("⚠️  No llm_responses directory found")
        return

    response_files = sorted(f for f in os.listdir(responses_dir) if f.endswith('.jsonl'))
    logger.log_info(f"📝 Generated {len(response_files)} response files")
    for name in response_files[:5]:  # show only the first five
        logger.log_info(f"   - {name}")


def test_verl_training():
    """Run only the VeRL training step (Step 5) against existing training data.

    The function builds the config and logger, verifies that the hard-coded
    training data directory exists, constructs the trainer, reports GPU state,
    and then calls ``trainer.run_verl_training_only``. The outcome is reported
    through the logger only.

    Returns:
        None. Success or failure is logged, not returned.

    Raises:
        Nothing from the training call itself: any ``Exception`` raised inside
        the training ``try`` block is logged and its traceback printed.
        Exceptions from constructing the config/logger/trainer, or from
        ``trainer.cleanup()``, propagate to the caller.
    """
    # Base generation/training settings
    config = TestTimeConfig(
        model_name="Qwen/Qwen2.5-7B",
        batch_size=8,
        temperature=0.7,
        top_k=50,
        top_p=0.9,
        max_new_tokens=2048,
        save_model=True
    )

    # Logger setup
    logger = TestTimeLogger(
        log_dir="/home/ubuntu/RLVR/TestTime-RLVR-v2/test_verl_logs",
        log_level="INFO"
    )

    # Location of the training data produced by an earlier run
    training_data_path = "/home/ubuntu/RLVR/TestTime-RLVR-v2/tmp/batch_results/ttrlvr_azr_20250805_142357/mbpp/Mbpp_2/round_1/azr_training_data"

    logger.log_info("="*80)
    logger.log_info("🧪 VeRL Training Test - Step 5 Only")
    logger.log_info("="*80)
    logger.log_info(f"📁 Training data: {training_data_path}")

    # Validate the data directory BEFORE building the trainer: bailing out here
    # then leaves no trainer behind that would need cleanup. isdir (rather than
    # exists) also rejects a plain file, which os.listdir could not handle.
    if not os.path.isdir(training_data_path):
        logger.log_error(f"❌ Training data directory not found: {training_data_path}")
        return
    _log_parquet_files(logger, training_data_path)

    # Trainer initialization
    trainer = IterativeTrainer(
        config=config,
        logger=logger,
        verl_config_path="/home/ubuntu/RLVR/TestTime-RLVR-v2/test/configs/ttrlvr_azr_ppo_4gpu.yaml",
        save_every_round=True
    )

    # Report GPU identity, capacity, and current memory usage
    if torch.cuda.is_available():
        logger.log_info(f"🖥️  GPU available: {torch.cuda.get_device_name(0)}")
        logger.log_info(f"📊 GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
        _log_gpu_memory_usage(logger, "Current usage")

    # Run the VeRL training step only (Step 5)
    try:
        logger.log_info("\n" + "="*80)
        logger.log_info("🚀 Starting VeRL training test...")
        logger.log_info("="*80 + "\n")

        result = trainer.run_verl_training_only(
            training_data_path=training_data_path,
            round_num=1,
            experiment_name="verl_test_step5_only"
        )

        # An empty/None result or a missing 'success' key counts as a failure
        # instead of surfacing as an unrelated KeyError/TypeError.
        if result and result.get('success'):
            logger.log_info("\n" + "="*80)
            logger.log_info("✅ VeRL training test completed successfully!")
            logger.log_info("="*80)

            # Result summary (both keys are optional)
            if 'duration' in result:
                logger.log_info(f"⏱️  Training duration: {result['duration']:.2f} seconds")
            if 'model_path' in result:
                logger.log_info(f"💾 Model saved to: {result['model_path']}")

            _log_llm_responses(logger, training_data_path)
        else:
            error = result.get('error', 'Unknown error') if result else 'Unknown error'
            logger.log_error("\n" + "="*80)
            logger.log_error(f"❌ VeRL training failed: {error}")
            logger.log_error("="*80)

    except Exception as e:
        # Top-level boundary of the test script: report and fall through to cleanup.
        logger.log_error(f"\n💥 Test failed with exception: {e}")
        import traceback
        traceback.print_exc()

    finally:
        logger.log_info("\n🧹 Cleaning up...")
        # cleanup() is optional on the trainer, so probe for it first.
        if hasattr(trainer, 'cleanup'):
            trainer.cleanup()

        # Release cached CUDA blocks, then report the final memory state
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            _log_gpu_memory_usage(logger, "Final GPU usage")

        logger.log_info("\n✅ Test script completed")


if __name__ == "__main__":
    # Environment variables applied before the test starts:
    # four visible GPUs, tokenizers parallelism off, NCCL logging at WARN.
    for env_key, env_value in (
        ("CUDA_VISIBLE_DEVICES", "0,1,2,3"),
        ("TOKENIZERS_PARALLELISM", "false"),
        ("NCCL_DEBUG", "WARN"),
    ):
        os.environ[env_key] = env_value

    # Banner: blank line, rule, two title lines, rule, blank line.
    rule = "=" * 80
    banner_lines = (
        "\n" + rule,
        "🧪 VeRL Training Test - Testing Step 5 Only",
        "📁 Using existing data from previous run",
        rule + "\n",
    )
    print("\n".join(banner_lines))

    test_verl_training()