#!/usr/bin/env python3
"""Standalone VeRL training test script.

Runs only Step 5 (VeRL training) against data left behind by an earlier run,
to check that the training step itself works.
"""

import os
import sys
import traceback

# The project packages are not installed; make them importable first.
sys.path.append('/home/ubuntu/RLVR/TestTime-RLVR-v2')
sys.path.append('/home/ubuntu/RLVR/TestTime-RLVR-v2/test')
sys.path.append('/home/ubuntu/RLVR/verl')

from utils.iterative_trainer import IterativeTrainer  # noqa: E402
from absolute_zero_reasoner.testtime.config import TestTimeConfig  # noqa: E402
from absolute_zero_reasoner.testtime.logger import TestTimeLogger  # noqa: E402
import torch  # noqa: E402

# Horizontal rule used to frame the log banners.
_RULE = "=" * 80


def _log_gpu_memory_usage(logger, label):
    """Log the allocated and reserved CUDA memory (in GB) under *label*."""
    allocated = torch.cuda.memory_allocated() / 1024**3
    reserved = torch.cuda.memory_reserved() / 1024**3
    logger.log_info(f"📊 {label} - Allocated: {allocated:.2f} GB, Reserved: {reserved:.2f} GB")


def _log_training_files(logger, training_data_path):
    """Log the parquet files found in *training_data_path*.

    Returns:
        True when the path is an existing directory, False otherwise (an
        error is logged in that case).
    """
    # isdir rather than exists: a regular file at this path would make
    # os.listdir raise instead of reaching the error message below.
    if not os.path.isdir(training_data_path):
        logger.log_error(f"❌ Training data directory not found: {training_data_path}")
        return False

    entries = os.listdir(training_data_path)
    # The count covers every entry; only parquet files are listed below.
    logger.log_info(f"📄 Found {len(entries)} files:")
    for name in sorted(entries):
        if name.endswith('.parquet'):
            size_mb = os.path.getsize(os.path.join(training_data_path, name)) / 1024 / 1024
            logger.log_info(f" - {name}: {size_mb:.2f} MB")
    return True


def _log_llm_responses(logger, training_data_path):
    """Log how many .jsonl files exist under ``llm_responses`` and name the first five."""
    llm_responses_dir = os.path.join(training_data_path, "llm_responses")
    if not os.path.exists(llm_responses_dir):
        logger.log_warning("⚠️ No llm_responses directory found")
        return

    response_files = [f for f in os.listdir(llm_responses_dir) if f.endswith('.jsonl')]
    logger.log_info(f"📝 Generated {len(response_files)} response files")
    for name in sorted(response_files)[:5]:  # show only the first five
        logger.log_info(f" - {name}")


def test_verl_training():
    """Run only the VeRL training step against existing training data.

    Builds the config, logger and trainer, checks that the training data
    directory is present, reports GPU state, and then calls
    ``trainer.run_verl_training_only``. The result is read as a mapping with a
    ``'success'`` entry and optional ``'duration'``, ``'model_path'`` and
    ``'error'`` entries. Exceptions raised by the training call are logged and
    printed, not propagated.

    Returns:
        None. The outcome is reported through the logger only.
    """
    # Base configuration
    config = TestTimeConfig(
        model_name="Qwen/Qwen2.5-7B",
        batch_size=8,
        temperature=0.7,
        top_k=50,
        top_p=0.9,
        max_new_tokens=2048,
        save_model=True
    )

    # Logger setup
    logger = TestTimeLogger(
        log_dir="/home/ubuntu/RLVR/TestTime-RLVR-v2/test_verl_logs",
        log_level="INFO"
    )

    # Trainer initialisation
    trainer = IterativeTrainer(
        config=config,
        logger=logger,
        verl_config_path="/home/ubuntu/RLVR/TestTime-RLVR-v2/test/configs/ttrlvr_azr_ppo_4gpu.yaml",
        save_every_round=True
    )

    # Training data location
    training_data_path = "/home/ubuntu/RLVR/TestTime-RLVR-v2/tmp/batch_results/ttrlvr_azr_20250805_142357/mbpp/Mbpp_2/round_1/azr_training_data"

    # Everything after the trainer exists runs under one try/finally so that
    # the cleanup below also happens on the early return for missing data.
    try:
        logger.log_info(_RULE)
        logger.log_info("🧪 VeRL Training Test - Step 5 Only")
        logger.log_info(_RULE)
        logger.log_info(f"📁 Training data: {training_data_path}")

        # Check the data files
        if not _log_training_files(logger, training_data_path):
            return

        # Report GPU state before training
        if torch.cuda.is_available():
            logger.log_info(f"🖥️ GPU available: {torch.cuda.get_device_name(0)}")
            logger.log_info(f"📊 GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
            _log_gpu_memory_usage(logger, "Current usage")

        # Run only the VeRL training (Step 5)
        try:
            logger.log_info("\n" + _RULE)
            logger.log_info("🚀 Starting VeRL training test...")
            logger.log_info(_RULE + "\n")

            result = trainer.run_verl_training_only(
                training_data_path=training_data_path,
                round_num=1,
                experiment_name="verl_test_step5_only"
            )

            if result['success']:
                logger.log_info("\n" + _RULE)
                logger.log_info("✅ VeRL training test completed successfully!")
                logger.log_info(_RULE)

                # Result summary
                if 'duration' in result:
                    logger.log_info(f"⏱️ Training duration: {result['duration']:.2f} seconds")
                if 'model_path' in result:
                    logger.log_info(f"💾 Model saved to: {result['model_path']}")

                _log_llm_responses(logger, training_data_path)
            else:
                logger.log_error("\n" + _RULE)
                logger.log_error(f"❌ VeRL training failed: {result.get('error', 'Unknown error')}")
                logger.log_error(_RULE)

        except Exception as e:
            # Top-level boundary of a test script: report and carry on to cleanup.
            logger.log_error(f"\n💥 Test failed with exception: {e}")
            traceback.print_exc()
    finally:
        # Cleanup
        logger.log_info("\n🧹 Cleaning up...")
        if hasattr(trainer, 'cleanup'):
            trainer.cleanup()

        # Final GPU memory state
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            _log_gpu_memory_usage(logger, "Final GPU usage")

    logger.log_info("\n✅ Test script completed")


if __name__ == "__main__":
    # Environment variables
    # NOTE(review): these are set after torch has been imported; this relies on
    # nothing having initialised CUDA at import time - confirm.
    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"  # use 4 GPUs
    os.environ["TOKENIZERS_PARALLELISM"] = "false"
    os.environ["NCCL_DEBUG"] = "WARN"

    print("\n" + "="*80)
    print("🧪 VeRL Training Test - Testing Step 5 Only")
    print("📁 Using existing data from previous run")
    print("="*80 + "\n")

    test_verl_training()