import lighteval
from lighteval.logging.evaluation_tracker import EvaluationTracker
from lighteval.models.vllm.vllm_model import VLLMModelConfig
from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters
from lighteval.utils.imports import is_package_available
from lighteval.tasks.lighteval_task import LightevalTask
import os
import torch
import warnings


def _configure_environment():
    """Set CUDA / vLLM environment variables.

    Must run before the first torch.cuda call: CUDA_VISIBLE_DEVICES is
    only honored if it is set before CUDA is initialized.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
    # vLLM workers have to be spawned (not forked) once CUDA is in play.
    os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
    os.environ["VLLM_USE_RAY_COMPILED_DAG"] = "1"


def _detect_gpus():
    """Return the number of visible CUDA devices, printing a summary banner."""
    num_gpus = torch.cuda.device_count()
    print(f"\n{'='*100}")
    print(f"Detected {num_gpus} GPU(s)")
    # range(0) is empty, so no extra `if num_gpus > 0` guard is needed.
    for i in range(num_gpus):
        print(f" GPU {i}: {torch.cuda.get_device_name(i)}")
    print(f"{'='*100}\n")
    return num_gpus


def main():
    """Evaluate a local checkpoint on lighteval's math_500 task via vLLM.

    Side effects: sets process environment variables, loads the model,
    writes evaluation results under ./results, prints a results table.
    """
    _configure_environment()
    num_gpus = _detect_gpus()

    evaluation_tracker = EvaluationTracker(
        output_dir="./results",
        save_details=True,
        push_to_hub=False,
    )

    # NOTE(review): ACCELERATE launcher paired with a VLLMModelConfig —
    # lighteval also ships ParallelismManager.VLLM; confirm this
    # combination is intentional.
    pipeline_params = PipelineParameters(
        launcher_type=ParallelismManager.ACCELERATE,
        custom_tasks_directory=None,
        max_samples=500,
    )

    model_config = VLLMModelConfig(
        # Alternative checkpoints — toggle as needed:
        # model_name="Qwen/Qwen2.5-1.5B",
        # model_name="Elliott/LUFFY-Qwen-Math-7B-Zero",
        # model_name="/public/home/lshi/yoAI/projects/Online_CL/LUFFY/data/save_model/Qwen2.5-Math-1.5B",
        # model_name="/public/home/lshi/yoAI/projects/Online_CL/VERL_SFT/outputs/Qwen2.5-1.5B-OpenR1-SFT/global_step_5724",
        # model_name="/public/home/lshi/yoAI/projects/Online_CL/train/model_sft_save/Qwen2.5-1.5B-Full-solution",
        # model_name="/public/home/lshi/yoAI/projects/Online_CL/train/model_sft_save/Qwen2.5-1.5B-Entropy-solution-1/checkpoint-1950",
        model_name="/public/home/lshi/yoAI/projects/Online_CL/LUFFY/data/save_model/Qwen2.5-1.5B",
        dtype="bfloat16",
        max_model_length=4096,  ## 8192
        trust_remote_code=True,
        # tensor_parallel_size must be >= 1; fall back to 1 when no GPU
        # is detected instead of passing 0 and crashing inside vLLM.
        tensor_parallel_size=max(num_gpus, 1),
        gpu_memory_utilization=0.90,
    )

    task = "lighteval|math_500|0"  # aime24 aime24_gpassk
    print(f"Using {num_gpus} GPU(s) with tensor parallelism\n")

    pipeline = Pipeline(
        tasks=task,
        pipeline_parameters=pipeline_params,
        evaluation_tracker=evaluation_tracker,
        model_config=model_config,
    )

    # Override generation_size on every doc of every task.
    # NOTE(review): this pokes the private `_docs` attribute and sets a
    # per-doc generation_size — confirm the installed lighteval version
    # exposes `_docs` and actually honors this field.
    for task_obj in pipeline.tasks_dict.values():
        for doc in task_obj._docs:
            doc.generation_size = 2048

    try:
        pipeline.evaluate()
        # pipeline.save_and_push_results()
        pipeline.show_results()
    finally:
        # Tear down the process group so repeated runs don't leak
        # distributed resources.
        if torch.distributed.is_initialized():
            torch.distributed.destroy_process_group()


if __name__ == "__main__":
    main()