File size: 3,586 Bytes
5316545
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import lighteval
from lighteval.logging.evaluation_tracker import EvaluationTracker
from lighteval.models.vllm.vllm_model import VLLMModelConfig
from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters
from lighteval.utils.imports import is_package_available
from lighteval.tasks.lighteval_task import LightevalTask
import os
import torch
import warnings

def main():
    """Run a lighteval math benchmark on a local model via vLLM tensor parallelism.

    Side effects: sets CUDA/vLLM environment variables, writes evaluation
    results and details under ./results, and prints a summary to stdout.
    """
    # Restrict the job to the first two GPUs.
    # NOTE(review): this clobbers any CUDA_VISIBLE_DEVICES the caller
    # exported — confirm that is intentional for this script.
    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"

    # vLLM workers must be spawned (not forked) once CUDA has been touched,
    # so this must be set before any torch.cuda call below.
    os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"

    os.environ["VLLM_USE_RAY_COMPILED_DAG"] = "1"

    # Count GPUs *after* CUDA_VISIBLE_DEVICES so the count reflects the mask.
    num_gpus = torch.cuda.device_count()
    print(f"\n{'=' * 100}")
    print(f"Detected {num_gpus} GPU(s)")
    for i in range(num_gpus):  # empty range when no GPU — no guard needed
        print(f"  GPU {i}: {torch.cuda.get_device_name(i)}")
    print(f"{'=' * 100}\n")

    evaluation_tracker = EvaluationTracker(
        output_dir="./results",
        save_details=True,
        push_to_hub=False,
    )

    pipeline_params = PipelineParameters(
        launcher_type=ParallelismManager.ACCELERATE,
        custom_tasks_directory=None,
        max_samples=500,
    )

    model_config = VLLMModelConfig(
        model_name="/public/home/lshi/yoAI/projects/Online_CL/LUFFY/data/save_model/Qwen2.5-1.5B",
        dtype="bfloat16",
        max_model_length=4096,  ## 8192
        trust_remote_code=True,
        # Fix: never hand vLLM a tensor_parallel_size of 0 — fall back to a
        # single device when no GPU is visible so the config stays valid.
        tensor_parallel_size=max(num_gpus, 1),
        gpu_memory_utilization=0.90,
    )

    # Task spec format: suite|task|num_fewshot.  Alternatives: aime24, aime24_gpassk.
    task = "lighteval|math_500|0"

    print(f"Using {num_gpus} GPU(s) with tensor parallelism\n")

    pipeline = Pipeline(
        tasks=task,
        pipeline_parameters=pipeline_params,
        evaluation_tracker=evaluation_tracker,
        model_config=model_config,
    )

    # Cap generation length per document.
    # NOTE(review): this reaches into the private `_docs` attribute and sets
    # `generation_size` on each doc — verify this is still honored by the
    # lighteval version in use; private APIs may change without notice.
    for _task_name, task_obj in pipeline.tasks_dict.items():
        for doc in task_obj._docs:
            doc.generation_size = 2048

    try:
        pipeline.evaluate()
        pipeline.show_results()
    finally:
        # Fix: check is_available() first — on torch builds compiled without
        # distributed support, is_initialized() is not safe to call.
        if torch.distributed.is_available() and torch.distributed.is_initialized():
            torch.distributed.destroy_process_group()


if __name__ == "__main__":
    main()