File size: 4,736 Bytes
bd895ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import lighteval
from lighteval.logging.evaluation_tracker import EvaluationTracker
from lighteval.models.vllm.vllm_model import VLLMModelConfig
from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters
from lighteval.utils.imports import is_package_available
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
import os
import torch

# Module-level side effect: initialize a distributed Accelerator when the
# `accelerate` package is installed. The 3000 s process-group timeout guards
# against NCCL timeouts while large model shards are being loaded.
# NOTE(review): `accelerator` is never referenced later in this file —
# presumably creating it initializes the process group that lighteval's
# ACCELERATE launcher relies on; confirm it is still required.
if is_package_available("accelerate"):
    from datetime import timedelta
    from accelerate import Accelerator, InitProcessGroupKwargs
    accelerator = Accelerator(kwargs_handlers=[InitProcessGroupKwargs(timeout=timedelta(seconds=3000))])
else:
    accelerator = None

def merge_lora_if_needed(
    base_model_name: str = "Qwen/Qwen2.5-Math-1.5B",
    lora_path: str = "/public/home/lshi/yoAI/projects/Online_CL/train/model_sft_save/Qwen2.5-Math-1.5B-DeepScaleR-Lora/checkpoint-2834",
    merged_path: str = "/public/home/lshi/yoAI/projects/Online_CL/train/model_sft_save/Qwen2.5-Math-1.5B-DeepScaleR-Merged",
) -> str:
    """Merge a LoRA adapter into its base model, caching the result on disk.

    If a merged checkpoint already exists at ``merged_path`` (detected by the
    presence of ``config.json``), the expensive load/merge/save cycle is
    skipped entirely and the cached path is returned.

    Args:
        base_model_name: HF hub id (or local path) of the base model.
        lora_path: Path to the trained LoRA adapter checkpoint.
        merged_path: Directory where the merged model is (or will be) stored.

    Returns:
        The path to the merged model directory, ready for evaluation.
    """
    # Cheap existence check: config.json is always written by save_pretrained,
    # so its presence is a reliable marker that a previous merge completed.
    if os.path.exists(os.path.join(merged_path, "config.json")):
        print(f"Merged model already exists at {merged_path}")
        return merged_path

    print("="*100)
    print("Merged model not found. Starting merge process...")
    print("="*100)

    print("\n[1/5] Loading base model...")
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        device_map="auto"
    )

    print("\n[2/5] Loading LoRA adapter...")
    model = PeftModel.from_pretrained(base_model, lora_path)

    print("\n[3/5] Merging LoRA weights with base model...")
    # merge_and_unload folds the adapter deltas into the base weights and
    # returns a plain transformers model (no PEFT wrapper).
    merged_model = model.merge_and_unload()

    print(f"\n[4/5] Saving merged model to {merged_path}...")
    os.makedirs(merged_path, exist_ok=True)
    merged_model.save_pretrained(merged_path, safe_serialization=True)

    print("\n[5/5] Saving tokenizer...")
    # The adapter does not change the tokenizer, so save the base model's
    # tokenizer alongside the merged weights for a self-contained checkpoint.
    tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
    tokenizer.save_pretrained(merged_path)

    print("\n" + "="*100)
    print("✓ Merge completed successfully!")
    print(f"✓ Merged model saved to: {merged_path}")
    print("="*100 + "\n")

    # Free GPU memory before the evaluation pipeline spins up vLLM.
    del base_model
    del model
    del merged_model
    torch.cuda.empty_cache()

    return merged_path

def main() -> None:
    """Drive an end-to-end lighteval run: merge LoRA (if needed), then evaluate.

    Order matters throughout: CUDA_VISIBLE_DEVICES must be exported before
    any CUDA context is created, and the merged checkpoint must exist before
    the vLLM model config references it.
    """
    # ===== SET CUDA_VISIBLE_DEVICES FIRST (BEFORE ANY TORCH OPERATIONS) =====
    # Option 1: Use only GPU 2
    # NOTE(review): torch was already imported at module load; this is only
    # effective if no CUDA context has been initialized yet — confirm nothing
    # above touches CUDA before this point.
    os.environ["CUDA_VISIBLE_DEVICES"] = "2"
    
    # First, ensure the merged model exists
    print("Checking for merged model...")
    merged_model_path = merge_lora_if_needed()
    
    # ===== DETECT NUMBER OF GPUs AFTER SETTING CUDA_VISIBLE_DEVICES =====
    # device_count reflects the filtered view, so with "2" above this is 1.
    num_gpus = torch.cuda.device_count()
    print(f"\n{'='*100}")
    print(f"Detected {num_gpus} GPU(s) (after CUDA_VISIBLE_DEVICES filtering)")
    
    if num_gpus > 0:
        for i in range(num_gpus):
            print(f"  GPU {i}: {torch.cuda.get_device_name(i)}")
    print(f"{'='*100}\n")
    
    print("Setting up evaluation pipeline...")
    
    # Results land under ./results; per-sample details are saved but nothing
    # is pushed to the Hub.
    evaluation_tracker = EvaluationTracker(
        output_dir="./results",
        save_details=True,
        push_to_hub=False,
        # hub_results_org="your_username",
    )

    # NOTE(review): launcher_type is ACCELERATE while the model below is a
    # VLLMModelConfig — lighteval examples pair vLLM models with
    # ParallelismManager.VLLM; verify this combination is intentional.
    pipeline_params = PipelineParameters(
        launcher_type=ParallelismManager.ACCELERATE,
        custom_tasks_directory=None,
        max_samples=500  # cap evaluation at 500 samples per task
    )

    model_config = VLLMModelConfig(
        model_name=merged_model_path,
        dtype="bfloat16",
        max_model_length=4096,
        trust_remote_code=True,
        tensor_parallel_size=num_gpus,  # This will now correctly use only visible GPUs
    )

    # lighteval task spec: "<suite>|<task>|<num_fewshot>"
    task = "lighteval|math_500|0"  # aime24 aime24_gpassk

    print(f"Using {num_gpus} GPU(s) with tensor parallelism")
    print(f"Task: {task}\n")

    print("Creating pipeline...")
    pipeline = Pipeline(
        tasks=task,
        pipeline_parameters=pipeline_params,
        evaluation_tracker=evaluation_tracker,
        model_config=model_config,
    )

    # Fix generation_size
    # NOTE(review): this reaches into the private `_docs` attribute to force a
    # 2048-token generation budget per document — fragile across lighteval
    # versions; check whether a public config knob exists for this.
    print("Configuring generation parameters...")
    for task_name, task_obj in pipeline.tasks_dict.items():
        for doc in task_obj._docs:
            doc.generation_size = 2048
    
    print("\nStarting evaluation...")
    print("="*100)
    pipeline.evaluate()
    
    print("\nSaving results...")
    pipeline.save_and_push_results()
    
    print("\nShowing results...")
    pipeline.show_results()
    
    print("\n" + "="*100)
    print("✓ Evaluation completed!")
    print("="*100)
if __name__ == "__main__":
    main()