File size: 7,264 Bytes
232ac88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
import lighteval
from lighteval.logging.evaluation_tracker import EvaluationTracker
from lighteval.models.vllm.vllm_model import VLLMModelConfig
from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters
from lighteval.utils.imports import is_package_available
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
import os
import torch
import json

# Create an Accelerator when HF Accelerate is installed, otherwise run
# without one. The 3000 s process-group timeout gives slow model loads
# room before distributed init gives up.
if is_package_available("accelerate"):
    from datetime import timedelta

    from accelerate import Accelerator, InitProcessGroupKwargs

    _pg_kwargs = InitProcessGroupKwargs(timeout=timedelta(seconds=3000))
    accelerator = Accelerator(kwargs_handlers=[_pg_kwargs])
else:
    accelerator = None

def merge_lora_if_needed(
    merged_path="/public/home/lshi/yoAI/projects/Online_CL/train/model_sft_save/Qwen2.5-Math-1.5B-Lora-Merged",
    lora_path="/public/home/lshi/yoAI/projects/Online_CL/train/model_sft_save/Qwen2.5-Math-1.5B-Lora",
    base_model_name="Qwen/Qwen2.5-Math-1.5B",
    rope_factor=2.0,
):
    """Merge a LoRA adapter into its base model and add linear RoPE scaling.

    If a merged checkpoint already exists at ``merged_path`` and its
    ``config.json`` contains a ``rope_scaling`` entry, it is reused as-is.
    An existing checkpoint *without* RoPE scaling is deleted and rebuilt.

    Args:
        merged_path: Directory where the merged model is (or will be) saved.
        lora_path: Directory containing the trained LoRA adapter.
        base_model_name: HF Hub id or local path of the base model.
        rope_factor: Linear RoPE scaling factor; ``max_position_embeddings``
            is multiplied by this value (e.g. 4096 -> 8192 for factor 2.0).

    Returns:
        str: Path to the merged model directory.
    """
    import shutil

    merged_config_path = os.path.join(merged_path, "config.json")

    if os.path.exists(merged_config_path):
        print(f"Merged model already exists at {merged_path}")

        # Verify RoPE scaling in existing merged model
        with open(merged_config_path, 'r') as f:
            config = json.load(f)
        if 'rope_scaling' in config:
            print(f"✓ Existing merged model has RoPE scaling: {config['rope_scaling']}")
            print(f"✓ Max position embeddings: {config.get('max_position_embeddings', 'N/A')}")
            return merged_path
        # Stale checkpoint without RoPE scaling: delete it and fall through
        # to the merge steps below (no recursion needed).
        print("⚠ Warning: Existing merged model does NOT have RoPE scaling config!")
        print("  Deleting and re-creating with RoPE scaling...")
        shutil.rmtree(merged_path)

    print("="*100)
    print("Merged model not found. Starting merge process...")
    print("="*100)

    # Step 1: Load base model
    print("\n[1/5] Loading base model...")
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        device_map="auto"
    )

    # Step 2: Load LoRA adapter
    print("\n[2/5] Loading LoRA adapter...")
    model = PeftModel.from_pretrained(base_model, lora_path)

    # Step 3: Merge and unload — folds the adapter weights into the base
    # model and returns a plain (non-PEFT) model.
    print("\n[3/5] Merging LoRA weights with base model...")
    merged_model = model.merge_and_unload()

    # Step 4: Save merged model
    print(f"\n[4/5] Saving merged model to {merged_path}...")
    os.makedirs(merged_path, exist_ok=True)
    merged_model.save_pretrained(merged_path, safe_serialization=True)

    # Step 5: Add RoPE scaling configuration to the saved config so that
    # inference engines (e.g. vLLM) extend the usable context window.
    print("\n[5/5] Adding RoPE scaling configuration...")
    with open(merged_config_path, 'r') as f:
        merged_config = json.load(f)

    merged_config['rope_scaling'] = {
        "type": "linear",
        "factor": rope_factor,
    }

    print(f"✓ Added RoPE scaling: {merged_config['rope_scaling']}")

    # Scale max_position_embeddings accordingly (e.g. 4096 -> 8192).
    original_max_pos = merged_config.get('max_position_embeddings', 4096)
    new_max_pos = int(original_max_pos * rope_factor)
    merged_config['max_position_embeddings'] = new_max_pos
    print(f"✓ Updated max_position_embeddings: {original_max_pos} -> {new_max_pos}")

    # Save updated config
    with open(merged_config_path, 'w') as f:
        json.dump(merged_config, f, indent=2, ensure_ascii=False)

    # Save tokenizer alongside the merged weights so the directory is a
    # self-contained checkpoint.
    print("Saving tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
    tokenizer.save_pretrained(merged_path)

    # Clean up memory before the evaluation stage allocates GPU buffers.
    del base_model
    del model
    del merged_model
    torch.cuda.empty_cache()

    print("\n" + "="*100)
    print("✓ Merge completed successfully!")
    print(f"✓ Merged model saved to: {merged_path}")
    print(f"✓ RoPE scaling config: {merged_config['rope_scaling']}")
    print(f"✓ Max position embeddings: {merged_config['max_position_embeddings']}")
    print("="*100 + "\n")

    return merged_path

def main():
    """Merge the LoRA checkpoint if needed, then evaluate it with lighteval.

    Pins the run to GPU 2, builds a vLLM-backed model from the merged
    checkpoint, and runs the ``math_500`` task (0-shot, up to 500 samples).
    """
    # Restrict CUDA visibility before any CUDA context is created, and let
    # vLLM accept a max_model_len above the checkpoint's native value.
    os.environ["CUDA_VISIBLE_DEVICES"] = "2"
    os.environ["VLLM_ALLOW_LONG_MAX_MODEL_LEN"] = "1"

    print("Checking for merged model...")
    merged_model_path = merge_lora_if_needed()

    # Detect number of GPUs (after CUDA_VISIBLE_DEVICES is applied).
    num_gpus = torch.cuda.device_count()
    print(f"\n{'='*100}")
    print(f"Detected {num_gpus} GPU(s)")
    if num_gpus > 0:
        for i in range(num_gpus):
            print(f"  GPU {i}: {torch.cuda.get_device_name(i)}")
    print(f"{'='*100}\n")

    # Read the merged model config to get the context length actually
    # written by merge_lora_if_needed (RoPE-scaled, e.g. 8192).
    config_path = os.path.join(merged_model_path, "config.json")
    with open(config_path, 'r') as f:
        model_config_dict = json.load(f)
    # Fall back to 8192 (the expected post-scaling length) if the key is
    # missing, instead of hard-coding the value unconditionally.
    max_position_embeddings = model_config_dict.get('max_position_embeddings', 8192)
    rope_scaling = model_config_dict.get('rope_scaling', None)

    print(f"Model max_position_embeddings: {max_position_embeddings}")
    print(f"Model RoPE scaling config: {rope_scaling}")

    max_model_length = max_position_embeddings
    print(f"Using max_model_length: {max_model_length}\n")

    print("Setting up evaluation pipeline...")

    evaluation_tracker = EvaluationTracker(
        output_dir="./results",
        save_details=True,
        push_to_hub=False,
    )

    pipeline_params = PipelineParameters(
        launcher_type=ParallelismManager.ACCELERATE,
        custom_tasks_directory=None,
        max_samples=500
    )

    model_config = VLLMModelConfig(
        model_name=merged_model_path,
        dtype="bfloat16",
        max_model_length=max_model_length,
        trust_remote_code=True,
        tensor_parallel_size=num_gpus,
    )

    # Task spec format: suite|task|num_fewshot
    task = "lighteval|math_500|0"

    print(f"Using {num_gpus} GPU(s) with tensor parallelism")
    print(f"Task: {task}")
    print(f"Max model length: {max_model_length}\n")

    print("Creating pipeline...")
    pipeline = Pipeline(
        tasks=task,
        pipeline_parameters=pipeline_params,
        evaluation_tracker=evaluation_tracker,
        model_config=model_config,
    )

    # Override per-doc generation length so answers aren't truncated.
    # NOTE(review): this reaches into lighteval internals (_docs) — verify
    # it still works across lighteval versions.
    print("Configuring generation parameters...")
    for task_name, task_obj in pipeline.tasks_dict.items():
        for doc in task_obj._docs:
            doc.generation_size = 2048

    print("\nStarting evaluation...")
    print("="*100)
    pipeline.evaluate()

    print("\nSaving results...")
    pipeline.save_and_push_results()

    print("\nShowing results...")
    pipeline.show_results()

    print("\n" + "="*100)
    print("✓ Evaluation completed!")
    print("="*100)

if __name__ == "__main__":
    main()