# File size: 7,264 Bytes
# 232ac88
import lighteval
from lighteval.logging.evaluation_tracker import EvaluationTracker
from lighteval.models.vllm.vllm_model import VLLMModelConfig
from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters
from lighteval.utils.imports import is_package_available
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
import os
import torch
import json
# Build a process-group-aware Accelerator when `accelerate` is installed;
# otherwise expose `accelerator = None` so the rest of the script can still run.
if not is_package_available("accelerate"):
    accelerator = None
else:
    from datetime import timedelta

    from accelerate import Accelerator, InitProcessGroupKwargs

    # Generous init timeout (3000 s) so slow checkpoint loading on multi-node
    # setups does not kill the process group.
    _pg_kwargs = InitProcessGroupKwargs(timeout=timedelta(seconds=3000))
    accelerator = Accelerator(kwargs_handlers=[_pg_kwargs])
def _apply_rope_scaling(config_path, factor):
    """Write linear RoPE scaling into the model config at *config_path*.

    Sets ``rope_scaling = {"type": "linear", "factor": factor}`` and scales
    ``max_position_embeddings`` by the same factor (e.g. 4096 -> 8192 for
    factor 2.0), then saves the config back to disk.

    Returns the updated config dict.
    """
    with open(config_path, 'r') as f:
        config = json.load(f)
    config['rope_scaling'] = {
        "type": "linear",
        "factor": factor
    }
    print(f"✓ Added RoPE scaling: {config['rope_scaling']}")
    # Scale max_position_embeddings to match (e.g. 4096 -> 8192 for factor 2.0).
    original_max_pos = config.get('max_position_embeddings', 4096)
    new_max_pos = int(original_max_pos * factor)
    config['max_position_embeddings'] = new_max_pos
    print(f"✓ Updated max_position_embeddings: {original_max_pos} -> {new_max_pos}")
    with open(config_path, 'w') as f:
        json.dump(config, f, indent=2, ensure_ascii=False)
    return config


def merge_lora_if_needed(
    merged_path="/public/home/lshi/yoAI/projects/Online_CL/train/model_sft_save/Qwen2.5-Math-1.5B-Lora-Merged",
    lora_path="/public/home/lshi/yoAI/projects/Online_CL/train/model_sft_save/Qwen2.5-Math-1.5B-Lora",
    base_model_name="Qwen/Qwen2.5-Math-1.5B",
    rope_factor=2.0,
):
    """Merge the LoRA adapter into the base model (if not already done) and
    ensure the saved config carries the linear RoPE-scaling settings.

    Args:
        merged_path: Directory where the merged model is (or will be) saved.
        lora_path: Directory containing the trained LoRA adapter.
        base_model_name: Hub name or path of the base model.
        rope_factor: Linear RoPE scaling factor applied to the merged config.

    Returns:
        ``merged_path``, ready to be passed to the evaluation pipeline.
    """
    config_path = os.path.join(merged_path, "config.json")
    if os.path.exists(config_path):
        print(f"Merged model already exists at {merged_path}")
        # Verify RoPE scaling in existing merged model
        with open(config_path, 'r') as f:
            config = json.load(f)
        if 'rope_scaling' in config:
            print(f"✓ Existing merged model has RoPE scaling: {config['rope_scaling']}")
            print(f"✓ Max position embeddings: {config.get('max_position_embeddings', 'N/A')}")
        else:
            # RoPE scaling only affects position-embedding interpolation at
            # inference time; the merged weights are unaffected. Patching the
            # config in place is therefore sufficient -- no need to delete the
            # directory and redo the expensive merge.
            print("⚠ Warning: Existing merged model does NOT have RoPE scaling config!")
            print("  Patching config.json in place (merged weights are unaffected)...")
            _apply_rope_scaling(config_path, rope_factor)
        return merged_path

    print("="*100)
    print("Merged model not found. Starting merge process...")
    print("="*100)

    # Step 1: Load base model
    print("\n[1/5] Loading base model...")
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        device_map="auto"
    )

    # Step 2: Load LoRA adapter
    print("\n[2/5] Loading LoRA adapter...")
    model = PeftModel.from_pretrained(base_model, lora_path)

    # Step 3: Merge and unload (bakes the adapter deltas into the base weights)
    print("\n[3/5] Merging LoRA weights with base model...")
    merged_model = model.merge_and_unload()

    # Step 4: Save merged model
    print(f"\n[4/5] Saving merged model to {merged_path}...")
    os.makedirs(merged_path, exist_ok=True)
    merged_model.save_pretrained(merged_path, safe_serialization=True)

    # Step 5: Add RoPE scaling configuration
    print("\n[5/5] Adding RoPE scaling configuration...")
    merged_config = _apply_rope_scaling(config_path, rope_factor)

    # Save tokenizer alongside the merged weights so the directory is self-contained.
    print("Saving tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
    tokenizer.save_pretrained(merged_path)

    # Clean up GPU memory before the evaluation pipeline loads the model again.
    del base_model
    del model
    del merged_model
    torch.cuda.empty_cache()

    print("\n" + "="*100)
    print("✓ Merge completed successfully!")
    print(f"✓ Merged model saved to: {merged_path}")
    print(f"✓ RoPE scaling config: {merged_config['rope_scaling']}")
    print(f"✓ Max position embeddings: {merged_config['max_position_embeddings']}")
    print("="*100 + "\n")
    return merged_path
def main():
    """Merge the LoRA checkpoint if needed, then evaluate the merged model
    on the lighteval ``math_500`` task with a vLLM backend.

    Side effects: sets CUDA/vLLM environment variables, may trigger the
    (expensive) LoRA merge, and writes evaluation results under ./results.
    """
    # Set CUDA device FIRST before any CUDA operations
    # NOTE(review): torch (and possibly an Accelerator at import time) is already
    # loaded before this runs; CUDA_VISIBLE_DEVICES only takes effect if CUDA has
    # not been initialized yet -- confirm no CUDA work happens at import.
    os.environ["CUDA_VISIBLE_DEVICES"] = "2"
    # Let vLLM accept a max_model_len larger than the checkpoint's native value.
    os.environ["VLLM_ALLOW_LONG_MAX_MODEL_LEN"] = "1"
    print("Checking for merged model...")
    merged_model_path = merge_lora_if_needed()
    # Detect number of GPUs (after CUDA_VISIBLE_DEVICES, so typically 1 here)
    num_gpus = torch.cuda.device_count()
    print(f"\n{'='*100}")
    print(f"Detected {num_gpus} GPU(s)")
    if num_gpus > 0:
        for i in range(num_gpus):
            print(f"  GPU {i}: {torch.cuda.get_device_name(i)}")
    print(f"{'='*100}\n")
    # Read the merged model config to get max_model_length
    config_path = os.path.join(merged_model_path, "config.json")
    with open(config_path, 'r') as f:
        model_config_dict = json.load(f)
    max_position_embeddings = model_config_dict.get('max_position_embeddings', 4096)
    rope_scaling = model_config_dict.get('rope_scaling', None)
    print(f"Model max_position_embeddings: {max_position_embeddings}")
    print(f"Model RoPE scaling config: {rope_scaling}")
    # Use 8192 as max_model_length (the RoPE-extended length used during training)
    max_model_length = 8192
    print(f"Using max_model_length: {max_model_length}\n")
    print("Setting up evaluation pipeline...")
    evaluation_tracker = EvaluationTracker(
        output_dir="./results",
        save_details=True,       # keep per-sample outputs, not just aggregates
        push_to_hub=False,
    )
    pipeline_params = PipelineParameters(
        launcher_type=ParallelismManager.ACCELERATE,
        custom_tasks_directory=None,
        max_samples=500          # cap the number of evaluated samples
    )
    model_config = VLLMModelConfig(
        model_name=merged_model_path,
        dtype="bfloat16",
        max_model_length=max_model_length,  # use 8192
        trust_remote_code=True,
        tensor_parallel_size=num_gpus,
    )
    # lighteval task spec: suite|task|num_fewshot
    task = "lighteval|math_500|0"
    print(f"Using {num_gpus} GPU(s) with tensor parallelism")
    print(f"Task: {task}")
    print(f"Max model length: {max_model_length}\n")
    print("Creating pipeline...")
    pipeline = Pipeline(
        tasks=task,
        pipeline_parameters=pipeline_params,
        evaluation_tracker=evaluation_tracker,
        model_config=model_config,
    )
    # Fix generation_size
    # NOTE(review): `tasks_dict` / `_docs` look like lighteval internals --
    # verify this override still works against the installed lighteval version.
    print("Configuring generation parameters...")
    for task_name, task_obj in pipeline.tasks_dict.items():
        for doc in task_obj._docs:
            doc.generation_size = 2048
    print("\nStarting evaluation...")
    print("="*100)
    pipeline.evaluate()
    print("\nSaving results...")
    pipeline.save_and_push_results()
    print("\nShowing results...")
    pipeline.show_results()
    print("\n" + "="*100)
    print("✓ Evaluation completed!")
    print("="*100)
# Script entry point: only run the evaluation when executed directly.
if __name__ == "__main__":
    main()