|
|
import lighteval |
|
|
from lighteval.logging.evaluation_tracker import EvaluationTracker |
|
|
from lighteval.models.vllm.vllm_model import VLLMModelConfig |
|
|
from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters |
|
|
from lighteval.utils.imports import is_package_available |
|
|
from peft import PeftModel |
|
|
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig |
|
|
import os |
|
|
import torch |
|
|
import json |
|
|
|
|
|
# Optional multi-GPU support: build an `Accelerator` only when the
# `accelerate` package is installed; otherwise leave the module-level
# `accelerator` as None so downstream code can check for it.
if is_package_available("accelerate"):
    from datetime import timedelta
    from accelerate import Accelerator, InitProcessGroupKwargs
    # Generous 3000 s process-group init timeout so slow checkpoint loading
    # on large models does not trip the default NCCL/gloo timeout.
    accelerator = Accelerator(kwargs_handlers=[InitProcessGroupKwargs(timeout=timedelta(seconds=3000))])
else:
    accelerator = None
|
|
|
|
|
def merge_lora_if_needed(
    merged_path="/public/home/lshi/yoAI/projects/Online_CL/train/model_sft_save/Qwen2.5-Math-1.5B-Lora-Merged",
    lora_path="/public/home/lshi/yoAI/projects/Online_CL/train/model_sft_save/Qwen2.5-Math-1.5B-Lora",
    base_model_name="Qwen/Qwen2.5-Math-1.5B",
    rope_factor=2.0,
):
    """Merge a LoRA adapter into its base model and preserve RoPE scaling.

    An existing merged checkpoint at ``merged_path`` is reused as-is when its
    ``config.json`` already carries a ``rope_scaling`` entry; a stale
    checkpoint without one is deleted and re-merged from scratch.

    Args:
        merged_path: Directory where the merged checkpoint lives / is written.
        lora_path: Directory holding the trained LoRA adapter weights.
        base_model_name: HF hub id (or local path) of the base model.
        rope_factor: Linear RoPE scaling factor written into the config.

    Returns:
        Path to a merged checkpoint whose config includes RoPE scaling.
    """
    config_path = os.path.join(merged_path, "config.json")

    if os.path.exists(config_path):
        print(f"Merged model already exists at {merged_path}")
        # The config is written below with ensure_ascii=False, i.e. UTF-8;
        # read it back explicitly as UTF-8 regardless of platform default.
        with open(config_path, 'r', encoding='utf-8') as f:
            config = json.load(f)
        if 'rope_scaling' in config:
            print(f"✓ Existing merged model has RoPE scaling: {config['rope_scaling']}")
            print(f"✓ Max position embeddings: {config.get('max_position_embeddings', 'N/A')}")
            return merged_path
        # Stale checkpoint without RoPE scaling: wipe it and fall through to
        # a fresh merge (replaces the original's recursive self-call — the
        # fall-through is equivalent and cannot recurse).
        print("⚠ Warning: Existing merged model does NOT have RoPE scaling config!")
        print(" Deleting and re-creating with RoPE scaling...")
        import shutil
        shutil.rmtree(merged_path)

    print("="*100)
    print("Merged model not found. Starting merge process...")
    print("="*100)

    print("\n[1/5] Loading base model...")
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        device_map="auto"
    )

    print("\n[2/5] Loading LoRA adapter...")
    model = PeftModel.from_pretrained(base_model, lora_path)

    print("\n[3/5] Merging LoRA weights with base model...")
    merged_model = model.merge_and_unload()

    print(f"\n[4/5] Saving merged model to {merged_path}...")
    os.makedirs(merged_path, exist_ok=True)
    merged_model.save_pretrained(merged_path, safe_serialization=True)

    print("\n[5/5] Adding RoPE scaling configuration...")
    with open(config_path, 'r', encoding='utf-8') as f:
        merged_config = json.load(f)

    # Linear RoPE scaling stretches the usable context by `rope_factor`.
    merged_config['rope_scaling'] = {
        "type": "linear",
        "factor": rope_factor
    }
    print(f"✓ Added RoPE scaling: {merged_config['rope_scaling']}")

    # Advertise the scaled context window in the config as well.
    original_max_pos = merged_config.get('max_position_embeddings', 4096)
    new_max_pos = int(original_max_pos * rope_factor)
    merged_config['max_position_embeddings'] = new_max_pos
    print(f"✓ Updated max_position_embeddings: {original_max_pos} -> {new_max_pos}")

    # ensure_ascii=False emits raw Unicode, so the file must be opened UTF-8
    # explicitly (the original relied on the platform default encoding).
    with open(config_path, 'w', encoding='utf-8') as f:
        json.dump(merged_config, f, indent=2, ensure_ascii=False)

    # NOTE(review): tokenizer is taken from the base model, not lora_path —
    # fine unless SFT training added new tokens; verify against the adapter.
    print("Saving tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
    tokenizer.save_pretrained(merged_path)

    # Release GPU memory held by the merge before evaluation starts.
    del base_model
    del model
    del merged_model
    torch.cuda.empty_cache()

    print("\n" + "="*100)
    print("✓ Merge completed successfully!")
    print(f"✓ Merged model saved to: {merged_path}")
    print(f"✓ RoPE scaling config: {merged_config['rope_scaling']}")
    print(f"✓ Max position embeddings: {merged_config['max_position_embeddings']}")
    print("="*100 + "\n")

    return merged_path
|
|
|
|
|
def main():
    """Evaluate the merged LoRA model on MATH-500 via lighteval + vLLM.

    Side effects: pins the process to physical GPU 2, allows vLLM to exceed
    the checkpoint's native context length, triggers the LoRA merge if
    needed, and writes evaluation results under ./results.
    """
    # Must be set before the first CUDA context is created to take effect.
    os.environ["CUDA_VISIBLE_DEVICES"] = "2"
    # Permit a vLLM max_model_len larger than the checkpoint's native
    # max_position_embeddings (we rely on linear RoPE scaling instead).
    os.environ["VLLM_ALLOW_LONG_MAX_MODEL_LEN"] = "1"

    print("Checking for merged model...")
    merged_model_path = merge_lora_if_needed()

    num_gpus = torch.cuda.device_count()
    print(f"\n{'='*100}")
    print(f"Detected {num_gpus} GPU(s)")
    if num_gpus > 0:
        for i in range(num_gpus):
            print(f" GPU {i}: {torch.cuda.get_device_name(i)}")
    print(f"{'='*100}\n")

    # Report the (RoPE-patched) context configuration of the merged model.
    # The merge step writes this file as UTF-8, so read it back as UTF-8.
    config_path = os.path.join(merged_model_path, "config.json")
    with open(config_path, 'r', encoding='utf-8') as f:
        model_config_dict = json.load(f)
    max_position_embeddings = model_config_dict.get('max_position_embeddings', 4096)
    rope_scaling = model_config_dict.get('rope_scaling', None)

    print(f"Model max_position_embeddings: {max_position_embeddings}")
    print(f"Model RoPE scaling config: {rope_scaling}")

    max_model_length = 8192
    print(f"Using max_model_length: {max_model_length}\n")

    print("Setting up evaluation pipeline...")

    evaluation_tracker = EvaluationTracker(
        output_dir="./results",
        save_details=True,
        push_to_hub=False,
    )

    pipeline_params = PipelineParameters(
        launcher_type=ParallelismManager.ACCELERATE,
        custom_tasks_directory=None,
        max_samples=500
    )

    # vLLM requires tensor_parallel_size >= 1; the original passed num_gpus
    # directly, which is an invalid 0 if CUDA reports no devices (e.g. when
    # CUDA_VISIBLE_DEVICES was applied after CUDA init). Clamp to 1.
    model_config = VLLMModelConfig(
        model_name=merged_model_path,
        dtype="bfloat16",
        max_model_length=max_model_length,
        trust_remote_code=True,
        tensor_parallel_size=max(num_gpus, 1),
    )

    task = "lighteval|math_500|0"

    print(f"Using {num_gpus} GPU(s) with tensor parallelism")
    print(f"Task: {task}")
    print(f"Max model length: {max_model_length}\n")

    print("Creating pipeline...")
    pipeline = Pipeline(
        tasks=task,
        pipeline_parameters=pipeline_params,
        evaluation_tracker=evaluation_tracker,
        model_config=model_config,
    )

    # NOTE(review): reaches into lighteval internals (tasks_dict / _docs) to
    # cap per-doc generation length; may break across lighteval versions.
    print("Configuring generation parameters...")
    for task_obj in pipeline.tasks_dict.values():
        for doc in task_obj._docs:
            doc.generation_size = 2048

    print("\nStarting evaluation...")
    print("="*100)
    pipeline.evaluate()

    print("\nSaving results...")
    pipeline.save_and_push_results()

    print("\nShowing results...")
    pipeline.show_results()

    print("\n" + "="*100)
    print("✓ Evaluation completed!")
    print("="*100)
|
|
|
|
|
# Script entry point: run the full merge-then-evaluate workflow.
if __name__ == "__main__":
    main()