import gc  # needed for the per-task cleanup in main()
import json
import os
import random
import re
from datetime import datetime

import draccus
import numpy as np
import torch
import transformers
from datasets import load_dataset
from torch.utils.data import DataLoader
from tqdm import tqdm
from transformers import AutoTokenizer, DataCollatorWithPadding

from iba import IbaXs_LlamaForCausalLM, MainConfig
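

# Alpaca-style special-token defaults; IGNORE_INDEX is the label id that Hugging Face
# loss functions skip. These constants are not used by this inference script and are
# presumably kept for parity with the training-side preprocessing.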
IGNORE_INDEX = -100
DEFAULT_PAD_TOKEN = "[PAD]"
DEFAULT_EOS_TOKEN = "</s>"
DEFAULT_BOS_TOKEN = "<s>"
DEFAULT_UNK_TOKEN = "<unk>"

MAX_NEW_TOKENS = 50
# Alpaca-style prompt; {input_section} is filled per example in preprocess_and_tokenize().
PROMPT_TEMPLATE = (
    "Below is an instruction that describes a task. "
    "Write a response that appropriately completes the request.\n\n"
    "### Instruction:\n{instruction}\n\n{input_section}\n"
    "### Response:\n"
)
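

# Note: main() decodes greedily (do_sample=False), so results are deterministic even
# if this helper is never called; it is provided for completeness.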
def set_deterministic_seed(seed=42):
    """Seed python, numpy, torch (CPU and CUDA), and transformers for reproducibility."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    transformers.set_seed(seed)


def preprocess_and_tokenize(examples, tokenizer, max_seq_length):
    """Format a batch of examples into Alpaca-style prompts and tokenize them."""
    prompts = []
    input_column = examples.get('input')
    for i in range(len(examples['instruction'])):
        instruction = examples['instruction'][i]
        inp = input_column[i] if input_column is not None else ""

        if inp and str(inp).strip():
            input_section = f"### Input:\n{inp}\n\n"
        else:
            input_section = ""

        source_text = PROMPT_TEMPLATE.format(
            instruction=instruction,
            input_section=input_section
        )
        prompts.append(source_text)

    tokenized = tokenizer(
        prompts,
        truncation=True,
        max_length=max_seq_length,
        padding=False  # dynamic padding is applied later by DataCollatorWithPadding
    )
    return tokenized


def extract_answer(test_target: str, sentence: str) -> str:
    """Pull the predicted label out of a generated sentence.

    Looks for an explicit "the correct answer is ..." statement first, then falls
    back to the first occurrence of any label valid for the given task. Returns
    an empty string when no label is found.
    """
    sentence_ = sentence.lower().strip()
    match = re.search(r"the correct answer is\s+(answer\d+|solution\d+|option\d+|ending\d+|true|false)", sentence_)
    if match:
        return match.group(1)

    # Label space per task; anything unlisted uses the generic answer1-5 set.
    patterns = {
        'boolq': r'true|false',
        'piqa': r'solution1|solution2',
        'hellaswag': r'ending1|ending2|ending3|ending4',
        'winogrande': r'option1|option2',
        'default': r'answer1|answer2|answer3|answer4|answer5'
    }

    target_pattern = patterns.get(test_target, patterns['default'])
    pred_answers = re.findall(target_pattern, sentence_)

    return pred_answers[0] if pred_answers else ""


def score_outputs(outputs, test_target_name, ground_truths, out_json):
    """Score generations against ground truths and dump per-sample details to JSON."""
    results = []
    total_em = 0
    total_samples = len(ground_truths)

    print("Calculating scores...")
    for i, prediction in enumerate(outputs):
        extracted_pred = extract_answer(test_target_name, prediction)
        if extracted_pred == "":
            print(f'Please check, task: {test_target_name}, idx {i}, pred {prediction}')
        gt = ground_truths[i].lower().strip()

        is_correct = (extracted_pred == gt)
        if is_correct:
            total_em += 1
        results.append({
            "id": i,
            "prediction": prediction,
            "extracted_pred": extracted_pred,
            "ground_truth": gt,
            "is_correct": is_correct,
        })

    avg_acc = 100.0 * total_em / total_samples if total_samples > 0 else -1

    print("\n" + "=" * 40)
    print(f"FINAL RESULTS {test_target_name}")
    print("=" * 40)
    print(f"Total Samples: {total_samples}")
    print(f"Exact Match (EM): {avg_acc:.2f}%")
    print("=" * 40)

    os.makedirs(out_json, exist_ok=True)
    save_file = os.path.join(out_json, f'{test_target_name}.json')
    with open(save_file, "w", encoding="utf-8") as f:
        json.dump({
            "metrics": {"EM": avg_acc},
            "details": results
        }, f, indent=2, ensure_ascii=False)
    return avg_acc
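

# draccus builds a MainConfig from the command line (and an optional YAML config
# file) and passes it to main().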
@draccus.wrap()
def main(main_cfg: MainConfig):
    print('=' * 120)

    model_path = main_cfg.infer.model_path + "/ft2"

    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Model directory does not exist: {model_path}")
    print(f"Verified model path: {os.path.abspath(model_path)}")
    out_json = main_cfg.infer.model_path + "/results"
    print('output json path: ', out_json)

    # device_map="auto" lets accelerate place the weights, so no explicit .to("cuda")
    # is needed (calling .to() on a dispatched model can raise an error).
    model = IbaXs_LlamaForCausalLM.from_pretrained(
        model_path,
        device_map="auto",
        torch_dtype=torch.bfloat16,
        local_files_only=True
    )
    model.eval()

    # Left padding so every generated continuation starts at the same offset.
    tokenizer = AutoTokenizer.from_pretrained(main_cfg.model.base_model_name, padding_side='left')
    if tokenizer.pad_token is None:
        if tokenizer.unk_token_id is not None:
            tokenizer.pad_token_id = tokenizer.unk_token_id
            tokenizer.pad_token = tokenizer.unk_token
            print("Set PAD token to UNK token.")
        elif tokenizer.eos_token_id is not None:
            tokenizer.pad_token_id = tokenizer.eos_token_id
            tokenizer.pad_token = tokenizer.eos_token
            print("Set PAD token to EOS token.")

    # Keep the model config in sync with the tokenizer so generate() pads correctly.
    model.config.pad_token_id = tokenizer.pad_token_id

    BATCH_SIZE = main_cfg.infer.eval_batch_size

    start_time0 = datetime.now()
    final_res = {}
    all_task_acc = []

    for test_target_name in main_cfg.infer.datasets:
        print("Loading dataset...", test_target_name)

        if main_cfg.infer.is_json:
            data_files = f'./dataset/{test_target_name}/test.json'
            if not os.path.exists(data_files):
                raise FileNotFoundError(f"Cannot find dataset file: {data_files}")
            raw_dataset = load_dataset("json", data_files=data_files, split='train')
        else:
            raise NotImplementedError('Only JSON test sets are supported for now')

        test_dataset = raw_dataset.map(
            lambda x: preprocess_and_tokenize(x, tokenizer, main_cfg.model.cutoff_len),
            batched=True,
            batch_size=10000,
            num_proc=8,
        )

        ground_truths = raw_dataset['answer']
        test_dataset.set_format(type="torch", columns=["input_ids", "attention_mask"])
        data_collator = DataCollatorWithPadding(tokenizer=tokenizer, padding=True)
        data_loader = DataLoader(
            test_dataset,
            batch_size=BATCH_SIZE,
            collate_fn=data_collator,
            shuffle=False  # keep order aligned with ground_truths
        )
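
        # Greedy, batched decoding; prompts are left-padded, so each continuation
        # starts at the same offset (prompt_len) in every row.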
| | print(f"Generating for {len(ground_truths)} samples of {test_target_name}...") |
| | |
| | final_predictions = [] |
| | start_time = datetime.now() |
| | with torch.no_grad(): |
| | for batch in tqdm(data_loader, desc=f"Inferencing {test_target_name}"): |
| | inputs = {k: v.to(model.device) for k, v in batch.items()} |
| | import torch.backends.cuda as cuda_sdp |
| | try: |
| | with cuda_sdp.sdp_kernel(enable_flash=True, enable_math=False, enable_mem_efficient=True): |
| | outputs = model.generate( |
| | **inputs, |
| | max_new_tokens=MAX_NEW_TOKENS, |
| | do_sample=False, |
| | repetition_penalty=1.2, |
| | pad_token_id=tokenizer.pad_token_id, |
| | eos_token_id=tokenizer.eos_token_id |
| | ) |
| | except RuntimeError as e: |
| | print("PyTorch are using Math kernel only....") |
| | print(e) |
| | |
| | prompt_len = inputs['input_ids'].shape[1] |
| | new_tokens = outputs[:, prompt_len:] |
| | |
| | decoded_batch = tokenizer.batch_decode(new_tokens, skip_special_tokens=True) |
| | final_predictions.extend([text.strip() for text in decoded_batch]) |
| |
|

        end_time = datetime.now()
        print('end time: ', end_time.strftime("%Y-%m-%d %H:%M:%S"), '| task: ', test_target_name, '| duration: ', end_time - start_time)

        avg_acc = score_outputs(outputs=final_predictions, test_target_name=test_target_name,
                                out_json=out_json, ground_truths=ground_truths)

        final_res[test_target_name] = avg_acc
        all_task_acc.append(avg_acc)

        # Free per-task memory before the next dataset.
        del final_predictions
        del test_dataset
        gc.collect()

    print('all_task_acc', all_task_acc)
    avg_score = sum(all_task_acc) / len(all_task_acc) if all_task_acc else 0.0
    final_res['average_score'] = avg_score
    save_file = os.path.join(out_json, 'FINAL.json')
    with open(save_file, "w", encoding="utf-8") as f:
        json.dump(final_res, f, indent=2, ensure_ascii=False)
    print(f"Results saved to {save_file}, overall score: {avg_score}")

    end_time0 = datetime.now()
    print('end time: ', end_time0.strftime("%Y-%m-%d %H:%M:%S"), '| duration: ', end_time0 - start_time0)


if __name__ == "__main__":
    main()
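
# Example invocation (hypothetical script/config names; draccus also accepts dotted
# overrides for MainConfig fields, e.g. --infer.eval_batch_size 16):
#   python infer.py --config_path configs/infer.yaml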