#!/usr/bin/python3
# -*- coding: utf-8 -*-
import argparse
import json
import os
from pathlib import Path
import platform
# Use the ModelScope mirror when downloading models (useful where the HF Hub is slow or unreachable).
os.environ["UNSLOTH_USE_MODELSCOPE"] = "1"

if platform.system() in ("Windows", "Darwin"):
    from project_settings import project_path
else:
    project_path = os.path.abspath("../../../")
    project_path = Path(project_path)

from datasets import load_dataset
from unsloth import FastLanguageModel
from transformers import TextStreamer
from tqdm import tqdm
def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_name",
        default="unsloth/Qwen3-8B-unsloth-bnb-4bit",
        type=str
    )
    parser.add_argument(
        "--lora_adapter_path",
        default=(project_path / "trained_models" / "Qwen3-8B-sft-lora-adapter-unsloth").as_posix(),
        type=str
    )
    parser.add_argument(
        "--dataset_path",
        default="miyuki2026/tutorials",
        type=str
    )
    parser.add_argument("--dataset_name", default=None, type=str)
    parser.add_argument("--dataset_split", default=None, type=str)
    parser.add_argument(
        "--dataset_cache_dir",
        default=(project_path / "hub_datasets").as_posix(),
        type=str
    )
    parser.add_argument("--dataset_streaming", action="store_true")
    parser.add_argument("--valid_dataset_size", default=1000, type=int)
    parser.add_argument("--shuffle_buffer_size", default=5000, type=int)
    parser.add_argument(
        "--max_new_tokens",
        default=1024,  # 8192, 128
        type=int,
        help="maximum number of tokens to generate (note: not the model's actual long-context capacity)"
    )
    parser.add_argument("--top_p", default=0.85, type=float, help="nucleus sampling threshold (0-1)")
    parser.add_argument("--temperature", default=0.85, type=float,
                        help="sampling temperature; controls randomness (0-1, higher means more random)")
    parser.add_argument(
        "--num_workers",
        default=None if platform.system() == "Windows" else os.cpu_count() // 2,
        type=int
    )
    parser.add_argument("--output_file", default="evaluation.jsonl", type=str)
    args = parser.parse_args()
    return args
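
# Example invocation (the script name and flag values are illustrative, not fixed by this repo):
#   python3 evaluate_sft_lora.py \
#       --max_new_tokens 512 \
#       --top_p 0.85 --temperature 0.85 \
#       --output_file evaluation.jsonl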
def main():
    args = get_args()

    output_file = Path(args.output_file)
    output_file.parent.mkdir(parents=True, exist_ok=True)

    # 1. Load the 4-bit quantized base model.
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=args.model_name,
        max_seq_length=2048,  # context window used for this evaluation run
        device_map="auto",
        dtype=None,  # let Unsloth auto-select the optimal precision
        load_in_4bit=True,  # 4-bit quantization saves ~70% GPU memory
    )
    # 2. Inject the LoRA adapter.
    model.load_adapter(args.lora_adapter_path)
    # Enable Unsloth's inference acceleration.
    FastLanguageModel.for_inference(model)
    model.eval()
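    # Note: load_adapter() keeps the LoRA weights attached on top of the frozen 4-bit
    # base model; the adapter is not merged into the base weights for this evaluation.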
    # Load the evaluation dataset.
    dataset_dict = load_dataset(
        path=args.dataset_path,
        name=args.dataset_name,
        data_dir="keywords",
        # data_dir="psychology",
        split=args.dataset_split,
        cache_dir=args.dataset_cache_dir,
        # num_proc=args.num_workers if not args.dataset_streaming else None,
        streaming=args.dataset_streaming,
    )
    dataset = dataset_dict["train"]
    print(dataset)

    if args.dataset_streaming:
        valid_dataset = dataset.take(args.valid_dataset_size)
        # train_dataset = dataset.skip(args.valid_dataset_size)
        # train_dataset = train_dataset.shuffle(buffer_size=args.shuffle_buffer_size, seed=None)
    else:
        dataset = dataset.train_test_split(test_size=args.valid_dataset_size, seed=None)
        # train_dataset = dataset["train"]
        valid_dataset = dataset["test"]
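
    # Assumed example schema (inferred from the field access below): each row carries a
    # "conversation" list of {"role": ..., "content": ...} messages, where the last
    # message is the reference assistant reply.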
    with open(output_file.as_posix(), "w", encoding="utf-8") as f:
        for example in tqdm(valid_dataset):
            conversation = example["conversation"]
            prompt = conversation[:-1]
            response = conversation[-1]["content"]

            # 3. Render the prompt with the chat template.
            format_messages = tokenizer.apply_chat_template(
                prompt,
                tokenize=False,  # return a formatted string; True would return tensors
                add_generation_prompt=True,  # set True for inference; keep it off during training
            )
            # 4. Tokenize the formatted prompt to get model inputs.
            inputs = tokenizer(format_messages, return_tensors="pt").to(model.device)
            # 5. Call model.generate().
            generated_ids = model.generate(
                **inputs,
                max_new_tokens=args.max_new_tokens, do_sample=True,
                pad_token_id=tokenizer.pad_token_id, eos_token_id=tokenizer.eos_token_id,
                top_p=args.top_p, temperature=args.temperature, repetition_penalty=1.0,
            )
            # Decode only the newly generated tokens and strip the Qwen3 reasoning block.
            response_: str = tokenizer.decode(generated_ids[0][len(inputs["input_ids"][0]):], skip_special_tokens=True)
            response_ = response_.split("</think>")[-1].strip()

            row = {
                "prompt": prompt,
                "response": response,
                "response_": response_,
            }
            row = json.dumps(row, ensure_ascii=False)
            f.write(f"{row}\n")
    return
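
# Each output line pairs the reference answer ("response") with the model's
# generation ("response_") for side-by-side comparison, e.g. (illustrative):
# {"prompt": [{"role": "user", "content": "..."}], "response": "...", "response_": "..."}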
if __name__ == "__main__":
    main()