{
"cells": [
{
"cell_type": "markdown",
"id": "e325d4b51fe4fad2",
"metadata": {},
"source": [
"# 加载模型以及数据"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "initial_id",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-31T15:57:38.940490Z",
"start_time": "2025-03-31T15:57:29.198500Z"
},
"collapsed": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"E:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"from transformers import AutoModelForCausalLM, AutoTokenizer\n",
"from trl import GRPOTrainer, GRPOConfig # 假设 trl 库中有 GRPOTrainer 模块\n",
"from peft import PeftModel\n",
"from reward import compute_rewards\n",
"def load_data(input_path):\n",
" data = []\n",
" with open(input_path, 'r', encoding='utf-8') as f:\n",
" for line in f:\n",
" line = line.strip()\n",
" if not line:\n",
" continue\n",
" parts = line.split('<think>', 1)\n",
" if len(parts) != 2:\n",
" print(f\"警告: 格式错误的行,已跳过: {line}\")\n",
" continue\n",
" keywords_part, lyrics = parts[0], parts[1]\n",
" keywords = [kw.strip() for kw in keywords_part.split(',')]\n",
"\n",
" # 关键修改:使用关键词作为 prompt,歌词作为 completion\n",
" data.append({\n",
" 'prompt': \"根据以下关键词生成一首歌词,歌词中包含多个句子,确保句子通顺,诗意,格式正确.让我们一步一步的思考(思考过程包含在<think>和</think>之间):\" + \",\".join(keywords), # 关键词拼接成字符串,作为模型输入\n",
" 'completion': \"<think>\" + lyrics, # 歌词(去掉多余空格),作为模型输出\n",
" 'keywords': keywords, # 关键词拼接成字符串,作为模型输入\n",
" })\n",
" \n",
" print(f\"成功加载 {len(data)} 条数据\")\n",
" return data\n",
"\n",
"\n"
]
},
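{
"cell_type": "markdown",
"id": "reward-sketch-md",
"metadata": {},
"source": [
"`compute_rewards` is imported from the local `reward` module, which is not included in this notebook. Below is a minimal sketch of what such a function *could* look like, assuming TRL's reward-function interface: extra dataset columns (here `keywords`) arrive as keyword arguments, and one float is returned per completion. The actual scoring logic lives in `reward.py`; the function below is only illustrative and is not used for training."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "reward-sketch-code",
"metadata": {},
"outputs": [],
"source": [
"# Hypothetical stand-in for reward.compute_rewards -- the real scoring lives in reward.py.\n",
"# Assumes TRL's reward-function signature, where dataset columns such as `keywords`\n",
"# are forwarded as keyword arguments and one float per completion is returned.\n",
"def compute_rewards_sketch(prompts, completions, keywords=None, **kwargs):\n",
"    rewards = []\n",
"    for i, completion in enumerate(completions):\n",
"        score = 0.0\n",
"        # reward a closed reasoning block, mirroring the <think>...</think> format above\n",
"        if \"</think>\" in completion:\n",
"            score += 0.5\n",
"        # reward keyword coverage in the lyrics after the reasoning block\n",
"        lyrics = completion.split(\"</think>\", 1)[-1]\n",
"        kws = keywords[i] if keywords else []\n",
"        if kws:\n",
"            score += sum(kw in lyrics for kw in kws) / len(kws)\n",
"        rewards.append(score)\n",
"    return rewards"
]
},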
{
"cell_type": "code",
"execution_count": 2,
"id": "dc98a63e850836a",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-31T15:57:38.963191Z",
"start_time": "2025-03-31T15:57:38.951488Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"成功加载 1000 条数据\n",
"第一条数据: {'prompt': '根据以下关键词生成一首歌词,歌词中包含多个句子,确保句子通顺,诗意,格式正确.让我们一步一步的思考(思考过程包含在<think>和</think>之间):午夜,寒冬,心动', 'completion': '<think>嗯,用户让我根据“午夜,寒冬,心动”这三个关键词写一首诗。首先,我需要理解每个词带来的意象和情感。午夜通常给人一种寂静、神秘的感觉,可能带有孤独或反思的情绪。寒冬则联想到寒冷、冰雪,可能还有萧瑟的景象,但也可以有温暖的反差,比如炉火或者温暖的回忆。心动是情感的核心,可能涉及爱情、突然的情感波动,或者内心的温暖。接下来,我要考虑如何将这三个元素融合在一起。可能需要营造一个寒冷的冬夜环境,然后在这样的背景下引入心动的瞬间,形成对比。比如,用寒冷的景象来衬托内心的温暖或突然的情感变化。然后,结构方面,可能需要分几段,每段围绕一个关键词展开,但又要相互联系。比如,前两段描绘午夜和寒冬的景象,第三段引入心动的元素。或者交替出现,交织在一起。语言风格方面,用户可能希望有诗意和意象化的表达,避免直白。使用比喻、拟人等修辞手法,比如将心跳比作钟摆,或者用冰晶、霜花来表现寒冷。同时,注意押韵和节奏,使诗歌流畅。还需要考虑用户可能的深层需求。他们可能不仅仅想要一首描述这三个词的诗,而是希望表达在寒冷孤独的环境中突然感受到的温暖或爱情,强调对比和情感冲击。因此,诗里需要有转折点,从冷到暖,从静到动。可能会遇到的挑战是如何自然地将三个关键词串联起来,不显得生硬。可能需要找到一个共同的场景或隐喻,比如午夜寒冬中的相遇,或者内心的变化。同时,避免陈词滥调,用新颖的比喻,比如将霜花比作时针,或者心跳与钟摆的共振。最后,检查诗的整体意境是否统一,情感是否连贯,是否有足够的意象让读者产生共鸣。可能需要调整用词和结构,使每个部分都服务于主题,增强整体效果。</think>:《冰裂时刻》/石英血管里霜花正逆向生长/成为时针,成为所有表盘/反向旋转的证词。玻璃在窗框深处/爆出细纹,像候鸟迁徙时/骨骼深处迸发的裂响//壁炉吞下最后一块松木的年轮/灰烬在铁架上凝结成第六种晶体/这时钟摆突然停驻——/某种振频正穿过雪原/穿过二十年冻土下沉默的矿脉//冰层深处传来远古鲸群的应和/我们站在地磁偏转的切线上/听见彼此胸腔里/石英开始顺时针碎裂/春天正从裂缝中/涌出第一滴蓝', 'keywords': ['午夜', '寒冬', '心动']}\n"
]
}
],
"source": [
"dataset = load_data('../data/CoTdata.txt')\n",
"if dataset:\n",
" print(\"第一条数据:\", dataset[0])\n",
"else:\n",
" print(\"未加载到有效数据。\")"
]
},
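{
"cell_type": "markdown",
"id": "data-format-md",
"metadata": {},
"source": [
"For reference, `load_data` expects each line of `CoTdata.txt` to contain comma-separated keywords, then a `<think>` marker, then the chain-of-thought and lyrics. A tiny illustration of that parsing follows; the sample string is invented, not taken from the dataset."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "data-format-code",
"metadata": {},
"outputs": [],
"source": [
"# Illustration of the line format that load_data parses; the sample line is made up.\n",
"sample = \"午夜,寒冬,心动<think>...reasoning...</think>...lyrics...\"\n",
"keywords_part, rest = sample.split(\"<think>\", 1)\n",
"print([kw.strip() for kw in keywords_part.split(\",\")])  # -> the 'keywords' field\n",
"print(\"<think>\" + rest)                                  # -> the 'completion' field"
]
},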
{
"cell_type": "code",
"execution_count": 3,
"id": "17175326d7901595",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-31T15:57:45.180036Z",
"start_time": "2025-03-31T15:57:41.329675Z"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"trainable params: 372,736 || all params: 1,777,460,736 || trainable%: 0.0210\n",
"None\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"E:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\peft\\mapping_func.py:73: UserWarning: You are trying to modify a model with PEFT for a second time. If you want to reload the model with a different config, make sure to call `.unload()` before.\n",
" warnings.warn(\n",
"E:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\peft\\tuners\\tuners_utils.py:167: UserWarning: Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing!\n",
" warnings.warn(\n"
]
}
],
"source": [
"base_model = AutoModelForCausalLM.from_pretrained(\"deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B\").to(\"cuda\")\n",
"# 2. 加载 LoRA 适配器\n",
"model = PeftModel.from_pretrained(base_model, \"../3_26_LoRA\").to(\"cuda\") # 你的 LoRA 路径\n",
"tokenizer = AutoTokenizer.from_pretrained(\"deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B\")\n",
"tokenizer.pad_token = tokenizer.eos_token\n",
"# Load LoRA lora_config = LoraConfig( task_type=\"CAUSAL_LM\", r=16, lora_alpha=32, target_modules=\"all-linear\", ) model = get_peft_model(model, lora_config) print(model.print_trainable_parameters()) \n",
"from peft import LoraConfig, get_peft_model\n",
"target_modules = [\"q_proj\", \"k_proj\", \"v_proj\"] \n",
"lora_config = LoraConfig(\n",
" r=2, # 秩(可尝试8~32)\n",
" lora_alpha=32, # 缩放系数(通常设为2*r)\n",
" target_modules=target_modules, \n",
" bias=\"none\", # 不训练偏置项\n",
")\n",
"model = get_peft_model(model, lora_config) \n",
"print(model.print_trainable_parameters()) "
]
},
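{
"cell_type": "markdown",
"id": "double-adapter-md",
"metadata": {},
"source": [
"The PEFT warnings above mean `get_peft_model` is wrapping a model that already carries the `../3_26_LoRA` adapter, so two adapters end up stacked. If the intent is instead to fold the old adapter into the base weights and train only the new one, a sketch using PEFT's `merge_and_unload` is below (commented out so it does not disturb the run above; it assumes re-loading a fresh base model):"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "double-adapter-code",
"metadata": {},
"outputs": [],
"source": [
"# Sketch: starting from a freshly loaded base model, merge the old adapter into the\n",
"# base weights, then attach only the new trainable adapter. This avoids the\n",
"# \"multiple adapters\" warning emitted above.\n",
"# fresh_base = AutoModelForCausalLM.from_pretrained(\"deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B\").to(\"cuda\")\n",
"# merged = PeftModel.from_pretrained(fresh_base, \"../3_26_LoRA\").merge_and_unload()\n",
"# model = get_peft_model(merged, lora_config)\n",
"# model.print_trainable_parameters()"
]
},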
{
"cell_type": "markdown",
"id": "d2a601c596644fc",
"metadata": {},
"source": [
"# 配置训练参数"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "8ce28487acb606a2",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-31T15:57:48.223131Z",
"start_time": "2025-03-31T15:57:48.149696Z"
}
},
"outputs": [],
"source": [
"# 配置 GRPO 的超参数(这里的参数可以根据需要进行调整)\n",
"config = GRPOConfig(\n",
" gradient_accumulation_steps = 50, # 多少步更新一次参考模型\n",
" per_device_train_batch_size=2, # 每个批次的样本数\n",
" epsilon=0.2, # GRPO 中的 clip 范围\n",
" beta=0.05, # KL 惩罚系数\n",
" num_train_epochs=1, # 总训练步数(总周期)\n",
" num_generations=2, # 分组采样的大小\n",
" learning_rate=1e-5, # 优化器的学习率\n",
" bf16=True, \n",
" adam_beta1=0.9,\n",
" adam_beta2=0.98,\n",
" optim=\"adamw_8bit\", # 优化器\n",
" max_grad_norm=0.1, # 梯度裁剪的最大值\n",
" save_steps=1000, # 多少步保存一次模型\n",
" save_total_limit=2, # 最多保存几个模型 \n",
" logging_steps=5, # 多少步打印一次训练信息\n",
" output_dir=\"GRPO\", # 模型保存路径\n",
" weight_decay=0.01, # 权重衰减\n",
" warmup_ratio=0.03, # 预热比例\n",
" max_prompt_length=256,\n",
" max_completion_length=1024, # 最大输出长度\n",
" report_to='tensorboard', # or `tensorboard`\n",
")\n",
"# Training arguments training_args = GRPOConfig( \n",
"# output_dir=\"GRPO\", \n",
"# learning_rate=2e-5, \n",
"# per_device_train_batch_size=8, \n",
"# gradient_accumulation_steps=2, \n",
"# max_prompt_length=512, \n",
"# max_completion_length=96, \n",
"# num_generations=8, \n",
"# optim=\"adamw_8bit\", \n",
"# num_train_epochs=1, \n",
"# bf16=True, \n",
"# report_to=[\"wandb\"], \n",
"# remove_unused_columns=False, \n",
"# logging_steps=1, \n",
"# ) "
]
},
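{
"cell_type": "markdown",
"id": "batch-math-md",
"metadata": {},
"source": [
"A quick sanity check on these settings. In TRL's GRPO trainer each prompt is expanded into `num_generations` completions, and (in recent TRL versions) the per-device batch size counts completions, so it must be divisible by `num_generations`. Under that assumption, the arithmetic below works out what one optimizer update actually sees:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "batch-math-code",
"metadata": {},
"outputs": [],
"source": [
"# Back-of-the-envelope batch accounting for the config above (single GPU assumed).\n",
"per_device_train_batch_size = 2   # completions per device per step\n",
"num_generations = 2               # completions sampled per prompt (group size)\n",
"gradient_accumulation_steps = 50\n",
"\n",
"assert per_device_train_batch_size % num_generations == 0  # TRL requires divisibility\n",
"prompts_per_step = per_device_train_batch_size // num_generations\n",
"prompts_per_update = prompts_per_step * gradient_accumulation_steps\n",
"completions_per_update = prompts_per_update * num_generations\n",
"print(f\"{prompts_per_step} unique prompt(s) per step; \"\n",
"      f\"{prompts_per_update} prompts / {completions_per_update} scored completions per optimizer update\")"
]
},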
{
"cell_type": "markdown",
"id": "793b3094cd98fed6",
"metadata": {},
"source": [
"# 训练模型"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "19094188d22e45c2",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-31T17:08:58.041584Z",
"start_time": "2025-03-31T15:57:50.316805Z"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"No label_names provided for model class `PeftModel`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.\n"
]
},
{
"data": {
"text/html": [
"\n",
" <div>\n",
" \n",
" <progress value='2' max='20' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" [ 2/20 : < :, Epoch 0.05/1]\n",
" </div>\n",
" <table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: left;\">\n",
" <th>Step</th>\n",
" <th>Training Loss</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table><p>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
"\u001b[31mKeyboardInterrupt\u001b[39m Traceback (most recent call last)",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[5]\u001b[39m\u001b[32m, line 16\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;66;03m# Trainer trainer = GRPOTrainer( \u001b[39;00m\n\u001b[32m 2\u001b[39m \u001b[38;5;66;03m# model=model, \u001b[39;00m\n\u001b[32m 3\u001b[39m \u001b[38;5;66;03m# reward_funcs=[reward_len], \u001b[39;00m\n\u001b[32m (...)\u001b[39m\u001b[32m 8\u001b[39m \u001b[38;5;66;03m# wandb.init(project=\"GRPO\") \u001b[39;00m\n\u001b[32m 9\u001b[39m \u001b[38;5;66;03m# trainer.train()\u001b[39;00m\n\u001b[32m 10\u001b[39m trainer = GRPOTrainer(\n\u001b[32m 11\u001b[39m model=model,\n\u001b[32m 12\u001b[39m reward_funcs=[compute_rewards],\n\u001b[32m 13\u001b[39m args=config,\n\u001b[32m 14\u001b[39m train_dataset=dataset\n\u001b[32m 15\u001b[39m )\n\u001b[32m---> \u001b[39m\u001b[32m16\u001b[39m \u001b[43mtrainer\u001b[49m\u001b[43m.\u001b[49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\transformers\\trainer.py:2245\u001b[39m, in \u001b[36mTrainer.train\u001b[39m\u001b[34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[39m\n\u001b[32m 2243\u001b[39m hf_hub_utils.enable_progress_bars()\n\u001b[32m 2244\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m2245\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43minner_training_loop\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 2246\u001b[39m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[43m=\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2247\u001b[39m \u001b[43m \u001b[49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[43m=\u001b[49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2248\u001b[39m \u001b[43m \u001b[49m\u001b[43mtrial\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2249\u001b[39m \u001b[43m \u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m=\u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2250\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\transformers\\trainer.py:2556\u001b[39m, in \u001b[36mTrainer._inner_training_loop\u001b[39m\u001b[34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[39m\n\u001b[32m 2549\u001b[39m context = (\n\u001b[32m 2550\u001b[39m functools.partial(\u001b[38;5;28mself\u001b[39m.accelerator.no_sync, model=model)\n\u001b[32m 2551\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m i != \u001b[38;5;28mlen\u001b[39m(batch_samples) - \u001b[32m1\u001b[39m\n\u001b[32m 2552\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m.accelerator.distributed_type != DistributedType.DEEPSPEED\n\u001b[32m 2553\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m contextlib.nullcontext\n\u001b[32m 2554\u001b[39m )\n\u001b[32m 2555\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m context():\n\u001b[32m-> \u001b[39m\u001b[32m2556\u001b[39m tr_loss_step = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mtraining_step\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnum_items_in_batch\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 2558\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[32m 2559\u001b[39m args.logging_nan_inf_filter\n\u001b[32m 2560\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_torch_xla_available()\n\u001b[32m 2561\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m (torch.isnan(tr_loss_step) \u001b[38;5;129;01mor\u001b[39;00m torch.isinf(tr_loss_step))\n\u001b[32m 2562\u001b[39m ):\n\u001b[32m 2563\u001b[39m \u001b[38;5;66;03m# if loss is nan or inf simply add the average of previous logged losses\u001b[39;00m\n\u001b[32m 2564\u001b[39m tr_loss = tr_loss + tr_loss / (\u001b[32m1\u001b[39m + \u001b[38;5;28mself\u001b[39m.state.global_step - \u001b[38;5;28mself\u001b[39m._globalstep_last_logged)\n",
"\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\transformers\\trainer.py:3712\u001b[39m, in \u001b[36mTrainer.training_step\u001b[39m\u001b[34m(self, model, inputs, num_items_in_batch)\u001b[39m\n\u001b[32m 3709\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(\u001b[38;5;28mself\u001b[39m.optimizer, \u001b[33m\"\u001b[39m\u001b[33mtrain\u001b[39m\u001b[33m\"\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mcallable\u001b[39m(\u001b[38;5;28mself\u001b[39m.optimizer.train):\n\u001b[32m 3710\u001b[39m \u001b[38;5;28mself\u001b[39m.optimizer.train()\n\u001b[32m-> \u001b[39m\u001b[32m3712\u001b[39m inputs = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_prepare_inputs\u001b[49m\u001b[43m(\u001b[49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 3713\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m is_sagemaker_mp_enabled():\n\u001b[32m 3714\u001b[39m loss_mb = smp_forward_backward(model, inputs, \u001b[38;5;28mself\u001b[39m.args.gradient_accumulation_steps)\n",
"\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\trl\\extras\\profiling.py:87\u001b[39m, in \u001b[36mprofiling_decorator.<locals>.wrapper\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 84\u001b[39m \u001b[38;5;129m@functools\u001b[39m.wraps(func)\n\u001b[32m 85\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mwrapper\u001b[39m(\u001b[38;5;28mself\u001b[39m, *args, **kwargs):\n\u001b[32m 86\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m profiling_context(\u001b[38;5;28mself\u001b[39m, func.\u001b[34m__name__\u001b[39m):\n\u001b[32m---> \u001b[39m\u001b[32m87\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\trl\\trainer\\grpo_trainer.py:647\u001b[39m, in \u001b[36mGRPOTrainer._prepare_inputs\u001b[39m\u001b[34m(self, inputs)\u001b[39m\n\u001b[32m 645\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m mode == \u001b[33m\"\u001b[39m\u001b[33mtrain\u001b[39m\u001b[33m\"\u001b[39m:\n\u001b[32m 646\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m.state.global_step % \u001b[38;5;28mself\u001b[39m.num_iterations == \u001b[32m0\u001b[39m:\n\u001b[32m--> \u001b[39m\u001b[32m647\u001b[39m inputs = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_generate_and_score_completions\u001b[49m\u001b[43m(\u001b[49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 648\u001b[39m \u001b[38;5;28mself\u001b[39m._buffered_inputs[\u001b[38;5;28mself\u001b[39m._step % \u001b[38;5;28mself\u001b[39m.args.gradient_accumulation_steps] = inputs\n\u001b[32m 649\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n",
"\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\trl\\trainer\\grpo_trainer.py:719\u001b[39m, in \u001b[36mGRPOTrainer._generate_and_score_completions\u001b[39m\u001b[34m(self, inputs)\u001b[39m\n\u001b[32m 714\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 715\u001b[39m \u001b[38;5;66;03m# Regular generation path\u001b[39;00m\n\u001b[32m 716\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m unwrap_model_for_generation(\n\u001b[32m 717\u001b[39m \u001b[38;5;28mself\u001b[39m.model_wrapped, \u001b[38;5;28mself\u001b[39m.accelerator, gather_deepspeed3_params=\u001b[38;5;28mself\u001b[39m.args.ds3_gather_for_generation\n\u001b[32m 718\u001b[39m ) \u001b[38;5;28;01mas\u001b[39;00m unwrapped_model:\n\u001b[32m--> \u001b[39m\u001b[32m719\u001b[39m prompt_completion_ids = \u001b[43munwrapped_model\u001b[49m\u001b[43m.\u001b[49m\u001b[43mgenerate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 720\u001b[39m \u001b[43m \u001b[49m\u001b[43mprompt_ids\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mattention_mask\u001b[49m\u001b[43m=\u001b[49m\u001b[43mprompt_mask\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgeneration_config\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mgeneration_config\u001b[49m\n\u001b[32m 721\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 723\u001b[39m \u001b[38;5;66;03m# Compute prompt length and extract completion ids\u001b[39;00m\n\u001b[32m 724\u001b[39m prompt_length = prompt_ids.size(\u001b[32m1\u001b[39m)\n",
"\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\peft\\peft_model.py:823\u001b[39m, in \u001b[36mPeftModel.generate\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 821\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m._enable_peft_forward_hooks(*args, **kwargs):\n\u001b[32m 822\u001b[39m kwargs = {k: v \u001b[38;5;28;01mfor\u001b[39;00m k, v \u001b[38;5;129;01min\u001b[39;00m kwargs.items() \u001b[38;5;28;01mif\u001b[39;00m k \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m.special_peft_forward_args}\n\u001b[32m--> \u001b[39m\u001b[32m823\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mget_base_model\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m.\u001b[49m\u001b[43mgenerate\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\peft\\peft_model.py:1874\u001b[39m, in \u001b[36mPeftModelForCausalLM.generate\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1872\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m._enable_peft_forward_hooks(*args, **kwargs):\n\u001b[32m 1873\u001b[39m kwargs = {k: v \u001b[38;5;28;01mfor\u001b[39;00m k, v \u001b[38;5;129;01min\u001b[39;00m kwargs.items() \u001b[38;5;28;01mif\u001b[39;00m k \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m.special_peft_forward_args}\n\u001b[32m-> \u001b[39m\u001b[32m1874\u001b[39m outputs = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mbase_model\u001b[49m\u001b[43m.\u001b[49m\u001b[43mgenerate\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1875\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 1876\u001b[39m outputs = \u001b[38;5;28mself\u001b[39m.base_model.generate(**kwargs)\n",
"\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\torch\\utils\\_contextlib.py:116\u001b[39m, in \u001b[36mcontext_decorator.<locals>.decorate_context\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 113\u001b[39m \u001b[38;5;129m@functools\u001b[39m.wraps(func)\n\u001b[32m 114\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mdecorate_context\u001b[39m(*args, **kwargs):\n\u001b[32m 115\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m ctx_factory():\n\u001b[32m--> \u001b[39m\u001b[32m116\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\transformers\\generation\\utils.py:2326\u001b[39m, in \u001b[36mGenerationMixin.generate\u001b[39m\u001b[34m(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, use_model_defaults, **kwargs)\u001b[39m\n\u001b[32m 2318\u001b[39m input_ids, model_kwargs = \u001b[38;5;28mself\u001b[39m._expand_inputs_for_generation(\n\u001b[32m 2319\u001b[39m input_ids=input_ids,\n\u001b[32m 2320\u001b[39m expand_size=generation_config.num_return_sequences,\n\u001b[32m 2321\u001b[39m is_encoder_decoder=\u001b[38;5;28mself\u001b[39m.config.is_encoder_decoder,\n\u001b[32m 2322\u001b[39m **model_kwargs,\n\u001b[32m 2323\u001b[39m )\n\u001b[32m 2325\u001b[39m \u001b[38;5;66;03m# 12. run sample (it degenerates to greedy search when `generation_config.do_sample=False`)\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m2326\u001b[39m result = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_sample\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 2327\u001b[39m \u001b[43m \u001b[49m\u001b[43minput_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2328\u001b[39m \u001b[43m \u001b[49m\u001b[43mlogits_processor\u001b[49m\u001b[43m=\u001b[49m\u001b[43mprepared_logits_processor\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2329\u001b[39m \u001b[43m \u001b[49m\u001b[43mstopping_criteria\u001b[49m\u001b[43m=\u001b[49m\u001b[43mprepared_stopping_criteria\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2330\u001b[39m \u001b[43m \u001b[49m\u001b[43mgeneration_config\u001b[49m\u001b[43m=\u001b[49m\u001b[43mgeneration_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2331\u001b[39m \u001b[43m \u001b[49m\u001b[43msynced_gpus\u001b[49m\u001b[43m=\u001b[49m\u001b[43msynced_gpus\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2332\u001b[39m \u001b[43m \u001b[49m\u001b[43mstreamer\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstreamer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2333\u001b[39m \u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mmodel_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2334\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 2336\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m generation_mode \u001b[38;5;129;01min\u001b[39;00m (GenerationMode.BEAM_SAMPLE, GenerationMode.BEAM_SEARCH):\n\u001b[32m 2337\u001b[39m \u001b[38;5;66;03m# 11. interleave input_ids with `num_beams` additional sequences per batch\u001b[39;00m\n\u001b[32m 2338\u001b[39m input_ids, model_kwargs = \u001b[38;5;28mself\u001b[39m._expand_inputs_for_generation(\n\u001b[32m 2339\u001b[39m input_ids=input_ids,\n\u001b[32m 2340\u001b[39m expand_size=generation_config.num_beams,\n\u001b[32m 2341\u001b[39m is_encoder_decoder=\u001b[38;5;28mself\u001b[39m.config.is_encoder_decoder,\n\u001b[32m 2342\u001b[39m **model_kwargs,\n\u001b[32m 2343\u001b[39m )\n",
"\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\transformers\\generation\\utils.py:3289\u001b[39m, in \u001b[36mGenerationMixin._sample\u001b[39m\u001b[34m(self, input_ids, logits_processor, stopping_criteria, generation_config, synced_gpus, streamer, **model_kwargs)\u001b[39m\n\u001b[32m 3287\u001b[39m is_prefill = \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[32m 3288\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m3289\u001b[39m outputs = \u001b[43mmodel_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mmodel_inputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mreturn_dict\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[32m 3291\u001b[39m \u001b[38;5;66;03m# synced_gpus: don't waste resources running the code we don't need; kwargs must be updated before skipping\u001b[39;00m\n\u001b[32m 3292\u001b[39m model_kwargs = \u001b[38;5;28mself\u001b[39m._update_model_kwargs_for_generation(\n\u001b[32m 3293\u001b[39m outputs,\n\u001b[32m 3294\u001b[39m model_kwargs,\n\u001b[32m 3295\u001b[39m is_encoder_decoder=\u001b[38;5;28mself\u001b[39m.config.is_encoder_decoder,\n\u001b[32m 3296\u001b[39m )\n",
"\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1739\u001b[39m, in \u001b[36mModule._wrapped_call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1737\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._compiled_call_impl(*args, **kwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[32m 1738\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m1739\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1750\u001b[39m, in \u001b[36mModule._call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1745\u001b[39m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[32m 1746\u001b[39m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[32m 1747\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m._backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_pre_hooks\n\u001b[32m 1748\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[32m 1749\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[32m-> \u001b[39m\u001b[32m1750\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1752\u001b[39m result = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 1753\u001b[39m called_always_called_hooks = \u001b[38;5;28mset\u001b[39m()\n",
"\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\transformers\\utils\\deprecation.py:172\u001b[39m, in \u001b[36mdeprecate_kwarg.<locals>.wrapper.<locals>.wrapped_func\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 168\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m minimum_action \u001b[38;5;129;01min\u001b[39;00m (Action.NOTIFY, Action.NOTIFY_ALWAYS) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_torchdynamo_compiling():\n\u001b[32m 169\u001b[39m \u001b[38;5;66;03m# DeprecationWarning is ignored by default, so we use FutureWarning instead\u001b[39;00m\n\u001b[32m 170\u001b[39m warnings.warn(message, \u001b[38;5;167;01mFutureWarning\u001b[39;00m, stacklevel=\u001b[32m2\u001b[39m)\n\u001b[32m--> \u001b[39m\u001b[32m172\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\transformers\\models\\qwen2\\modeling_qwen2.py:855\u001b[39m, in \u001b[36mQwen2ForCausalLM.forward\u001b[39m\u001b[34m(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict, cache_position, logits_to_keep, **kwargs)\u001b[39m\n\u001b[32m 852\u001b[39m return_dict = return_dict \u001b[38;5;28;01mif\u001b[39;00m return_dict \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m.config.use_return_dict\n\u001b[32m 854\u001b[39m \u001b[38;5;66;03m# decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m855\u001b[39m outputs = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 856\u001b[39m \u001b[43m \u001b[49m\u001b[43minput_ids\u001b[49m\u001b[43m=\u001b[49m\u001b[43minput_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 857\u001b[39m \u001b[43m \u001b[49m\u001b[43mattention_mask\u001b[49m\u001b[43m=\u001b[49m\u001b[43mattention_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 858\u001b[39m \u001b[43m \u001b[49m\u001b[43mposition_ids\u001b[49m\u001b[43m=\u001b[49m\u001b[43mposition_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 859\u001b[39m \u001b[43m \u001b[49m\u001b[43mpast_key_values\u001b[49m\u001b[43m=\u001b[49m\u001b[43mpast_key_values\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 860\u001b[39m \u001b[43m \u001b[49m\u001b[43minputs_embeds\u001b[49m\u001b[43m=\u001b[49m\u001b[43minputs_embeds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 861\u001b[39m \u001b[43m \u001b[49m\u001b[43muse_cache\u001b[49m\u001b[43m=\u001b[49m\u001b[43muse_cache\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 862\u001b[39m \u001b[43m \u001b[49m\u001b[43moutput_attentions\u001b[49m\u001b[43m=\u001b[49m\u001b[43moutput_attentions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 863\u001b[39m \u001b[43m \u001b[49m\u001b[43moutput_hidden_states\u001b[49m\u001b[43m=\u001b[49m\u001b[43moutput_hidden_states\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 864\u001b[39m \u001b[43m \u001b[49m\u001b[43mreturn_dict\u001b[49m\u001b[43m=\u001b[49m\u001b[43mreturn_dict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 865\u001b[39m \u001b[43m \u001b[49m\u001b[43mcache_position\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcache_position\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 866\u001b[39m \u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 867\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 869\u001b[39m hidden_states = outputs[\u001b[32m0\u001b[39m]\n\u001b[32m 870\u001b[39m \u001b[38;5;66;03m# Only compute necessary logits, and do not upcast them to float if we are not computing the loss\u001b[39;00m\n",
"\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1739\u001b[39m, in \u001b[36mModule._wrapped_call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1737\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._compiled_call_impl(*args, **kwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[32m 1738\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m1739\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1750\u001b[39m, in \u001b[36mModule._call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1745\u001b[39m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[32m 1746\u001b[39m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[32m 1747\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m._backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_pre_hooks\n\u001b[32m 1748\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[32m 1749\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[32m-> \u001b[39m\u001b[32m1750\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1752\u001b[39m result = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 1753\u001b[39m called_always_called_hooks = \u001b[38;5;28mset\u001b[39m()\n",
"\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\transformers\\models\\qwen2\\modeling_qwen2.py:579\u001b[39m, in \u001b[36mQwen2Model.forward\u001b[39m\u001b[34m(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, use_cache, output_attentions, output_hidden_states, return_dict, cache_position, **flash_attn_kwargs)\u001b[39m\n\u001b[32m 567\u001b[39m layer_outputs = \u001b[38;5;28mself\u001b[39m._gradient_checkpointing_func(\n\u001b[32m 568\u001b[39m decoder_layer.\u001b[34m__call__\u001b[39m,\n\u001b[32m 569\u001b[39m hidden_states,\n\u001b[32m (...)\u001b[39m\u001b[32m 576\u001b[39m position_embeddings,\n\u001b[32m 577\u001b[39m )\n\u001b[32m 578\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m579\u001b[39m layer_outputs = \u001b[43mdecoder_layer\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 580\u001b[39m \u001b[43m \u001b[49m\u001b[43mhidden_states\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 581\u001b[39m \u001b[43m \u001b[49m\u001b[43mattention_mask\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcausal_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 582\u001b[39m \u001b[43m \u001b[49m\u001b[43mposition_ids\u001b[49m\u001b[43m=\u001b[49m\u001b[43mposition_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 583\u001b[39m \u001b[43m \u001b[49m\u001b[43mpast_key_value\u001b[49m\u001b[43m=\u001b[49m\u001b[43mpast_key_values\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 584\u001b[39m \u001b[43m \u001b[49m\u001b[43moutput_attentions\u001b[49m\u001b[43m=\u001b[49m\u001b[43moutput_attentions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 585\u001b[39m \u001b[43m \u001b[49m\u001b[43muse_cache\u001b[49m\u001b[43m=\u001b[49m\u001b[43muse_cache\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 586\u001b[39m \u001b[43m \u001b[49m\u001b[43mcache_position\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcache_position\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 587\u001b[39m \u001b[43m \u001b[49m\u001b[43mposition_embeddings\u001b[49m\u001b[43m=\u001b[49m\u001b[43mposition_embeddings\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 588\u001b[39m \u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mflash_attn_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 589\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 591\u001b[39m hidden_states = layer_outputs[\u001b[32m0\u001b[39m]\n\u001b[32m 593\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m output_attentions:\n",
"\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1739\u001b[39m, in \u001b[36mModule._wrapped_call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1737\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._compiled_call_impl(*args, **kwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[32m 1738\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m1739\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1750\u001b[39m, in \u001b[36mModule._call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1745\u001b[39m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[32m 1746\u001b[39m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[32m 1747\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m._backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_pre_hooks\n\u001b[32m 1748\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[32m 1749\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[32m-> \u001b[39m\u001b[32m1750\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1752\u001b[39m result = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 1753\u001b[39m called_always_called_hooks = \u001b[38;5;28mset\u001b[39m()\n",
"\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\transformers\\models\\qwen2\\modeling_qwen2.py:276\u001b[39m, in \u001b[36mQwen2DecoderLayer.forward\u001b[39m\u001b[34m(self, hidden_states, attention_mask, position_ids, past_key_value, output_attentions, use_cache, cache_position, position_embeddings, **kwargs)\u001b[39m\n\u001b[32m 274\u001b[39m residual = hidden_states\n\u001b[32m 275\u001b[39m hidden_states = \u001b[38;5;28mself\u001b[39m.post_attention_layernorm(hidden_states)\n\u001b[32m--> \u001b[39m\u001b[32m276\u001b[39m hidden_states = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mmlp\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhidden_states\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 277\u001b[39m hidden_states = residual + hidden_states\n\u001b[32m 279\u001b[39m outputs = (hidden_states,)\n",
"\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1739\u001b[39m, in \u001b[36mModule._wrapped_call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1737\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._compiled_call_impl(*args, **kwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[32m 1738\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m1739\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1750\u001b[39m, in \u001b[36mModule._call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1745\u001b[39m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[32m 1746\u001b[39m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[32m 1747\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m._backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_pre_hooks\n\u001b[32m 1748\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[32m 1749\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[32m-> \u001b[39m\u001b[32m1750\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1752\u001b[39m result = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 1753\u001b[39m called_always_called_hooks = \u001b[38;5;28mset\u001b[39m()\n",
"\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\transformers\\models\\qwen2\\modeling_qwen2.py:57\u001b[39m, in \u001b[36mQwen2MLP.forward\u001b[39m\u001b[34m(self, x)\u001b[39m\n\u001b[32m 56\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, x):\n\u001b[32m---> \u001b[39m\u001b[32m57\u001b[39m down_proj = \u001b[38;5;28mself\u001b[39m.down_proj(\u001b[38;5;28mself\u001b[39m.act_fn(\u001b[38;5;28mself\u001b[39m.gate_proj(x)) * \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mup_proj\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[32m 58\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m down_proj\n",
"\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1739\u001b[39m, in \u001b[36mModule._wrapped_call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1737\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._compiled_call_impl(*args, **kwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[32m 1738\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m1739\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1750\u001b[39m, in \u001b[36mModule._call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1745\u001b[39m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[32m 1746\u001b[39m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[32m 1747\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m._backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_pre_hooks\n\u001b[32m 1748\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[32m 1749\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[32m-> \u001b[39m\u001b[32m1750\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1752\u001b[39m result = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 1753\u001b[39m called_always_called_hooks = \u001b[38;5;28mset\u001b[39m()\n",
"\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\torch\\nn\\modules\\linear.py:125\u001b[39m, in \u001b[36mLinear.forward\u001b[39m\u001b[34m(self, input)\u001b[39m\n\u001b[32m 124\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;28minput\u001b[39m: Tensor) -> Tensor:\n\u001b[32m--> \u001b[39m\u001b[32m125\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mF\u001b[49m\u001b[43m.\u001b[49m\u001b[43mlinear\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mweight\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mbias\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[31mKeyboardInterrupt\u001b[39m: "
]
}
],
"source": [
"# Trainer trainer = GRPOTrainer( \n",
"# model=model, \n",
"# reward_funcs=[reward_len], \n",
"# args=training_args, \n",
"# train_dataset=dataset[\"train\"], \n",
"# ) \n",
"# Train model \n",
"# wandb.init(project=\"GRPO\") \n",
"# trainer.train()\n",
"trainer = GRPOTrainer(\n",
" model=model,\n",
" reward_funcs=[compute_rewards],\n",
" args=config,\n",
" train_dataset=dataset\n",
")\n",
"trainer.train()"
]
},
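{
"cell_type": "markdown",
"id": "save-adapter-md",
"metadata": {},
"source": [
"The run above was interrupted with a `KeyboardInterrupt`. Once a run completes (or resumes from a checkpoint under `output_dir`), the trained LoRA adapter can be persisted. A minimal sketch using the standard PEFT/Trainer APIs; the `GRPO/final_adapter` path is just an example:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "save-adapter-code",
"metadata": {},
"outputs": [],
"source": [
"# Persist the trained LoRA adapter weights and the tokenizer (path is illustrative).\n",
"# trainer.save_model() would also work and writes to config.output_dir.\n",
"model.save_pretrained(\"GRPO/final_adapter\")\n",
"tokenizer.save_pretrained(\"GRPO/final_adapter\")"
]
},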
{
"cell_type": "markdown",
"id": "f621c33533e55b00",
"metadata": {},
"source": [
"# 评估"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d17d0e3eb9069545",
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"from datetime import datetime\n",
"\n",
"def plot_training_metrics(losses, kls, avg_rewards, output_dir=\".\"):\n",
" \"\"\"\n",
" 绘制并保存训练指标图表\n",
" \n",
" 参数:\n",
" losses: 训练损失列表\n",
" kls: KL散度列表\n",
" avg_rewards: 平均奖励列表\n",
" output_dir: 输出目录路径\n",
" \"\"\"\n",
" # 生成带时间戳的唯一文件名\n",
" timestamp = datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n",
" output_path = f\"{output_dir}/training_curves_{timestamp}.png\"\n",
" \n",
" # 创建画布\n",
" plt.figure(figsize=(15, 5), dpi=300)\n",
" \n",
" # 1. Loss 曲线\n",
" plt.subplot(1, 3, 1)\n",
" plt.plot(losses, label=\"Loss\", linewidth=1.5, color='blue')\n",
" plt.title(\"Training Loss\", fontsize=10)\n",
" plt.xlabel(\"Step\", fontsize=9)\n",
" plt.ylabel(\"Loss\", fontsize=9)\n",
" plt.grid(True, alpha=0.3)\n",
" \n",
" # 2. KL 散度曲线\n",
" plt.subplot(1, 3, 2)\n",
" plt.plot(kls, label=\"KL Divergence\", linewidth=1.5, color='orange')\n",
" plt.title(\"KL Divergence\", fontsize=10)\n",
" plt.xlabel(\"Step\", fontsize=9)\n",
" plt.ylabel(\"KL Divergence\", fontsize=9)\n",
" plt.grid(True, alpha=0.3)\n",
" \n",
" # 3. 平均奖励曲线\n",
" plt.subplot(1, 3, 3)\n",
" plt.plot(avg_rewards, label=\"Avg Reward\", linewidth=1.5, color='green')\n",
" plt.title(\"Average Reward\", fontsize=10)\n",
" plt.xlabel(\"Step\", fontsize=9)\n",
" plt.ylabel(\"Reward\", fontsize=9)\n",
" plt.grid(True, alpha=0.3)\n",
" \n",
" # 调整布局并保存\n",
" plt.tight_layout()\n",
" plt.savefig(\n",
" output_path,\n",
" bbox_inches='tight',\n",
" facecolor='white',\n",
" dpi=300\n",
" )\n",
" plt.close()\n",
" \n",
" print(f\"训练指标图表已保存至: {output_path}\")\n",
"\n",
"# 使用示例 (假设你已经有了这些数据)\n",
"# losses = [...] # 你的损失数据\n",
"# kls = [...] # 你的KL散度数据\n",
"# avg_rewards = [...] # 你的平均奖励数据\n",
"# plot_training_metrics(losses, kls, avg_rewards)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b6a739a2f9d0a343",
"metadata": {},
"outputs": [],
"source": [
"class MetricsCallback(TrainerCallback):\n",
" def __init__(self):\n",
" super().__init__()\n",
" self.metrics = {\n",
" 'loss': [], \n",
" 'kl_divergence': [], \n",
" 'avg_reward': []\n",
" }\n",
" \n",
" def on_log(self, args, state, control, logs=None, **kwargs):\n",
" if logs is not None:\n",
" if 'loss' in logs:\n",
" self.metrics['loss'].append(logs['loss'])\n",
" if 'kl_divergence' in logs:\n",
" self.metrics['kl_divergence'].append(logs['kl_divergence'])\n",
" if 'rewards' in logs: # 假设返回的是列表,取其平均值\n",
" avg_reward = sum(logs['rewards'])/len(logs['rewards'])\n",
" self.metrics['avg_reward'].append(avg_reward)\n",
" \n",
" "
]
},
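{
"cell_type": "markdown",
"id": "attach-callback-md",
"metadata": {},
"source": [
"The callback only records anything if it is attached to the trainer *before* training. A sketch using `Trainer.add_callback` (which `GRPOTrainer` inherits); note that the key names checked in `on_log` above may need adjusting to whatever GRPOTrainer actually emits in its log dict:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "attach-callback-code",
"metadata": {},
"outputs": [],
"source": [
"# Attach the callback so on_log() fires every `logging_steps` steps, then re-run training.\n",
"metrics_callback = MetricsCallback()\n",
"trainer.add_callback(metrics_callback)\n",
"# If 'kl_divergence'/'rewards' stay empty after training, inspect the `logs` dict\n",
"# passed to on_log and adjust the key names to the ones actually logged."
]
},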
{
"cell_type": "code",
"execution_count": null,
"id": "14cee34aa3bb165",
"metadata": {},
"outputs": [],
"source": [
"plot_training_metrics(metrics_callback.metrics['loss'],metrics_callback.metrics['kl_divergence'],metrics_callback.metrics['avg_reward'])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}