xixixhu commited on
Commit
30e80c5
·
verified ·
1 Parent(s): 1924553

Upload medical_test.ipynb

Browse files
Files changed (1) hide show
  1. medical_test.ipynb +545 -0
medical_test.ipynb ADDED
@@ -0,0 +1,545 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "83322446-f479-4ddb-ae43-80135b031341",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stdout",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
14
+ "🦥 Unsloth Zoo will now patch everything to make training faster!\n"
15
+ ]
16
+ }
17
+ ],
18
+ "source": [
19
+ "from unsloth import FastLanguageModel"
20
+ ]
21
+ },
22
+ {
23
+ "cell_type": "code",
24
+ "execution_count": 2,
25
+ "id": "0b7d5555-efd3-4166-9810-e714d8e8b794",
26
+ "metadata": {},
27
+ "outputs": [],
28
+ "source": [
29
+ "from unsloth import FastLanguageModel\n",
30
+ "max_seq_length=2040\n",
31
+ "dtype=None\n",
32
+ "load_in_4bit=False"
33
+ ]
34
+ },
35
+ {
36
+ "cell_type": "code",
37
+ "execution_count": 4,
38
+ "id": "92bcf0bd-d864-44ed-b55f-d12fe3687c7e",
39
+ "metadata": {},
40
+ "outputs": [
41
+ {
42
+ "name": "stdout",
43
+ "output_type": "stream",
44
+ "text": [
45
+ "==((====))== Unsloth 2025.2.15: Fast Qwen2 patching. Transformers: 4.49.0.\n",
46
+ " \\\\ /| GPU: NVIDIA GeForce RTX 4090 D. Max memory: 23.643 GB. Platform: Linux.\n",
47
+ "O^O/ \\_/ \\ Torch: 2.6.0+cu124. CUDA: 8.9. CUDA Toolkit: 12.4. Triton: 3.2.0\n",
48
+ "\\ / Bfloat16 = TRUE. FA [Xformers = 0.0.29.post3. FA2 = False]\n",
49
+ " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n",
50
+ "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n"
51
+ ]
52
+ },
53
+ {
54
+ "name": "stderr",
55
+ "output_type": "stream",
56
+ "text": [
57
+ "Sliding Window Attention is enabled but not implemented for `eager`; unexpected results may be encountered.\n"
58
+ ]
59
+ },
60
+ {
61
+ "data": {
62
+ "application/vnd.jupyter.widget-view+json": {
63
+ "model_id": "76868b84573c4f59b6b9068837b2c34f",
64
+ "version_major": 2,
65
+ "version_minor": 0
66
+ },
67
+ "text/plain": [
68
+ "Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]"
69
+ ]
70
+ },
71
+ "metadata": {},
72
+ "output_type": "display_data"
73
+ },
74
+ {
75
+ "name": "stderr",
76
+ "output_type": "stream",
77
+ "text": [
78
+ "/root/miniconda3/lib/python3.10/site-packages/peft/peft_model.py:599: UserWarning: Found missing adapter keys while loading the checkpoint: ['base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.weight', 'base_model.model.model.layers.0.self_attn.q_proj.lora_B.default.weight', 'base_model.model.model.layers.0.self_attn.k_proj.lora_A.default.weight', 'base_model.model.model.layers.0.self_attn.k_proj.lora_B.default.weight', 'base_model.model.model.layers.0.self_attn.v_proj.lora_A.default.weight', 'base_model.model.model.layers.0.self_attn.v_proj.lora_B.default.weight', 'base_model.model.model.layers.0.self_attn.o_proj.lora_A.default.weight', 'base_model.model.model.layers.0.self_attn.o_proj.lora_B.default.weight', 'base_model.model.model.layers.0.mlp.gate_proj.lora_A.default.weight', 'base_model.model.model.layers.0.mlp.gate_proj.lora_B.default.weight', 'base_model.model.model.layers.0.mlp.up_proj.lora_A.default.weight', 'base_model.model.model.layers.0.mlp.up_proj.lora_B.default.weight', 'base_model.model.model.layers.0.mlp.down_proj.lora_A.default.weight', 'base_model.model.model.layers.0.mlp.down_proj.lora_B.default.weight', 'base_model.model.model.layers.1.self_attn.q_proj.lora_A.default.weight', 'base_model.model.model.layers.1.self_attn.q_proj.lora_B.default.weight', 'base_model.model.model.layers.1.self_attn.k_proj.lora_A.default.weight', 'base_model.model.model.layers.1.self_attn.k_proj.lora_B.default.weight', 'base_model.model.model.layers.1.self_attn.v_proj.lora_A.default.weight', 'base_model.model.model.layers.1.self_attn.v_proj.lora_B.default.weight', 'base_model.model.model.layers.1.self_attn.o_proj.lora_A.default.weight', 'base_model.model.model.layers.1.self_attn.o_proj.lora_B.default.weight', 'base_model.model.model.layers.1.mlp.gate_proj.lora_A.default.weight', 'base_model.model.model.layers.1.mlp.gate_proj.lora_B.default.weight', 'base_model.model.model.layers.1.mlp.up_proj.lora_A.default.weight', 
'base_model.model.model.layers.1.mlp.up_proj.lora_B.default.weight', 'base_model.model.model.layers.1.mlp.down_proj.lora_A.default.weight', 'base_model.model.model.layers.1.mlp.down_proj.lora_B.default.weight', 'base_model.model.model.layers.2.self_attn.q_proj.lora_A.default.weight', 'base_model.model.model.layers.2.self_attn.q_proj.lora_B.default.weight', 'base_model.model.model.layers.2.self_attn.k_proj.lora_A.default.weight', 'base_model.model.model.layers.2.self_attn.k_proj.lora_B.default.weight', 'base_model.model.model.layers.2.self_attn.v_proj.lora_A.default.weight', 'base_model.model.model.layers.2.self_attn.v_proj.lora_B.default.weight', 'base_model.model.model.layers.2.self_attn.o_proj.lora_A.default.weight', 'base_model.model.model.layers.2.self_attn.o_proj.lora_B.default.weight', 'base_model.model.model.layers.2.mlp.gate_proj.lora_A.default.weight', 'base_model.model.model.layers.2.mlp.gate_proj.lora_B.default.weight', 'base_model.model.model.layers.2.mlp.up_proj.lora_A.default.weight', 'base_model.model.model.layers.2.mlp.up_proj.lora_B.default.weight', 'base_model.model.model.layers.2.mlp.down_proj.lora_A.default.weight', 'base_model.model.model.layers.2.mlp.down_proj.lora_B.default.weight', 'base_model.model.model.layers.3.self_attn.q_proj.lora_A.default.weight', 'base_model.model.model.layers.3.self_attn.q_proj.lora_B.default.weight', 'base_model.model.model.layers.3.self_attn.k_proj.lora_A.default.weight', 'base_model.model.model.layers.3.self_attn.k_proj.lora_B.default.weight', 'base_model.model.model.layers.3.self_attn.v_proj.lora_A.default.weight', 'base_model.model.model.layers.3.self_attn.v_proj.lora_B.default.weight', 'base_model.model.model.layers.3.self_attn.o_proj.lora_A.default.weight', 'base_model.model.model.layers.3.self_attn.o_proj.lora_B.default.weight', 'base_model.model.model.layers.3.mlp.gate_proj.lora_A.default.weight', 'base_model.model.model.layers.3.mlp.gate_proj.lora_B.default.weight', 
'base_model.model.model.layers.3.mlp.up_proj.lora_A.default.weight', 'base_model.model.model.layers.3.mlp.up_proj.lora_B.default.weight', 'base_model.model.model.layers.3.mlp.down_proj.lora_A.default.weight', 'base_model.model.model.layers.3.mlp.down_proj.lora_B.default.weight', 'base_model.model.model.layers.4.self_attn.q_proj.lora_A.default.weight', 'base_model.model.model.layers.4.self_attn.q_proj.lora_B.default.weight', 'base_model.model.model.layers.4.self_attn.k_proj.lora_A.default.weight', 'base_model.model.model.layers.4.self_attn.k_proj.lora_B.default.weight', 'base_model.model.model.layers.4.self_attn.v_proj.lora_A.default.weight', 'base_model.model.model.layers.4.self_attn.v_proj.lora_B.default.weight', 'base_model.model.model.layers.4.self_attn.o_proj.lora_A.default.weight', 'base_model.model.model.layers.4.self_attn.o_proj.lora_B.default.weight', 'base_model.model.model.layers.4.mlp.gate_proj.lora_A.default.weight', 'base_model.model.model.layers.4.mlp.gate_proj.lora_B.default.weight', 'base_model.model.model.layers.4.mlp.up_proj.lora_A.default.weight', 'base_model.model.model.layers.4.mlp.up_proj.lora_B.default.weight', 'base_model.model.model.layers.4.mlp.down_proj.lora_A.default.weight', 'base_model.model.model.layers.4.mlp.down_proj.lora_B.default.weight', 'base_model.model.model.layers.5.self_attn.q_proj.lora_A.default.weight', 'base_model.model.model.layers.5.self_attn.q_proj.lora_B.default.weight', 'base_model.model.model.layers.5.self_attn.k_proj.lora_A.default.weight', 'base_model.model.model.layers.5.self_attn.k_proj.lora_B.default.weight', 'base_model.model.model.layers.5.self_attn.v_proj.lora_A.default.weight', 'base_model.model.model.layers.5.self_attn.v_proj.lora_B.default.weight', 'base_model.model.model.layers.5.self_attn.o_proj.lora_A.default.weight', 'base_model.model.model.layers.5.self_attn.o_proj.lora_B.default.weight', 'base_model.model.model.layers.5.mlp.gate_proj.lora_A.default.weight', 
'base_model.model.model.layers.5.mlp.gate_proj.lora_B.default.weight', 'base_model.model.model.layers.5.mlp.up_proj.lora_A.default.weight', 'base_model.model.model.layers.5.mlp.up_proj.lora_B.default.weight', 'base_model.model.model.layers.5.mlp.down_proj.lora_A.default.weight', 'base_model.model.model.layers.5.mlp.down_proj.lora_B.default.weight', 'base_model.model.model.layers.6.self_attn.q_proj.lora_A.default.weight', 'base_model.model.model.layers.6.self_attn.q_proj.lora_B.default.weight', 'base_model.model.model.layers.6.self_attn.k_proj.lora_A.default.weight', 'base_model.model.model.layers.6.self_attn.k_proj.lora_B.default.weight', 'base_model.model.model.layers.6.self_attn.v_proj.lora_A.default.weight', 'base_model.model.model.layers.6.self_attn.v_proj.lora_B.default.weight', 'base_model.model.model.layers.6.self_attn.o_proj.lora_A.default.weight', 'base_model.model.model.layers.6.self_attn.o_proj.lora_B.default.weight', 'base_model.model.model.layers.6.mlp.gate_proj.lora_A.default.weight', 'base_model.model.model.layers.6.mlp.gate_proj.lora_B.default.weight', 'base_model.model.model.layers.6.mlp.up_proj.lora_A.default.weight', 'base_model.model.model.layers.6.mlp.up_proj.lora_B.default.weight', 'base_model.model.model.layers.6.mlp.down_proj.lora_A.default.weight', 'base_model.model.model.layers.6.mlp.down_proj.lora_B.default.weight', 'base_model.model.model.layers.7.self_attn.q_proj.lora_A.default.weight', 'base_model.model.model.layers.7.self_attn.q_proj.lora_B.default.weight', 'base_model.model.model.layers.7.self_attn.k_proj.lora_A.default.weight', 'base_model.model.model.layers.7.self_attn.k_proj.lora_B.default.weight', 'base_model.model.model.layers.7.self_attn.v_proj.lora_A.default.weight', 'base_model.model.model.layers.7.self_attn.v_proj.lora_B.default.weight', 'base_model.model.model.layers.7.self_attn.o_proj.lora_A.default.weight', 'base_model.model.model.layers.7.self_attn.o_proj.lora_B.default.weight', 
'base_model.model.model.layers.7.mlp.gate_proj.lora_A.default.weight', 'base_model.model.model.layers.7.mlp.gate_proj.lora_B.default.weight', 'base_model.model.model.layers.7.mlp.up_proj.lora_A.default.weight', 'base_model.model.model.layers.7.mlp.up_proj.lora_B.default.weight', 'base_model.model.model.layers.7.mlp.down_proj.lora_A.default.weight', 'base_model.model.model.layers.7.mlp.down_proj.lora_B.default.weight', 'base_model.model.model.layers.8.self_attn.q_proj.lora_A.default.weight', 'base_model.model.model.layers.8.self_attn.q_proj.lora_B.default.weight', 'base_model.model.model.layers.8.self_attn.k_proj.lora_A.default.weight', 'base_model.model.model.layers.8.self_attn.k_proj.lora_B.default.weight', 'base_model.model.model.layers.8.self_attn.v_proj.lora_A.default.weight', 'base_model.model.model.layers.8.self_attn.v_proj.lora_B.default.weight', 'base_model.model.model.layers.8.self_attn.o_proj.lora_A.default.weight', 'base_model.model.model.layers.8.self_attn.o_proj.lora_B.default.weight', 'base_model.model.model.layers.8.mlp.gate_proj.lora_A.default.weight', 'base_model.model.model.layers.8.mlp.gate_proj.lora_B.default.weight', 'base_model.model.model.layers.8.mlp.up_proj.lora_A.default.weight', 'base_model.model.model.layers.8.mlp.up_proj.lora_B.default.weight', 'base_model.model.model.layers.8.mlp.down_proj.lora_A.default.weight', 'base_model.model.model.layers.8.mlp.down_proj.lora_B.default.weight', 'base_model.model.model.layers.9.self_attn.q_proj.lora_A.default.weight', 'base_model.model.model.layers.9.self_attn.q_proj.lora_B.default.weight', 'base_model.model.model.layers.9.self_attn.k_proj.lora_A.default.weight', 'base_model.model.model.layers.9.self_attn.k_proj.lora_B.default.weight', 'base_model.model.model.layers.9.self_attn.v_proj.lora_A.default.weight', 'base_model.model.model.layers.9.self_attn.v_proj.lora_B.default.weight', 'base_model.model.model.layers.9.self_attn.o_proj.lora_A.default.weight', 
'base_model.model.model.layers.9.self_attn.o_proj.lora_B.default.weight', 'base_model.model.model.layers.9.mlp.gate_proj.lora_A.default.weight', 'base_model.model.model.layers.9.mlp.gate_proj.lora_B.default.weight', 'base_model.model.model.layers.9.mlp.up_proj.lora_A.default.weight', 'base_model.model.model.layers.9.mlp.up_proj.lora_B.default.weight', 'base_model.model.model.layers.9.mlp.down_proj.lora_A.default.weight', 'base_model.model.model.layers.9.mlp.down_proj.lora_B.default.weight', 'base_model.model.model.layers.10.self_attn.q_proj.lora_A.default.weight', 'base_model.model.model.layers.10.self_attn.q_proj.lora_B.default.weight', 'base_model.model.model.layers.10.self_attn.k_proj.lora_A.default.weight', 'base_model.model.model.layers.10.self_attn.k_proj.lora_B.default.weight', 'base_model.model.model.layers.10.self_attn.v_proj.lora_A.default.weight', 'base_model.model.model.layers.10.self_attn.v_proj.lora_B.default.weight', 'base_model.model.model.layers.10.self_attn.o_proj.lora_A.default.weight', 'base_model.model.model.layers.10.self_attn.o_proj.lora_B.default.weight', 'base_model.model.model.layers.10.mlp.gate_proj.lora_A.default.weight', 'base_model.model.model.layers.10.mlp.gate_proj.lora_B.default.weight', 'base_model.model.model.layers.10.mlp.up_proj.lora_A.default.weight', 'base_model.model.model.layers.10.mlp.up_proj.lora_B.default.weight', 'base_model.model.model.layers.10.mlp.down_proj.lora_A.default.weight', 'base_model.model.model.layers.10.mlp.down_proj.lora_B.default.weight', 'base_model.model.model.layers.11.self_attn.q_proj.lora_A.default.weight', 'base_model.model.model.layers.11.self_attn.q_proj.lora_B.default.weight', 'base_model.model.model.layers.11.self_attn.k_proj.lora_A.default.weight', 'base_model.model.model.layers.11.self_attn.k_proj.lora_B.default.weight', 'base_model.model.model.layers.11.self_attn.v_proj.lora_A.default.weight', 'base_model.model.model.layers.11.self_attn.v_proj.lora_B.default.weight', 
'base_model.model.model.layers.11.self_attn.o_proj.lora_A.default.weight', 'base_model.model.model.layers.11.self_attn.o_proj.lora_B.default.weight', 'base_model.model.model.layers.11.mlp.gate_proj.lora_A.default.weight', 'base_model.model.model.layers.11.mlp.gate_proj.lora_B.default.weight', 'base_model.model.model.layers.11.mlp.up_proj.lora_A.default.weight', 'base_model.model.model.layers.11.mlp.up_proj.lora_B.default.weight', 'base_model.model.model.layers.11.mlp.down_proj.lora_A.default.weight', 'base_model.model.model.layers.11.mlp.down_proj.lora_B.default.weight', 'base_model.model.model.layers.12.self_attn.q_proj.lora_A.default.weight', 'base_model.model.model.layers.12.self_attn.q_proj.lora_B.default.weight', 'base_model.model.model.layers.12.self_attn.k_proj.lora_A.default.weight', 'base_model.model.model.layers.12.self_attn.k_proj.lora_B.default.weight', 'base_model.model.model.layers.12.self_attn.v_proj.lora_A.default.weight', 'base_model.model.model.layers.12.self_attn.v_proj.lora_B.default.weight', 'base_model.model.model.layers.12.self_attn.o_proj.lora_A.default.weight', 'base_model.model.model.layers.12.self_attn.o_proj.lora_B.default.weight', 'base_model.model.model.layers.12.mlp.gate_proj.lora_A.default.weight', 'base_model.model.model.layers.12.mlp.gate_proj.lora_B.default.weight', 'base_model.model.model.layers.12.mlp.up_proj.lora_A.default.weight', 'base_model.model.model.layers.12.mlp.up_proj.lora_B.default.weight', 'base_model.model.model.layers.12.mlp.down_proj.lora_A.default.weight', 'base_model.model.model.layers.12.mlp.down_proj.lora_B.default.weight', 'base_model.model.model.layers.13.self_attn.q_proj.lora_A.default.weight', 'base_model.model.model.layers.13.self_attn.q_proj.lora_B.default.weight', 'base_model.model.model.layers.13.self_attn.k_proj.lora_A.default.weight', 'base_model.model.model.layers.13.self_attn.k_proj.lora_B.default.weight', 'base_model.model.model.layers.13.self_attn.v_proj.lora_A.default.weight', 
'base_model.model.model.layers.13.self_attn.v_proj.lora_B.default.weight', 'base_model.model.model.layers.13.self_attn.o_proj.lora_A.default.weight', 'base_model.model.model.layers.13.self_attn.o_proj.lora_B.default.weight', 'base_model.model.model.layers.13.mlp.gate_proj.lora_A.default.weight', 'base_model.model.model.layers.13.mlp.gate_proj.lora_B.default.weight', 'base_model.model.model.layers.13.mlp.up_proj.lora_A.default.weight', 'base_model.model.model.layers.13.mlp.up_proj.lora_B.default.weight', 'base_model.model.model.layers.13.mlp.down_proj.lora_A.default.weight', 'base_model.model.model.layers.13.mlp.down_proj.lora_B.default.weight', 'base_model.model.model.layers.14.self_attn.q_proj.lora_A.default.weight', 'base_model.model.model.layers.14.self_attn.q_proj.lora_B.default.weight', 'base_model.model.model.layers.14.self_attn.k_proj.lora_A.default.weight', 'base_model.model.model.layers.14.self_attn.k_proj.lora_B.default.weight', 'base_model.model.model.layers.14.self_attn.v_proj.lora_A.default.weight', 'base_model.model.model.layers.14.self_attn.v_proj.lora_B.default.weight', 'base_model.model.model.layers.14.self_attn.o_proj.lora_A.default.weight', 'base_model.model.model.layers.14.self_attn.o_proj.lora_B.default.weight', 'base_model.model.model.layers.14.mlp.gate_proj.lora_A.default.weight', 'base_model.model.model.layers.14.mlp.gate_proj.lora_B.default.weight', 'base_model.model.model.layers.14.mlp.up_proj.lora_A.default.weight', 'base_model.model.model.layers.14.mlp.up_proj.lora_B.default.weight', 'base_model.model.model.layers.14.mlp.down_proj.lora_A.default.weight', 'base_model.model.model.layers.14.mlp.down_proj.lora_B.default.weight', 'base_model.model.model.layers.15.self_attn.q_proj.lora_A.default.weight', 'base_model.model.model.layers.15.self_attn.q_proj.lora_B.default.weight', 'base_model.model.model.layers.15.self_attn.k_proj.lora_A.default.weight', 'base_model.model.model.layers.15.self_attn.k_proj.lora_B.default.weight', 
'base_model.model.model.layers.15.self_attn.v_proj.lora_A.default.weight', 'base_model.model.model.layers.15.self_attn.v_proj.lora_B.default.weight', 'base_model.model.model.layers.15.self_attn.o_proj.lora_A.default.weight', 'base_model.model.model.layers.15.self_attn.o_proj.lora_B.default.weight', 'base_model.model.model.layers.15.mlp.gate_proj.lora_A.default.weight', 'base_model.model.model.layers.15.mlp.gate_proj.lora_B.default.weight', 'base_model.model.model.layers.15.mlp.up_proj.lora_A.default.weight', 'base_model.model.model.layers.15.mlp.up_proj.lora_B.default.weight', 'base_model.model.model.layers.15.mlp.down_proj.lora_A.default.weight', 'base_model.model.model.layers.15.mlp.down_proj.lora_B.default.weight', 'base_model.model.model.layers.16.self_attn.q_proj.lora_A.default.weight', 'base_model.model.model.layers.16.self_attn.q_proj.lora_B.default.weight', 'base_model.model.model.layers.16.self_attn.k_proj.lora_A.default.weight', 'base_model.model.model.layers.16.self_attn.k_proj.lora_B.default.weight', 'base_model.model.model.layers.16.self_attn.v_proj.lora_A.default.weight', 'base_model.model.model.layers.16.self_attn.v_proj.lora_B.default.weight', 'base_model.model.model.layers.16.self_attn.o_proj.lora_A.default.weight', 'base_model.model.model.layers.16.self_attn.o_proj.lora_B.default.weight', 'base_model.model.model.layers.16.mlp.gate_proj.lora_A.default.weight', 'base_model.model.model.layers.16.mlp.gate_proj.lora_B.default.weight', 'base_model.model.model.layers.16.mlp.up_proj.lora_A.default.weight', 'base_model.model.model.layers.16.mlp.up_proj.lora_B.default.weight', 'base_model.model.model.layers.16.mlp.down_proj.lora_A.default.weight', 'base_model.model.model.layers.16.mlp.down_proj.lora_B.default.weight', 'base_model.model.model.layers.17.self_attn.q_proj.lora_A.default.weight', 'base_model.model.model.layers.17.self_attn.q_proj.lora_B.default.weight', 'base_model.model.model.layers.17.self_attn.k_proj.lora_A.default.weight', 
'base_model.model.model.layers.17.self_attn.k_proj.lora_B.default.weight', 'base_model.model.model.layers.17.self_attn.v_proj.lora_A.default.weight', 'base_model.model.model.layers.17.self_attn.v_proj.lora_B.default.weight', 'base_model.model.model.layers.17.self_attn.o_proj.lora_A.default.weight', 'base_model.model.model.layers.17.self_attn.o_proj.lora_B.default.weight', 'base_model.model.model.layers.17.mlp.gate_proj.lora_A.default.weight', 'base_model.model.model.layers.17.mlp.gate_proj.lora_B.default.weight', 'base_model.model.model.layers.17.mlp.up_proj.lora_A.default.weight', 'base_model.model.model.layers.17.mlp.up_proj.lora_B.default.weight', 'base_model.model.model.layers.17.mlp.down_proj.lora_A.default.weight', 'base_model.model.model.layers.17.mlp.down_proj.lora_B.default.weight', 'base_model.model.model.layers.18.self_attn.q_proj.lora_A.default.weight', 'base_model.model.model.layers.18.self_attn.q_proj.lora_B.default.weight', 'base_model.model.model.layers.18.self_attn.k_proj.lora_A.default.weight', 'base_model.model.model.layers.18.self_attn.k_proj.lora_B.default.weight', 'base_model.model.model.layers.18.self_attn.v_proj.lora_A.default.weight', 'base_model.model.model.layers.18.self_attn.v_proj.lora_B.default.weight', 'base_model.model.model.layers.18.self_attn.o_proj.lora_A.default.weight', 'base_model.model.model.layers.18.self_attn.o_proj.lora_B.default.weight', 'base_model.model.model.layers.18.mlp.gate_proj.lora_A.default.weight', 'base_model.model.model.layers.18.mlp.gate_proj.lora_B.default.weight', 'base_model.model.model.layers.18.mlp.up_proj.lora_A.default.weight', 'base_model.model.model.layers.18.mlp.up_proj.lora_B.default.weight', 'base_model.model.model.layers.18.mlp.down_proj.lora_A.default.weight', 'base_model.model.model.layers.18.mlp.down_proj.lora_B.default.weight', 'base_model.model.model.layers.19.self_attn.q_proj.lora_A.default.weight', 'base_model.model.model.layers.19.self_attn.q_proj.lora_B.default.weight', 
'base_model.model.model.layers.19.self_attn.k_proj.lora_A.default.weight', 'base_model.model.model.layers.19.self_attn.k_proj.lora_B.default.weight', 'base_model.model.model.layers.19.self_attn.v_proj.lora_A.default.weight', 'base_model.model.model.layers.19.self_attn.v_proj.lora_B.default.weight', 'base_model.model.model.layers.19.self_attn.o_proj.lora_A.default.weight', 'base_model.model.model.layers.19.self_attn.o_proj.lora_B.default.weight', 'base_model.model.model.layers.19.mlp.gate_proj.lora_A.default.weight', 'base_model.model.model.layers.19.mlp.gate_proj.lora_B.default.weight', 'base_model.model.model.layers.19.mlp.up_proj.lora_A.default.weight', 'base_model.model.model.layers.19.mlp.up_proj.lora_B.default.weight', 'base_model.model.model.layers.19.mlp.down_proj.lora_A.default.weight', 'base_model.model.model.layers.19.mlp.down_proj.lora_B.default.weight', 'base_model.model.model.layers.20.self_attn.q_proj.lora_A.default.weight', 'base_model.model.model.layers.20.self_attn.q_proj.lora_B.default.weight', 'base_model.model.model.layers.20.self_attn.k_proj.lora_A.default.weight', 'base_model.model.model.layers.20.self_attn.k_proj.lora_B.default.weight', 'base_model.model.model.layers.20.self_attn.v_proj.lora_A.default.weight', 'base_model.model.model.layers.20.self_attn.v_proj.lora_B.default.weight', 'base_model.model.model.layers.20.self_attn.o_proj.lora_A.default.weight', 'base_model.model.model.layers.20.self_attn.o_proj.lora_B.default.weight', 'base_model.model.model.layers.20.mlp.gate_proj.lora_A.default.weight', 'base_model.model.model.layers.20.mlp.gate_proj.lora_B.default.weight', 'base_model.model.model.layers.20.mlp.up_proj.lora_A.default.weight', 'base_model.model.model.layers.20.mlp.up_proj.lora_B.default.weight', 'base_model.model.model.layers.20.mlp.down_proj.lora_A.default.weight', 'base_model.model.model.layers.20.mlp.down_proj.lora_B.default.weight', 'base_model.model.model.layers.21.self_attn.q_proj.lora_A.default.weight', 
'base_model.model.model.layers.21.self_attn.q_proj.lora_B.default.weight', 'base_model.model.model.layers.21.self_attn.k_proj.lora_A.default.weight', 'base_model.model.model.layers.21.self_attn.k_proj.lora_B.default.weight', 'base_model.model.model.layers.21.self_attn.v_proj.lora_A.default.weight', 'base_model.model.model.layers.21.self_attn.v_proj.lora_B.default.weight', 'base_model.model.model.layers.21.self_attn.o_proj.lora_A.default.weight', 'base_model.model.model.layers.21.self_attn.o_proj.lora_B.default.weight', 'base_model.model.model.layers.21.mlp.gate_proj.lora_A.default.weight', 'base_model.model.model.layers.21.mlp.gate_proj.lora_B.default.weight', 'base_model.model.model.layers.21.mlp.up_proj.lora_A.default.weight', 'base_model.model.model.layers.21.mlp.up_proj.lora_B.default.weight', 'base_model.model.model.layers.21.mlp.down_proj.lora_A.default.weight', 'base_model.model.model.layers.21.mlp.down_proj.lora_B.default.weight', 'base_model.model.model.layers.22.self_attn.q_proj.lora_A.default.weight', 'base_model.model.model.layers.22.self_attn.q_proj.lora_B.default.weight', 'base_model.model.model.layers.22.self_attn.k_proj.lora_A.default.weight', 'base_model.model.model.layers.22.self_attn.k_proj.lora_B.default.weight', 'base_model.model.model.layers.22.self_attn.v_proj.lora_A.default.weight', 'base_model.model.model.layers.22.self_attn.v_proj.lora_B.default.weight', 'base_model.model.model.layers.22.self_attn.o_proj.lora_A.default.weight', 'base_model.model.model.layers.22.self_attn.o_proj.lora_B.default.weight', 'base_model.model.model.layers.22.mlp.gate_proj.lora_A.default.weight', 'base_model.model.model.layers.22.mlp.gate_proj.lora_B.default.weight', 'base_model.model.model.layers.22.mlp.up_proj.lora_A.default.weight', 'base_model.model.model.layers.22.mlp.up_proj.lora_B.default.weight', 'base_model.model.model.layers.22.mlp.down_proj.lora_A.default.weight', 'base_model.model.model.layers.22.mlp.down_proj.lora_B.default.weight', 
'base_model.model.model.layers.23.self_attn.q_proj.lora_A.default.weight', 'base_model.model.model.layers.23.self_attn.q_proj.lora_B.default.weight', 'base_model.model.model.layers.23.self_attn.k_proj.lora_A.default.weight', 'base_model.model.model.layers.23.self_attn.k_proj.lora_B.default.weight', 'base_model.model.model.layers.23.self_attn.v_proj.lora_A.default.weight', 'base_model.model.model.layers.23.self_attn.v_proj.lora_B.default.weight', 'base_model.model.model.layers.23.self_attn.o_proj.lora_A.default.weight', 'base_model.model.model.layers.23.self_attn.o_proj.lora_B.default.weight', 'base_model.model.model.layers.23.mlp.gate_proj.lora_A.default.weight', 'base_model.model.model.layers.23.mlp.gate_proj.lora_B.default.weight', 'base_model.model.model.layers.23.mlp.up_proj.lora_A.default.weight', 'base_model.model.model.layers.23.mlp.up_proj.lora_B.default.weight', 'base_model.model.model.layers.23.mlp.down_proj.lora_A.default.weight', 'base_model.model.model.layers.23.mlp.down_proj.lora_B.default.weight', 'base_model.model.model.layers.24.self_attn.q_proj.lora_A.default.weight', 'base_model.model.model.layers.24.self_attn.q_proj.lora_B.default.weight', 'base_model.model.model.layers.24.self_attn.k_proj.lora_A.default.weight', 'base_model.model.model.layers.24.self_attn.k_proj.lora_B.default.weight', 'base_model.model.model.layers.24.self_attn.v_proj.lora_A.default.weight', 'base_model.model.model.layers.24.self_attn.v_proj.lora_B.default.weight', 'base_model.model.model.layers.24.self_attn.o_proj.lora_A.default.weight', 'base_model.model.model.layers.24.self_attn.o_proj.lora_B.default.weight', 'base_model.model.model.layers.24.mlp.gate_proj.lora_A.default.weight', 'base_model.model.model.layers.24.mlp.gate_proj.lora_B.default.weight', 'base_model.model.model.layers.24.mlp.up_proj.lora_A.default.weight', 'base_model.model.model.layers.24.mlp.up_proj.lora_B.default.weight', 'base_model.model.model.layers.24.mlp.down_proj.lora_A.default.weight', 
'base_model.model.model.layers.24.mlp.down_proj.lora_B.default.weight', 'base_model.model.model.layers.25.self_attn.q_proj.lora_A.default.weight', 'base_model.model.model.layers.25.self_attn.q_proj.lora_B.default.weight', 'base_model.model.model.layers.25.self_attn.k_proj.lora_A.default.weight', 'base_model.model.model.layers.25.self_attn.k_proj.lora_B.default.weight', 'base_model.model.model.layers.25.self_attn.v_proj.lora_A.default.weight', 'base_model.model.model.layers.25.self_attn.v_proj.lora_B.default.weight', 'base_model.model.model.layers.25.self_attn.o_proj.lora_A.default.weight', 'base_model.model.model.layers.25.self_attn.o_proj.lora_B.default.weight', 'base_model.model.model.layers.25.mlp.gate_proj.lora_A.default.weight', 'base_model.model.model.layers.25.mlp.gate_proj.lora_B.default.weight', 'base_model.model.model.layers.25.mlp.up_proj.lora_A.default.weight', 'base_model.model.model.layers.25.mlp.up_proj.lora_B.default.weight', 'base_model.model.model.layers.25.mlp.down_proj.lora_A.default.weight', 'base_model.model.model.layers.25.mlp.down_proj.lora_B.default.weight', 'base_model.model.model.layers.26.self_attn.q_proj.lora_A.default.weight', 'base_model.model.model.layers.26.self_attn.q_proj.lora_B.default.weight', 'base_model.model.model.layers.26.self_attn.k_proj.lora_A.default.weight', 'base_model.model.model.layers.26.self_attn.k_proj.lora_B.default.weight', 'base_model.model.model.layers.26.self_attn.v_proj.lora_A.default.weight', 'base_model.model.model.layers.26.self_attn.v_proj.lora_B.default.weight', 'base_model.model.model.layers.26.self_attn.o_proj.lora_A.default.weight', 'base_model.model.model.layers.26.self_attn.o_proj.lora_B.default.weight', 'base_model.model.model.layers.26.mlp.gate_proj.lora_A.default.weight', 'base_model.model.model.layers.26.mlp.gate_proj.lora_B.default.weight', 'base_model.model.model.layers.26.mlp.up_proj.lora_A.default.weight', 'base_model.model.model.layers.26.mlp.up_proj.lora_B.default.weight', 
'base_model.model.model.layers.26.mlp.down_proj.lora_A.default.weight', 'base_model.model.model.layers.26.mlp.down_proj.lora_B.default.weight', 'base_model.model.model.layers.27.self_attn.q_proj.lora_A.default.weight', 'base_model.model.model.layers.27.self_attn.q_proj.lora_B.default.weight', 'base_model.model.model.layers.27.self_attn.k_proj.lora_A.default.weight', 'base_model.model.model.layers.27.self_attn.k_proj.lora_B.default.weight', 'base_model.model.model.layers.27.self_attn.v_proj.lora_A.default.weight', 'base_model.model.model.layers.27.self_attn.v_proj.lora_B.default.weight', 'base_model.model.model.layers.27.self_attn.o_proj.lora_A.default.weight', 'base_model.model.model.layers.27.self_attn.o_proj.lora_B.default.weight', 'base_model.model.model.layers.27.mlp.gate_proj.lora_A.default.weight', 'base_model.model.model.layers.27.mlp.gate_proj.lora_B.default.weight', 'base_model.model.model.layers.27.mlp.up_proj.lora_A.default.weight', 'base_model.model.model.layers.27.mlp.up_proj.lora_B.default.weight', 'base_model.model.model.layers.27.mlp.down_proj.lora_A.default.weight', 'base_model.model.model.layers.27.mlp.down_proj.lora_B.default.weight']\n",
79
+ " warnings.warn(f\"Found missing adapter keys while loading the checkpoint: {missing_keys}\")\n",
80
+ "Unsloth 2025.2.15 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.\n"
81
+ ]
82
+ }
83
+ ],
84
+ "source": [
85
+ "model, tokenizer = FastLanguageModel.from_pretrained(\n",
86
+ " model_name = \"DeepSeek-R1-Medical-COT\",\n",
87
+ " max_seq_length = max_seq_length,\n",
88
+ " dtype = dtype,\n",
89
+ " load_in_4bit = load_in_4bit,\n",
90
+ ")"
91
+ ]
92
+ },
93
+ {
94
+ "cell_type": "code",
95
+ "execution_count": 5,
96
+ "id": "bb0540dc-853f-4a5e-9196-4e93dffb25a8",
97
+ "metadata": {},
98
+ "outputs": [
99
+ {
100
+ "data": {
101
+ "text/plain": [
102
+ "PeftModelForCausalLM(\n",
103
+ " (base_model): LoraModel(\n",
104
+ " (model): Qwen2ForCausalLM(\n",
105
+ " (model): Qwen2Model(\n",
106
+ " (embed_tokens): Embedding(152064, 3584, padding_idx=151654)\n",
107
+ " (layers): ModuleList(\n",
108
+ " (0-27): 28 x Qwen2DecoderLayer(\n",
109
+ " (self_attn): Qwen2Attention(\n",
110
+ " (q_proj): lora.Linear(\n",
111
+ " (base_layer): Linear(in_features=3584, out_features=3584, bias=True)\n",
112
+ " (lora_dropout): ModuleDict(\n",
113
+ " (default): Identity()\n",
114
+ " )\n",
115
+ " (lora_A): ModuleDict(\n",
116
+ " (default): Linear(in_features=3584, out_features=16, bias=False)\n",
117
+ " )\n",
118
+ " (lora_B): ModuleDict(\n",
119
+ " (default): Linear(in_features=16, out_features=3584, bias=False)\n",
120
+ " )\n",
121
+ " (lora_embedding_A): ParameterDict()\n",
122
+ " (lora_embedding_B): ParameterDict()\n",
123
+ " (lora_magnitude_vector): ModuleDict()\n",
124
+ " )\n",
125
+ " (k_proj): lora.Linear(\n",
126
+ " (base_layer): Linear(in_features=3584, out_features=512, bias=True)\n",
127
+ " (lora_dropout): ModuleDict(\n",
128
+ " (default): Identity()\n",
129
+ " )\n",
130
+ " (lora_A): ModuleDict(\n",
131
+ " (default): Linear(in_features=3584, out_features=16, bias=False)\n",
132
+ " )\n",
133
+ " (lora_B): ModuleDict(\n",
134
+ " (default): Linear(in_features=16, out_features=512, bias=False)\n",
135
+ " )\n",
136
+ " (lora_embedding_A): ParameterDict()\n",
137
+ " (lora_embedding_B): ParameterDict()\n",
138
+ " (lora_magnitude_vector): ModuleDict()\n",
139
+ " )\n",
140
+ " (v_proj): lora.Linear(\n",
141
+ " (base_layer): Linear(in_features=3584, out_features=512, bias=True)\n",
142
+ " (lora_dropout): ModuleDict(\n",
143
+ " (default): Identity()\n",
144
+ " )\n",
145
+ " (lora_A): ModuleDict(\n",
146
+ " (default): Linear(in_features=3584, out_features=16, bias=False)\n",
147
+ " )\n",
148
+ " (lora_B): ModuleDict(\n",
149
+ " (default): Linear(in_features=16, out_features=512, bias=False)\n",
150
+ " )\n",
151
+ " (lora_embedding_A): ParameterDict()\n",
152
+ " (lora_embedding_B): ParameterDict()\n",
153
+ " (lora_magnitude_vector): ModuleDict()\n",
154
+ " )\n",
155
+ " (o_proj): lora.Linear(\n",
156
+ " (base_layer): Linear(in_features=3584, out_features=3584, bias=False)\n",
157
+ " (lora_dropout): ModuleDict(\n",
158
+ " (default): Identity()\n",
159
+ " )\n",
160
+ " (lora_A): ModuleDict(\n",
161
+ " (default): Linear(in_features=3584, out_features=16, bias=False)\n",
162
+ " )\n",
163
+ " (lora_B): ModuleDict(\n",
164
+ " (default): Linear(in_features=16, out_features=3584, bias=False)\n",
165
+ " )\n",
166
+ " (lora_embedding_A): ParameterDict()\n",
167
+ " (lora_embedding_B): ParameterDict()\n",
168
+ " (lora_magnitude_vector): ModuleDict()\n",
169
+ " )\n",
170
+ " (rotary_emb): LlamaRotaryEmbedding()\n",
171
+ " )\n",
172
+ " (mlp): Qwen2MLP(\n",
173
+ " (gate_proj): lora.Linear(\n",
174
+ " (base_layer): Linear(in_features=3584, out_features=18944, bias=False)\n",
175
+ " (lora_dropout): ModuleDict(\n",
176
+ " (default): Identity()\n",
177
+ " )\n",
178
+ " (lora_A): ModuleDict(\n",
179
+ " (default): Linear(in_features=3584, out_features=16, bias=False)\n",
180
+ " )\n",
181
+ " (lora_B): ModuleDict(\n",
182
+ " (default): Linear(in_features=16, out_features=18944, bias=False)\n",
183
+ " )\n",
184
+ " (lora_embedding_A): ParameterDict()\n",
185
+ " (lora_embedding_B): ParameterDict()\n",
186
+ " (lora_magnitude_vector): ModuleDict()\n",
187
+ " )\n",
188
+ " (up_proj): lora.Linear(\n",
189
+ " (base_layer): Linear(in_features=3584, out_features=18944, bias=False)\n",
190
+ " (lora_dropout): ModuleDict(\n",
191
+ " (default): Identity()\n",
192
+ " )\n",
193
+ " (lora_A): ModuleDict(\n",
194
+ " (default): Linear(in_features=3584, out_features=16, bias=False)\n",
195
+ " )\n",
196
+ " (lora_B): ModuleDict(\n",
197
+ " (default): Linear(in_features=16, out_features=18944, bias=False)\n",
198
+ " )\n",
199
+ " (lora_embedding_A): ParameterDict()\n",
200
+ " (lora_embedding_B): ParameterDict()\n",
201
+ " (lora_magnitude_vector): ModuleDict()\n",
202
+ " )\n",
203
+ " (down_proj): lora.Linear(\n",
204
+ " (base_layer): Linear(in_features=18944, out_features=3584, bias=False)\n",
205
+ " (lora_dropout): ModuleDict(\n",
206
+ " (default): Identity()\n",
207
+ " )\n",
208
+ " (lora_A): ModuleDict(\n",
209
+ " (default): Linear(in_features=18944, out_features=16, bias=False)\n",
210
+ " )\n",
211
+ " (lora_B): ModuleDict(\n",
212
+ " (default): Linear(in_features=16, out_features=3584, bias=False)\n",
213
+ " )\n",
214
+ " (lora_embedding_A): ParameterDict()\n",
215
+ " (lora_embedding_B): ParameterDict()\n",
216
+ " (lora_magnitude_vector): ModuleDict()\n",
217
+ " )\n",
218
+ " (act_fn): SiLU()\n",
219
+ " )\n",
220
+ " (input_layernorm): Qwen2RMSNorm((3584,), eps=1e-06)\n",
221
+ " (post_attention_layernorm): Qwen2RMSNorm((3584,), eps=1e-06)\n",
222
+ " )\n",
223
+ " )\n",
224
+ " (norm): Qwen2RMSNorm((3584,), eps=1e-06)\n",
225
+ " (rotary_emb): LlamaRotaryEmbedding()\n",
226
+ " )\n",
227
+ " (lm_head): Linear(in_features=3584, out_features=152064, bias=False)\n",
228
+ " )\n",
229
+ " )\n",
230
+ ")"
231
+ ]
232
+ },
233
+ "execution_count": 5,
234
+ "metadata": {},
235
+ "output_type": "execute_result"
236
+ }
237
+ ],
238
+ "source": [
239
+ "model"
240
+ ]
241
+ },
242
+ {
243
+ "cell_type": "code",
244
+ "execution_count": 6,
245
+ "id": "b3cd9792-98b9-4652-b4d7-d419cfff2a80",
246
+ "metadata": {},
247
+ "outputs": [
248
+ {
249
+ "data": {
250
+ "text/plain": [
251
+ "LlamaTokenizerFast(name_or_path='DeepSeek-R1-Medical-COT', vocab_size=151643, model_max_length=131072, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<|begin▁of▁sentence|>', 'eos_token': '<|end▁of▁sentence|>', 'pad_token': '<|vision_pad|>'}, clean_up_tokenization_spaces=False, added_tokens_decoder={\n",
252
+ "\t151643: AddedToken(\"<|end▁of▁sentence|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
253
+ "\t151644: AddedToken(\"<|User|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),\n",
254
+ "\t151645: AddedToken(\"<|Assistant|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),\n",
255
+ "\t151646: AddedToken(\"<|begin▁of▁sentence|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
256
+ "\t151647: AddedToken(\"<|EOT|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),\n",
257
+ "\t151648: AddedToken(\"<think>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),\n",
258
+ "\t151649: AddedToken(\"</think>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),\n",
259
+ "\t151650: AddedToken(\"<|quad_start|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
260
+ "\t151651: AddedToken(\"<|quad_end|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
261
+ "\t151652: AddedToken(\"<|vision_start|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
262
+ "\t151653: AddedToken(\"<|vision_end|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
263
+ "\t151654: AddedToken(\"<|vision_pad|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
264
+ "\t151655: AddedToken(\"<|image_pad|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
265
+ "\t151656: AddedToken(\"<|video_pad|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
266
+ "\t151657: AddedToken(\"<tool_call>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),\n",
267
+ "\t151658: AddedToken(\"</tool_call>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),\n",
268
+ "\t151659: AddedToken(\"<|fim_prefix|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),\n",
269
+ "\t151660: AddedToken(\"<|fim_middle|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),\n",
270
+ "\t151661: AddedToken(\"<|fim_suffix|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),\n",
271
+ "\t151662: AddedToken(\"<|fim_pad|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),\n",
272
+ "\t151663: AddedToken(\"<|repo_name|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),\n",
273
+ "\t151664: AddedToken(\"<|file_sep|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),\n",
274
+ "}\n",
275
+ ")"
276
+ ]
277
+ },
278
+ "execution_count": 6,
279
+ "metadata": {},
280
+ "output_type": "execute_result"
281
+ }
282
+ ],
283
+ "source": [
284
+ "tokenizer"
285
+ ]
286
+ },
287
+ {
288
+ "cell_type": "code",
289
+ "execution_count": 7,
290
+ "id": "276ad22f-db26-44f1-a7d0-85de0d0b64a9",
291
+ "metadata": {},
292
+ "outputs": [
293
+ {
294
+ "data": {
295
+ "text/plain": [
296
+ "PeftModelForCausalLM(\n",
297
+ " (base_model): LoraModel(\n",
298
+ " (model): Qwen2ForCausalLM(\n",
299
+ " (model): Qwen2Model(\n",
300
+ " (embed_tokens): Embedding(152064, 3584, padding_idx=151654)\n",
301
+ " (layers): ModuleList(\n",
302
+ " (0-27): 28 x Qwen2DecoderLayer(\n",
303
+ " (self_attn): Qwen2Attention(\n",
304
+ " (q_proj): lora.Linear(\n",
305
+ " (base_layer): Linear(in_features=3584, out_features=3584, bias=True)\n",
306
+ " (lora_dropout): ModuleDict(\n",
307
+ " (default): Identity()\n",
308
+ " )\n",
309
+ " (lora_A): ModuleDict(\n",
310
+ " (default): Linear(in_features=3584, out_features=16, bias=False)\n",
311
+ " )\n",
312
+ " (lora_B): ModuleDict(\n",
313
+ " (default): Linear(in_features=16, out_features=3584, bias=False)\n",
314
+ " )\n",
315
+ " (lora_embedding_A): ParameterDict()\n",
316
+ " (lora_embedding_B): ParameterDict()\n",
317
+ " (lora_magnitude_vector): ModuleDict()\n",
318
+ " )\n",
319
+ " (k_proj): lora.Linear(\n",
320
+ " (base_layer): Linear(in_features=3584, out_features=512, bias=True)\n",
321
+ " (lora_dropout): ModuleDict(\n",
322
+ " (default): Identity()\n",
323
+ " )\n",
324
+ " (lora_A): ModuleDict(\n",
325
+ " (default): Linear(in_features=3584, out_features=16, bias=False)\n",
326
+ " )\n",
327
+ " (lora_B): ModuleDict(\n",
328
+ " (default): Linear(in_features=16, out_features=512, bias=False)\n",
329
+ " )\n",
330
+ " (lora_embedding_A): ParameterDict()\n",
331
+ " (lora_embedding_B): ParameterDict()\n",
332
+ " (lora_magnitude_vector): ModuleDict()\n",
333
+ " )\n",
334
+ " (v_proj): lora.Linear(\n",
335
+ " (base_layer): Linear(in_features=3584, out_features=512, bias=True)\n",
336
+ " (lora_dropout): ModuleDict(\n",
337
+ " (default): Identity()\n",
338
+ " )\n",
339
+ " (lora_A): ModuleDict(\n",
340
+ " (default): Linear(in_features=3584, out_features=16, bias=False)\n",
341
+ " )\n",
342
+ " (lora_B): ModuleDict(\n",
343
+ " (default): Linear(in_features=16, out_features=512, bias=False)\n",
344
+ " )\n",
345
+ " (lora_embedding_A): ParameterDict()\n",
346
+ " (lora_embedding_B): ParameterDict()\n",
347
+ " (lora_magnitude_vector): ModuleDict()\n",
348
+ " )\n",
349
+ " (o_proj): lora.Linear(\n",
350
+ " (base_layer): Linear(in_features=3584, out_features=3584, bias=False)\n",
351
+ " (lora_dropout): ModuleDict(\n",
352
+ " (default): Identity()\n",
353
+ " )\n",
354
+ " (lora_A): ModuleDict(\n",
355
+ " (default): Linear(in_features=3584, out_features=16, bias=False)\n",
356
+ " )\n",
357
+ " (lora_B): ModuleDict(\n",
358
+ " (default): Linear(in_features=16, out_features=3584, bias=False)\n",
359
+ " )\n",
360
+ " (lora_embedding_A): ParameterDict()\n",
361
+ " (lora_embedding_B): ParameterDict()\n",
362
+ " (lora_magnitude_vector): ModuleDict()\n",
363
+ " )\n",
364
+ " (rotary_emb): LlamaRotaryEmbedding()\n",
365
+ " )\n",
366
+ " (mlp): Qwen2MLP(\n",
367
+ " (gate_proj): lora.Linear(\n",
368
+ " (base_layer): Linear(in_features=3584, out_features=18944, bias=False)\n",
369
+ " (lora_dropout): ModuleDict(\n",
370
+ " (default): Identity()\n",
371
+ " )\n",
372
+ " (lora_A): ModuleDict(\n",
373
+ " (default): Linear(in_features=3584, out_features=16, bias=False)\n",
374
+ " )\n",
375
+ " (lora_B): ModuleDict(\n",
376
+ " (default): Linear(in_features=16, out_features=18944, bias=False)\n",
377
+ " )\n",
378
+ " (lora_embedding_A): ParameterDict()\n",
379
+ " (lora_embedding_B): ParameterDict()\n",
380
+ " (lora_magnitude_vector): ModuleDict()\n",
381
+ " )\n",
382
+ " (up_proj): lora.Linear(\n",
383
+ " (base_layer): Linear(in_features=3584, out_features=18944, bias=False)\n",
384
+ " (lora_dropout): ModuleDict(\n",
385
+ " (default): Identity()\n",
386
+ " )\n",
387
+ " (lora_A): ModuleDict(\n",
388
+ " (default): Linear(in_features=3584, out_features=16, bias=False)\n",
389
+ " )\n",
390
+ " (lora_B): ModuleDict(\n",
391
+ " (default): Linear(in_features=16, out_features=18944, bias=False)\n",
392
+ " )\n",
393
+ " (lora_embedding_A): ParameterDict()\n",
394
+ " (lora_embedding_B): ParameterDict()\n",
395
+ " (lora_magnitude_vector): ModuleDict()\n",
396
+ " )\n",
397
+ " (down_proj): lora.Linear(\n",
398
+ " (base_layer): Linear(in_features=18944, out_features=3584, bias=False)\n",
399
+ " (lora_dropout): ModuleDict(\n",
400
+ " (default): Identity()\n",
401
+ " )\n",
402
+ " (lora_A): ModuleDict(\n",
403
+ " (default): Linear(in_features=18944, out_features=16, bias=False)\n",
404
+ " )\n",
405
+ " (lora_B): ModuleDict(\n",
406
+ " (default): Linear(in_features=16, out_features=3584, bias=False)\n",
407
+ " )\n",
408
+ " (lora_embedding_A): ParameterDict()\n",
409
+ " (lora_embedding_B): ParameterDict()\n",
410
+ " (lora_magnitude_vector): ModuleDict()\n",
411
+ " )\n",
412
+ " (act_fn): SiLU()\n",
413
+ " )\n",
414
+ " (input_layernorm): Qwen2RMSNorm((3584,), eps=1e-06)\n",
415
+ " (post_attention_layernorm): Qwen2RMSNorm((3584,), eps=1e-06)\n",
416
+ " )\n",
417
+ " )\n",
418
+ " (norm): Qwen2RMSNorm((3584,), eps=1e-06)\n",
419
+ " (rotary_emb): LlamaRotaryEmbedding()\n",
420
+ " )\n",
421
+ " (lm_head): Linear(in_features=3584, out_features=152064, bias=False)\n",
422
+ " )\n",
423
+ " )\n",
424
+ ")"
425
+ ]
426
+ },
427
+ "execution_count": 7,
428
+ "metadata": {},
429
+ "output_type": "execute_result"
430
+ }
431
+ ],
432
+ "source": [
433
+ "# Inference mode\n",
434
+ "FastLanguageModel.for_inference(model)"
435
+ ]
436
+ },
437
+ {
438
+ "cell_type": "code",
439
+ "execution_count": 9,
440
+ "id": "4a8793be-c4ae-4166-9b5f-b21bbd898a75",
441
+ "metadata": {},
442
+ "outputs": [],
443
+ "source": [
444
+ "prompt_style = \"\"\"Below is an instruction that describes a task, paired with an input that provides further context. \n",
445
+ "Write a response that appropriately completes the request. \n",
446
+ "Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.\n",
447
+ "\n",
448
+ "### Instruction:\n",
449
+ "You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning. \n",
450
+ "Please answer the following medical question. \n",
451
+ "\n",
452
+ "### Question:\n",
453
+ "{}\n",
454
+ "\n",
455
+ "### Response:\n",
456
+ "<think>{}\"\"\"\n",
457
+ "question = \"Given a patient who experiences sudden-onset chest pain radiating to the neck and left arm, with a past medical history of hypercholesterolemia and coronary artery disease, elevated troponin I levels, and tachycardia, what is the most likely coronary artery involved based on this presentation?\"\n",
458
+ "input=tokenizer()"
459
+ ]
460
+ },
461
+ {
462
+ "cell_type": "code",
463
+ "execution_count": 10,
464
+ "id": "7d398d9f-74bd-4dcd-a9fe-7025a7f4bf92",
465
+ "metadata": {},
466
+ "outputs": [
467
+ {
468
+ "name": "stdout",
469
+ "output_type": "stream",
470
+ "text": [
471
+ "<|begin▁of▁sentence|>Below is an instruction that describes a task, paired with an input that provides further context. \n",
472
+ "Write a response that appropriately completes the request. \n",
473
+ "Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.\n",
474
+ "\n",
475
+ "### Instruction:\n",
476
+ "You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning. \n",
477
+ "Please answer the following medical question. \n",
478
+ "\n",
479
+ "### Question:\n",
480
+ "Given a patient who experiences sudden-onset chest pain radiating to the neck and left arm, with a past medical history of hypercholesterolemia and coronary artery disease, elevated troponin I levels, and tachycardia, what is the most likely coronary artery involved based on this presentation?\n",
481
+ "\n",
482
+ "### Response:\n",
483
+ "<think>\n",
484
+ "Okay, so I need to figure out which coronary artery is involved based on the patient's symptoms. Let me start by breaking down the information given.\n",
485
+ "\n",
486
+ "The patient has sudden-onset chest pain that goes to the neck and left arm. Chest pain radiating to the left arm often suggests a specific pattern. I remember that chest pain can be caused by various heart conditions, but the location gives a clue.\n",
487
+ "\n",
488
+ "Next, there's a past medical history of hypercholesterolemia and coronary artery disease. Hypercholesterolemia is high cholesterol, which can lead to atherosclerosis, narrowing of the arteries. Since they have coronary artery disease, that's a clue that the problem is related to the heart's blood vessels.\n",
489
+ "\n",
490
+ "Elevated troponin I levels are also present. Troponin I is a marker for heart muscle damage, so this suggests that there might be ongoing heart issues, maybe from a STEMI (ST-segment elevation myocardial infarction) or a severe angina.\n",
491
+ "\n",
492
+ "Tachycardia is another symptom, which could be due to a heart attack causing the heart to pump faster as it tries to meet the increased demand for blood.\n",
493
+ "\n",
494
+ "Putting it all together, the chest pain radiating to the left arm is a key point. I think that when chest pain comes with radiensation to the left arm, it's often due to a left-sided coronary artery disease. Specifically, the left anterior descending (LAD) artery or the left circumflex (LCx) artery might be involved. \n",
495
+ "\n",
496
+ "I recall that the LAD is the most common coronary artery, and if it's blocked, it can cause significant chest pain. The LCx, on the other hand, is more medial and sometimes referred to as the \"left arm\" artery because of its distribution. \n",
497
+ "\n",
498
+ "The elevated troponin I suggests ongoing ischemia, which could be due to a recent or ongoing block of one of these arteries. Since hypercholesterolemia increases the risk of atherosclerosis, it's more likely that a block has occurred in a coronary artery that supplies the left side of the heart.\n",
499
+ "\n",
500
+ "So, considering all these factors—the location of the pain, the associated symptoms, the past medical history, and the elevated troponin—I would conclude that the likely involved coronary artery is either the LAD or LCx. Given that the patient's presentation aligns with a left-sided issue, both are possibilities, but typically, the LAD is more common and a better first-line treatment target. However, without more specific details, both are plausible.\n",
501
+ "</think>\n",
502
+ "\n",
503
+ "The patient presents with sudden-onset chest pain radiating to the left arm, a common indicator of left-sided coronary artery disease. Given the history of hypercholesterolemia, coronary artery disease, elevated troponin I levels, and tachycardia, the likely involved coronary artery is either the left anterior descending (LAD) or left circumflex (LCx) artery. \n",
504
+ "\n",
505
+ "The LAD is the most common coronary artery, and its block is often associated with significant chest pain. The LCx, while less common, is also referred to as the \"left arm\" artery. Both are plausible based on the patient's symptoms, with the LAD typically being a higher priority for treatment due to its commonality. \n",
506
+ "\n",
507
+ "**Answer:** The likely involved coronary artery is either the left anterior descending (LAD) or left circumflex (LCx) artery.<|end▁of▁sentence|>\n"
508
+ ]
509
+ }
510
+ ],
511
+ "source": [
512
+ "print(response[0])"
513
+ ]
514
+ },
515
+ {
516
+ "cell_type": "code",
517
+ "execution_count": null,
518
+ "id": "2ba7bfc2-df71-45c9-888f-29cb75a72392",
519
+ "metadata": {},
520
+ "outputs": [],
521
+ "source": []
522
+ }
523
+ ],
524
+ "metadata": {
525
+ "kernelspec": {
526
+ "display_name": "Python 3 (ipykernel)",
527
+ "language": "python",
528
+ "name": "python3"
529
+ },
530
+ "language_info": {
531
+ "codemirror_mode": {
532
+ "name": "ipython",
533
+ "version": 3
534
+ },
535
+ "file_extension": ".py",
536
+ "mimetype": "text/x-python",
537
+ "name": "python",
538
+ "nbconvert_exporter": "python",
539
+ "pygments_lexer": "ipython3",
540
+ "version": "3.10.8"
541
+ }
542
+ },
543
+ "nbformat": 4,
544
+ "nbformat_minor": 5
545
+ }