Upload 3 files
Browse files์๋์ ๋ฐ์ดํฐ๋ก ์คํ๊ฐ๋ฅ์ฑ ๊ฒํ
- prototype/kcmii_lm_full_ft.ipynb +311 -0
- prototype/kcmii_lm_rag.ipynb +483 -0
- prototype/kcmii_lm_test.ipynb +1104 -0
prototype/kcmii_lm_full_ft.ipynb
ADDED
|
@@ -0,0 +1,311 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"id": "96468b93-985d-4714-b234-b56ad8f1cfe3",
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"source": [
|
| 8 |
+
"full fine tuning ์ผ๋ก ์งํ"
|
| 9 |
+
]
|
| 10 |
+
},
|
| 11 |
+
{
|
| 12 |
+
"cell_type": "code",
|
| 13 |
+
"execution_count": 12,
|
| 14 |
+
"id": "56c6181a-2b4e-49ac-a25d-a54eb3a51110",
|
| 15 |
+
"metadata": {},
|
| 16 |
+
"outputs": [
|
| 17 |
+
{
|
| 18 |
+
"name": "stdout",
|
| 19 |
+
"output_type": "stream",
|
| 20 |
+
"text": [
|
| 21 |
+
"์ฌ์ฉ ๋๋ฐ์ด์ค: mps\n"
|
| 22 |
+
]
|
| 23 |
+
},
|
| 24 |
+
{
|
| 25 |
+
"name": "stderr",
|
| 26 |
+
"output_type": "stream",
|
| 27 |
+
"text": [
|
| 28 |
+
"Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`\n"
|
| 29 |
+
]
|
| 30 |
+
}
|
| 31 |
+
],
|
| 32 |
+
"source": [
|
| 33 |
+
"import torch\n",
|
| 34 |
+
"from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer\n",
|
| 35 |
+
"from datasets import load_dataset\n",
|
| 36 |
+
"\n",
|
| 37 |
+
"device = torch.device(\"mps\" if torch.backends.mps.is_available() else \"cpu\")\n",
|
| 38 |
+
"print(\"์ฌ์ฉ ๋๋ฐ์ด์ค:\", device)\n",
|
| 39 |
+
"\n",
|
| 40 |
+
"model_name = \"EleutherAI/polyglot-ko-1.3b\"\n",
|
| 41 |
+
"tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
|
| 42 |
+
"model = AutoModelForCausalLM.from_pretrained(model_name).to(device)\n",
|
| 43 |
+
"\n",
|
| 44 |
+
"dataset = load_dataset(\"json\", data_files=\"dataset/kcmii_major_instruction_data_summarized.jsonl\", split=\"train\")\n",
|
| 45 |
+
"\n",
|
| 46 |
+
"def tokenize(example):\n",
|
| 47 |
+
" prompt = f\"{example['instruction']}\\n๋ต๋ณ: {example['response']}\"\n",
|
| 48 |
+
" print(prompt)\n",
|
| 49 |
+
" tokenized = tokenizer(prompt, padding=\"max_length\", truncation=True, max_length=512)\n",
|
| 50 |
+
" tokenized[\"labels\"] = tokenized[\"input_ids\"].copy()\n",
|
| 51 |
+
" return tokenized"
|
| 52 |
+
]
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"cell_type": "code",
|
| 56 |
+
"execution_count": 8,
|
| 57 |
+
"id": "aa3434d0-7b59-4db4-9b74-90d0da6ef6eb",
|
| 58 |
+
"metadata": {
|
| 59 |
+
"scrolled": true
|
| 60 |
+
},
|
| 61 |
+
"outputs": [
|
| 62 |
+
{
|
| 63 |
+
"name": "stderr",
|
| 64 |
+
"output_type": "stream",
|
| 65 |
+
"text": [
|
| 66 |
+
"Map: 100%|โโโโโโโโโโ| 50/50 [00:00<00:00, 1405.40 examples/s]"
|
| 67 |
+
]
|
| 68 |
+
},
|
| 69 |
+
{
|
| 70 |
+
"name": "stdout",
|
| 71 |
+
"output_type": "stream",
|
| 72 |
+
"text": [
|
| 73 |
+
"์ด๋ฌธํ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 74 |
+
"๋ต๋ณ: ์ด๋ฌธํ ์ ๊ณต์์๋ ๋ยท์์ ์ธ์ด์ ์๋ฆฌ๋ฅผ ์ฒด๊ณ์ ์ผ๋ก ์ฐ๊ตฌํ๊ณ , ๊ฐ ๋๋ผ์ ๋ฌธํ๊ณผ ๋ฌธํ๋ฅผ ์ดํดํ์ฌ ์ธ๊ณํ ์๋์ ์๊ตฌ๋๋ ์ธ๋ฌธํ์ ๊ต์๊ณผ ์ธ์ด ๊ด๋ จ ์ค๋ฌด์ ๋ฅ๋ ฅ์ ๊ณ ๋ฐํ๋ค. ์ด๋ฌธํ ์ ๊ณต์ ์ธ์ด์ ๊ฐ๊ฐ, ๋
ผ๋ฆฌ์ ์ฌ๊ณ ๋ ฅ, ๋นํ์ ๋ถ์ ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ์ธ์ด ์ ๋ฌธํ ๊ทธ๋ฆฌ๊ณ ๋ค์ํ ๊ตญ๊ฐ์ ์ฌํยท๋ฌธํ์ ํน์ฑ์ ๋ํด ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.\n",
|
| 75 |
+
"๋ฌธํยท์ธ๋ฅยท์ญ์ฌ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 76 |
+
"๋ต๋ณ: ๋ฌธํยท์ธ๋ฅยท์ญ์ฌ ์ ๊ณต์ ์ธ๊ฐ์ด ์ถํํ ์๊ธฐ๋ถํฐ ํ์ฌ๊น์ง์ ๋ฌธํ์ ์ฌํ๋ฅผ ํ๊ตฌํ ๊ณ , ์๋๋ณ๋ก ๋ฌธํ ๋ฐ ์ฌํ์ ํน์ฑ์ ์ฐ๊ตฌํ์ฌ ์ญ์ฌ์ ์์๋ฅผ ์ดํดํ๋ ๋ถ์ผ์ด๋ค. ๋ฌธํยท ์ธ๋ฅยท์ญ์ฌ ์ ๊ณต์ ๊ณ ๋์ ๊ด์ฐฐ๋ ฅ ๋ฐ ๋ถ์๋ ฅ์ด ํ์ํ๊ณ , ํ๊ตญ์ฌ ๋ฐ ์ธ๊ณ์ฌ ๊ทธ๋ฆฌ๊ณ ๋ค์ ํ ์ฌํ์ ยท์ญ์ฌ์ ํ์์ ๋ํด ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.\n",
|
| 77 |
+
"์ฒ ํ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 78 |
+
"๋ต๋ณ: ์ฒ ํ ์ ๊ณต์์๋ ์์ ์ ๋๋ฌ์ผ ์์ฐ, ์ฐ์ฃผ, ์ฌํ์ ๋ํ ์๋ฏธ๋ฅผ ํต์ฐฐํ๊ณ ์ด๋ฅผ ๋
ผ๋ฆฌ ์ ์ผ๋ก ๋ถ์ํ์ฌ ์ธ๊ฐ ํ์์ ์ธ๊ณ์ ๋ํ ์๋ฏธ๋ฅผ ํ์ํ๋ค. ์ฒ ํ ์ ๊ณต์ ์์ฐ, ์ฐ์ฃผ, ์ฌ ํ๋ฅผ ๋
ผ๋ฆฌ์ ์ผ๋ก ๋ถ์ํ์ฌ ๊ทธ๊ฒ์ ์๋ฏธ๋ฅผ ํ์ํ๋ ํตํฉ์ ์ฌ๊ณ ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ์ธ๊ฐ ๋ณธ์ฑ๊ณผ ์กด์ฌ ๊ฐ์น, ์ถ์ ๋ณธ์ง์ ๋ํด ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.\n",
|
| 79 |
+
"๋ฒํ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 80 |
+
"๋ต๋ณ: ๋ฒํ ์ ๊ณต์ ๋ฒ๊ณผ ๊ด๋ จ๋ ๊ธฐ์ด์ ์ธ ์ด๋ก ์์๋ถํฐ ์ฌ๋ฒ, ๊ณต๋ฒ, ์ฌํ๋ฒ ๋ฐ ๊ตญ์ ๊ด๊ณ์ ๊ด๋ จ๋ ๋ค์ํ ๋ฒํ ์ด๋ก ๋ค์ ์ต๋ํ๊ณ ์ด๋ฅผ ๊ตฌ์ฒด์ ์ธ ์ฌ๋ก์ ์ ์ฉํ๋ค. ๋ฒํ ์ ๊ณต์๋ ์ฌ๊ฑด๊ณผ ์ํฉ์ ๋ถ์ํ์ฌ ํ๋นํ ๊ฒฐ๋ก ์ ์ด๋ฅผ ์ ์๋ ์ฌ๊ณ ๋ ฅ๊ณผ ๊ณต์ ํ ํ๋จ๋ ฅ์ด ํ์ํ ๊ณ , ๋ฒ๋ฅ ๊ณผ ๊ด๋ จ๋ ์ฌํํ์ ๋ฐ ์ฌํ๋ฌธ์ ์ ๋ํด ๊ด์ฌ๊ณผ ํฅ๋ฏธ๊ฐ ์์ด์ผ ํ๋ค.\n",
|
| 81 |
+
"์ ์นยท๊ตญ์ ๊ด๊ณ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 82 |
+
"๋ต๋ณ: ์ ์นยท๊ตญ์ ๊ด๊ณ ์ ๊ณต์ ํ ๊ตญ๊ฐ ๋ด ํน์ ์ฌ๋ฌ ๊ตญ๊ฐ ๊ฐ์ ์ ์น์ ํ์์ ๋ถ์ํ๊ณ ๋นํ ํ๋ฉฐ, ๊ตญ๋ดยท์ธ์ ์ ์น์ ํ์๊ณผ ๊ตญ์ ๊ด๊ณ์ ๋ํ ์ด๋ก ๊ณผ ์ค์ ๋ฅผ ์ฐ๊ตฌํ๋ค. ์ ์นยท๊ตญ์ ๊ด ๊ณ ์ ๊ณต์๋ ๊ตญ์ ์ ํ์๊ณผ ๊ตญ๊ฐ ๊ฐ ๊ด๊ณยท์ธ๋ ฅ ๋ณํ ๋ฑ์ ํ์
ํ ์ ์๋ ๋ถ์๋ ฅ๊ณผ ๋
ผ๋ฆฌ ๋ ฅ์ด ํ์ํ๊ณ , ๊ตญ๋ดยท์ธ์ ๋ค์ํ ์ ์นยท๊ฒฝ์ ยท์ฌํ์ ์ด์์ ๋ํ ๋ฌธ์ ์์๊ณผ ๊ด์ฌ, ํฅ๋ฏธ ๊ฐ ์์ด์ผ ํ๋ค.\n",
|
| 83 |
+
"ํ์ ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 84 |
+
"๋ต๋ณ: ํ์ ์ ๊ณต์ ๊ตญ๋ฏผ๊ณผ ์ ๋ถ ๊ฐ์ ์ํธ์์ฉ, ๋ค์ํ ๊ณต๊ณต๋ถ๋ฌธ์์์ ํ์ ๋ฐ ์ ์ฑ
๊ณผ ๊ด ๋ จ๋ ํ์์ ๋ถ์ํจ์ผ๋ก์จ ๊ณต๊ณต๋ถ์ผ ๊ด๋ฆฌ ๋ฑ ๏ฟฝ๏ฟฝ๊ฐ ์ด์์ ํจ์จ์ ์ผ๋ก ์ํํ ์ ์๋ ๋ฐฉ ์์ ๋ชจ์ํ๋ค. ํ์ ์ ๊ณต์๋ ์ฌํ ๋ฌธ์ ์ ๋ํ ํฉ๋ฆฌ์ ์ธ ํ๋จ ๋ฅ๋ ฅ๊ณผ ์ฌํ ๋ฌธ์ ๋ฅผ ํจ ์จ์ ์ผ๋ก ํด๊ฒฐํ ์ ์๋ ๋ฌธ์ ํด๊ฒฐ ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ๊ณต๊ณต์ ๋ฌธ์ ์ ์ ์ฑ
๊ทธ๋ฆฌ๊ณ ์ ๋ถ์ ๊ณต๊ณต ๊ธฐ๊ด์ ์ฑ๊ฒฉ, ์
๋ฌด ๋ฑ์ ๋ํด ๊ด์ฌ๊ณผ ํฅ๋ฏธ๊ฐ ์์ด์ผ ํ๋ค.\n",
|
| 85 |
+
"์ฌํ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 86 |
+
"๋ต๋ณ: ์ฌํ ์ ๊ณต์ ์ฌํ์ ๋ค์ํ ํ์๊ณผ ๋ฌธ์ ๋ฅผ ๊ด์ฐฐ, ๋ถ์ํ๊ณ ์ด๋ฅผ ํตํด ๊ทธ๊ฒ์ ์๋ฏธ๋ฅผ ํด์ํจ์ผ๋ก์จ ์ฌํ ๊ตฌ์ฑ์๋ค ๊ฐ์ ์ํธ์์ฉ ๋ฐ ์ฌํ๊ตฌ์กฐ์ ๋ํ์ฌ ํ๊ตฌํ๊ณ ํ๋ ์ฌํ ์ ๋ฌธ์ ๋ค์ ํด๊ฒฐํ๋ค. ์ฌํ ์ ๊ณต์๋ ์ฌํ ํ์์ ๊ฐ๊ด์ ์ผ๋ก ๊ด์ฐฐํ๊ณ ๊ณผํ์ ์ผ๋ก ๋ถ ์ํ ์ ์๋ ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ๊ฐ์ธ์ ยท์ฌํ์ ์์ค์ ๋ค์ํ ๋ฌธ์ ๋ฅผ ํ๊ตฌํ๋ ๊ฒ์ ๊ด ์ฌ๊ณผ ํฅ๋ฏธ๊ฐ ์์ด์ผ ํ๋ค.\n",
|
| 87 |
+
"์ฌ๋ฆฌ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 88 |
+
"๋ต๋ณ: ์ฌ๋ฆฌ ์ ๊ณต์ ์ธ๊ฐ์ ํ๋๊ณผ ์ ์ ๊ณผ์ , ๊ฐ์ธ ๊ฐ ์ํธ๊ด๊ณ ํ์ฑ์ ๊ดํ ์ฌ์ธต์ ์ฐ๊ตฌ๋ฅผ ํตํด ๊ฐ์ธ์ด ๊ฐ์ง ์ฌ๋ฆฌยท์ ์์ ๋ฌธ์ ๋ฅผ ํด๊ฒฐํ๊ณ ์กฐํ๋ก์ด ์ธ๊ฒฉ ํ์ฑ๊ณผ ์ธ์ฑ ํ๋ณต์ ์ ํ ๋ฐฉ์์ ๋ชจ์ํ๋ค. ์ฌ๋ฆฌ ์ ๊ณต์๋ ์ฌ๋ฆฌ ํ์์ ๋ํ ์คํ๊ณผ ์กฐ์ฌ๋ฅผ ๊ณผํ์ ์ผ๋ก ์ค์ํ ๊ณ , ์ธ๋ฐํ๊ฒ ๊ด์ฐฐํ๋ฉฐ, ๊ฒฐ๊ณผ๋ฅผ ๋
ผ๋ฆฌ์ ์ผ๋ก ํด์ํ ์ ์๋ ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ์ฌ๋๋ค์ ์ฌ๊ณ , ์ฑ๊ฒฉ, ํ๋ ๋ฐ ๊ทธ์ ๊ด๋ จ๋๋ ์ฌํ ํ์์ ๋ํ ๊ด์ฌ๊ณผ ํฅ๋ฏธ๊ฐ ์์ด์ผ ํ๋ค.\n",
|
| 89 |
+
"์ฌํ๋ณต์ง์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 90 |
+
"๋ต๋ณ: ์ฌํ๋ณต์ง ์ ๊ณต์ ๊ฐ์กฑยท์๋ยท์ฒญ์๋
ยท๋
ธ์ธยท์ฌ์ฑ ๋ฑ ์ฌํ ๊ตฌ์ฑ์์ ์ผ์ ํ ์ํ ์์ค ๋ฐ ๋ณด๊ฑด ์ํ๋ฅผ ํ๋ณดํ๊ธฐ ์ํด ์ฌํ ์ ์ฑ
๋ฐ ์ ๋์ ๊ด๋ จ๋ ์ด๋ก ๊ณผ ๋ฐฉ๋ฒ์ ํ๊ตฌํ๋ค. ์ฌ ํ๋ณต์ง ์ ๊ณต์๋ ์ฌํ ๊ตฌ์ฑ์์ ์ผ์ ํ ์ํ ์์ค ๋ฐ ๋ณด๊ฑด ์ํ๋ฅผ ์ง์ํ๊ธฐ ์ํ ์ฌํ ์ ์ฑ
๋ฐ ์ ๋ ๋ฑ์ ํ๋ฌธ์ ์ผ๋ก ์ดํดํ ์ ์๋ ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ์ฃผ์ ์ด์์ ๋ํ ๊ด์ฌ ๊ณผ ๋ด์ฌ์ ์ ์ด ์์ด์ผ ํ๋ค.\n",
|
| 91 |
+
"๋ฌธํ์ ๋ณด์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 92 |
+
"๋ต๋ณ: ๋ฌธํ์ ๋ณด ์ ๊ณต์ ๊ฐ์ข
์ง์ ํ๋์ ํ์ํ ์ ๋ณด ๋ฐ ๋ฌธํ์ ์์ฑ์ ์ดํดํ๊ณ , ๋ค์ํ ๋ฏธ๋์ด๋ฅผ ํตํด ์ ๋ณด๋ฅผ ํจ์จ์ ์ผ๋ก ์์ง, ์ ๋ฆฌ, ๊ฐ๊ณต, ๊ด๋ฆฌ, ๋ฐฐํฌํ๋ ์ง์ ์ ๋ณด ๊ด๋ฆฌ ๋ฅ๋ ฅ ์ ๊ณ๋ฐํ๋ค. ๋ฌธํ์ ๋ณด ์ ๊ณต์๋ ์ํฉ์ ๋ฐ๋ฅธ ์ ํฉํ ๋์ ๋ฐ ์๋ฃ์ ์์งยท์ ๋ฆฌยท๊ฐ๊ณต ๋ฅ ๋ ฅ, ๋์ ๋ฐ ์๋ฃ์ ๊ด๋ฆฌ ๋ฐ ํ์ฉ์ ์ํ ์ ์ฐ ์ฒ๋ฆฌ ๋ฐ ํต๊ณ ์ฒ๋ฆฌ ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ๋ค์ ํ ๋ถ์ผ์ ๋์์ ์๋ฃ์ ๋ํ ๊ด์ฌ๊ณผ ํฅ๋ฏธ๊ฐ ์์ด์ผ ํ๋ค.\n",
|
| 93 |
+
"์ธ๋ก ยทํ๋ณดยท๋ฏธ๋์ด์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 94 |
+
"๋ต๋ณ: ์ธ๋ก ยทํ๋ณดยท๋ฏธ๋์ด ์ ๊ณต์ ์ฌํ์ ์ฌ๋ฌ ์์ญ์์ ์ด๋ฃจ์ด์ง๋ ๋์ค ๋งค์ฒด์ ์ํต ๊ณผ์ ์ ๋ถ์, ์ฐ๊ตฌํ์ฌ ๋ค์ํ ๋ฏธ๋์ด ์ฝํ
์ธ ์ ์๊ณผ ๋ฐ๋์งํ ๋ฏธ๋์ดยท๋์ค ๋งค์ฒด์ ๋ฐ์ ๋ฐฉ ์์ ๋ํด ์ฐ๊ตฌํ๋ค. ์ธ๋ก ยทํ๋ณดยท๋ฏธ๋์ด ์ ๊ณต์๋ ์ต์ ํธ๋ ๋๋ฅผ ํ์
ํ๋ ๋ฅ๋ ฅ, ์ ํํ ์ ๋ณด ์ ๋ฌ์ ์ํ ์ปค๋ฎค๋์ผ์ด์
๋ฅ๋ ฅ, ๊ทธ๋ฆฌ๊ณ ๋งค๋ ฅ์ ์ฝํ
์ธ ์์ฐ์ ์ํ ์์ ์ฑ ๋ฐ ์ฐฝ ์๋ ฅ์ด ํ์ํ๊ณ , ๋์ค ๋งค์ฒด์ ๋ค์ํ ์ฝํ
์ธ , ๋ด๋ฏธ๋์ด ๋ฑ์ ๋ํ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.\n",
|
| 95 |
+
"๋์ยท์ง์ญ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 96 |
+
"๋ต๋ณ: ๋์ยท์ง์ญ ์ ๊ณต์ ๋์ ๋ฐ ์ง์ญ๊ณผ ๊ด๋ จ๋ ๊ฐ์ข
๋ฌธ์ ๋ฅผ ๋ถ์ํ๊ณ ์ด๋ฅผ ํฉ๋ฆฌ์ ์ผ๋ก ํด ๊ฒฐํ ์ ์๋ ๋ฐฉ์์ ๋ชจ์ํ์ฌ ๊ตญํ ๋ฅผ ๊ฒฝ์ ์ , ์ฌํ์ , ๋ฌธํ์ ์ธก๋ฉด์์ ์กฐํ๋กญ๊ณ ๊ท ํ ์๊ฒ ๋ฐ์ ํ๋ ๋ฐฉ์์ ๋ํด ์ฐ๊ตฌํ๋ค. ๋์ยท์ง์ญ ์ ๊ณต์๋ ๋์์ ๊ธฐ๋ฅ๊ณผ ์ญํ , ๊ตํต ์ฒด ๊ณ, ์ง์ญ์ ํน์ฑ์ ์ ํฉํ ์ฌํ๊ธฐ๋ฐ์์ค ๋ฑ์ ๋ํ ์ดํด ๋ฅ๋ ฅ๊ณผ ์ฐฝ์๋ ฅ, ๊ณต๊ฐ์ง๊ฐ๋ ฅ์ด ํ์ํ๊ณ , ๊ณต๊ฐ์ ๋ํ ํธ๊ธฐ์ฌ์ด ์์ด์ผ ํ๋ค.\n",
|
| 97 |
+
"๊ตฐ์ฌยท๊ตญ๋ฐฉยท์๋ณด์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 98 |
+
"๋ต๋ณ: ๊ตฐ์ฌยท๊ตญ๋ฐฉยท์๋ณด ์ ๊ณต์ ๊ตฐ์ฌ ์ ์ฑ
๋ฐ ์ ๋ต์ ํ์ํ ์ ๋ฌธ ์ง์์ ์ต๋ํ๊ณ , ๋ค์ํ ์ํฉ์์ ์ค์ง์ ์ผ๋ก ๋์ฒ ๊ฐ๋ฅํ ๊ตฐ์ฌ ์ด์ฉ ๋ฅ๋ ฅ์ ๊ณ๋ฐํ๋ฉฐ, ํฌ์ฒ ํ ๊ตญ๊ฐ๊ด์ ํจ์ํ ์ฌ ๊ตญ๊ฐ์ ์๋ณด์ ๊ธฐ์ฌํ๋ค. ๊ตฐ์ฌยท๊ตญ๋ฐฉยท์๋ณด ์ ๊ณต์๋ ๊ตฐ๋ณ๋ ฅ์ ํต์ํ๋ ๋ฆฌ๋์ญ๊ณผ ์ฌ ๋ฐ๋ฅธ ๊ตญ๊ฐ๊ด, ํ๋ จ ๋ฐ ์ค์ ์ ๋๋นํ ๊ฐํ ์ฒด๋ ฅ๊ณผ ์ ์ ๋ ฅ, ํต์ฐฐ๋ ฅ ๋ฐ ํ๋จ๋ ฅ์ด ํ์ํ๊ณ , ๊ตญ๊ฐ์ ์๋ณด ํ๊ฒฝ์ ๋ํ ์ดํด์ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.\n",
|
| 99 |
+
"๊ฒฝ์ฐฐยท์๋ฐฉยท์์ ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 100 |
+
"๋ต๋ณ: ๊ฒฝ์ฐฐยท์๋ฐฉยท์์ ์ ๊ณต์ ๊ฐ์ข
๋ฒ์ฃ ํ์์ ์์ธ๊ณผ ๋์ฑ
์ ์ฐ๊ตฌํ๊ฑฐ๋ ์์ ๊ด๋ฆฌ์ ๋ ํ ์ด๋ก ์ ์ง์ ๋ฐ ์ค๋ฌด ๋ฅ๋ ฅ์ ์ต๋ํจ์ผ๋ก์จ ๋ฒ์ฃ, ์ํ ๋ฐ ์ฌ๋์ผ๋ก๋ถํฐ ๊ตญ๋ฏผ์ ์๋ช
๊ณผ ์ฌ์ฐ์ ๋ณดํธํ๋ค. ๊ฒฝ์ฐฐยท์๋ฐฉยท์์ ์ ๊ณต์๋ ํ์ฅ์์์ ๋๋ฐ ์ํฉ์ ํด๊ฒฐํ ์ ์๋ ๋ฆฌ๋์ญ๊ณผ ์กฐ์ง์ ์ฌ๊ณ , ์ฑ
์๊ฐ, ์ํฉํ๋จ๋ ฅ์ด ๏ฟฝ๏ฟฝ๏ฟฝ์ํ๊ณ , ์ฌํ ๋ฌธ์ ๋ฐ ๋ฒ์ฃ ํด๊ฒฐ, ๊ฐ์ข
์ฌ ๊ฑดยท์ฌ๊ณ ์ ๊ด์ฌ๊ณผ ํฅ๋ฏธ๊ฐ ์์ด์ผ ํ๋ค.\n",
|
| 101 |
+
"๊ฒฝ์์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 102 |
+
"๋ต๋ณ: ๊ฒฝ์ ๊ด๋ จ ์ ๊ณต์์๋ ๊ธฐ์
๊ฒฝ์์ ํ์ํ ์ฌ๋ฌ ๊ฐ์ง ์ด๋ก ๊ณผ ๊ธฐ๋ฒ์ ์ฐ๊ตฌํ๊ณ , ์ํ ์ ํ์ํ ๋ค์ํ ์ํ ๋๋ ์๋น์ค๋ฅผ ์์ฐ, ์ ํต, ํ๋งคํ๋ ์ ๊ณผ์ ์ ๋ํด ์ฐ๊ตฌํ๋ค. ๊ฒฝ ์ํ ๊ด๋ จ ์ ๊ณต์์๊ฒ๋ ํจ์จ์ ์ธ ๊ฒฝ์ ํ๋์ ๋ํ ๋ถ์ยท์คํ ๋ฅ๋ ฅ๊ณผ ํฉ๋ฆฌ์ ์์ฌ๊ฒฐ์ ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ์กฐ์ง์ ์ฑ๊ณผ๋ฅผ ๋์ด๊ธฐ ์ํ ๊ฒฝ์ ํ๋ ๋ฐ ๊ธ๋ก๋ฒ ๊ธฐ์
ํ๊ฒฝ์ ๋ณํ์ ๋ํ ๊ด์ฌ๊ณผ ํฅ๋ฏธ๊ฐ ์์ด์ผ ํ๋ค.\n",
|
| 103 |
+
"๊ฒฝ์ ยท๊ธ์ตยท๋ถ๋์ฐ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 104 |
+
"๋ต๋ณ: ๊ฒฝ์ ยท๊ธ์ตยท๋ถ๋์ฐ ์ ๊ณต์ ๊ตญ๊ฐ, ์กฐ์ง, ๊ฐ์ธ ๋ฑ์ ๊ฒฝ์ ์ํฉ์ ๋ถ์ยท์ง๋จํ์ฌ ๊ฒฝ์ ๊ด ๋ จ ๋ฌธ์ ์ ์ ์ ํ ๋์ํ ์ ์๋ ๋ฐฉ๋ฒ์ ์ฐ๊ตฌํ๋ค. ๋ฐ๋ผ์ ๊ธ์ต์ ํ๋ฆ์ ํ์
ํ๊ธฐ ์ ํ ๋ถ์๋ ฅ๊ณผ ์ํ ๋ฅ๋ ฅ, ์ ๋ณด์ฒ๋ฆฌ ๋ฐ ํ์ฉ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ๊ตญ๊ฐ ๋ฐ ์ธ๊ณ ๊ฒฝ์ ์ ํ๋ฆ์ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.\n",
|
| 105 |
+
"๋ฌด์ญยท๋ฌผ๋ฅ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 106 |
+
"๋ต๋ณ: ๋ฌด์ญยท๋ฌผ๋ฅ ์ ๊ณต์ ๋ฌผํยท์๋น์คยท๊ธฐ์ ยท์์ ๋ฑ์ ๊ตญ๊ฐ ๊ฐ ์ด๋๊ณผ ๋ฌผํ์ ์์ฐ์์ ๊ณต ๊ธ์ ์ด๋ฅด๋ ์ผ๋ จ์ ๊ณผ์ ์ ๊ฒฝ์ ์ ์ด๊ณ ํจ์จ์ ์ผ๋ก ๊ด๋ฆฌํ๋ ๋ฐฉ๋ฒ ๋ฐ ๊ตญ์ ๊ฒฝ์ ๊ต๋ฅ์ ๊ด๋ จ๋ ์ฌ์์ ๋ํด ์ฐ๊ตฌํ๋ค. ๊ทธ๋ฌ๋ฏ๋ก ๋ฌด์ญยท๋ฌผ๋ฅ ์ ๊ณต์๋ ์ธ๊ณ ์๋น๊ฒฝํฅ ๋ฐ ์์์ ํ๋ฆ์ ๋ถ์ํ๊ณ ์ดํดํ๋ ๋ฅ๋ ฅ๊ณผ ๊ด๋ จ ์ ๋ณด๋ฅผ ๊ฒฝ์, ๊ฒฝ์ , ๋ฒ, ๋ณดํ, ์ธ๊ตญ์ด ๋ฑ์ ์ธ์ ํ๋ฌธ๊ณผ ์ฐ๊ณํ ์ ์๋ ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ๊ตญ์ ๊ฒฝ์ ๊ต๋ฅ์ ๋ํด ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.\n",
|
| 107 |
+
"ํ๊ณยท์ธ๋ฌด์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 108 |
+
"๋ต๋ณ: ํ๊ณยท์ธ๋ฌด ์ ๊ณต์ ๊ธฐ์
์ ํ๊ณ์ ๋ณด๋ฅผ ์ฒด๊ณ์ ์ธ ๋ฐฉ๋ฒ์ผ๋ก ์ฐ์ถยท๋ถ์ํ๊ณ , ์ธ๋ฌด์ ๊ด ๋ จ๋ ํจ์จ์ ์ธ ์์ฌ๊ฒฐ์ ๋ฐฉ์์ ํ์ํ๋ค. ๋ฐ๋ผ์ ์ดํด๊ด๊ณ๋ฅผ ๋ถ์ํ๊ณ ์์ธกํ ์ ์๋ ์๋ฆฌ ๋ฅ๋ ฅ ๋ฐ ๋
ผ๋ฆฌ๋ ฅ์ด ํ์ํ๊ณ , ๊ฒฝ์ ๋ฐ ๊ธฐ์
์์์ ํจ์จ์ ๋ฐฐ๋ถ์ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.\n",
|
| 109 |
+
"๊ด๊ด์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 110 |
+
"๋ต๋ณ: ๊ด๊ด ์ ๊ณต์์๋ ๊ด๊ด์ ํตํ ์ถ์ ์ง ํฅ์์ ์ํด ๊ตญ๋ด์ธ ๊ด๊ด์ง๋ฅผ ํ์
ํ๊ณ , ๊ด๊ด ํ์์ ๋ํ ํ๋ฌธ์ ๋ถ์๊ณผ ์ค๋ฌด์ ์ธ ์ ์ฑ
์ ์๋ฆฝํ๋ค. ๊ทธ๋ฌ๋ฏ๋ก ๊ด๊ด ์ํ์ ๊ฐ๋ฐ ๋ฐ ๊ธฐํ ๋ฅ๋ ฅ, ์ธ๊ตญ์ด ๋ฅ๋ ฅ, ์ธ์ ๋คํธ์ํฌ ํ์ฑ ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ์ฌํ ๋ฐ ๊ด๊ด์ ๋ํ ํฅ๋ฏธ ์ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.\n",
|
| 111 |
+
"๊ต์ก์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 112 |
+
"๋ต๋ณ: ๊ต์ก ์ ๊ณต์ ๊ต์ก ๊ธฐ๊ด ์ํ์ ๊ต์กํ๋์ ์ด์ฒด์ ์ผ๋ก ์ดํดํ๊ณ , ๊ต์ก๊ณผ ๊ด๋ จํ ๋ค ์ํ ๋ฌธ์ ์ ๋ํด ํด๊ฒฐ ๋ฐฉ๋ฒ์ ๋ชจ์ํ๋ค. ๋ฐ๋ผ์ ๊ต์ก ํ์ ๋ฐ ๊ต์ก ์ ์ฑ
๋ฑ์ ๋ํ ํ๋จ ๋ ฅ๊ณผ ๊ต์ก ๋ฌธ์ ์ ์์ธ์ ์ฒด๊ณ์ ์ผ๋ก ํ์
ํ๋ ๋ถ์๋ ฅ์ด ํ์ํ๊ณ , ์ธ๊ฐ๊ณผ ๊ต์ก ํ๋์ ๋ํ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.\n",
|
| 113 |
+
"์ ์๊ต์ก์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 114 |
+
"๋ต๋ณ: ์ ์๊ต์ก ์ ๊ณต์์๋ ์ยท์ ์๊ธฐ ๋ฐ๋ฌ์ ๋ํ ์ฒด๊ณ์ ์ธ ์ด๋ก ์ ์ดํดํ๊ณ , ์ด๋ฅผ ํ์ฅ ์ ์ ์ฉํด ๋ณด๋ ์ค์ต์ ํตํด ์ยท์ ์์ ์ ์ฒด์ , ์ ์์ ๋ฐ๋ฌ์ ๋ฐ๋ฅธ ๊ต์ก ๋ฐฉ๋ฒ์ ๋ํด ํ ์ตํ๋ค. ๋ฐ๋ผ์ ์๋์ ๋ฐ๋ฌ, ์ฌ๋ฆฌ์ ํ๋, ๋ถ๋ชจ ๊ต์ก ๋ฑ ์๋์ ๋ํ ์ ๋ฌธ์ ์ธ ์ง์๊ณผ ์ ๋์ ์์ค์ ๋ง๋ ์์ฌ์ํต ๋ฅ๋ ฅ๊ณผ ์ธ์ด ๊ตฌ์ฌ ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ์๋ ๋ณด์ก์ ๋ํ ์ฑ
์ ๊ฐ์ด ์์ด์ผ ํ๋ค.\n",
|
| 115 |
+
"์ด๋ฑ๊ต์ก์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 116 |
+
"๋ต๋ณ: ์ด๋ฑ๊ต์ก ์ ๊ณต์์๋ ๋ง 6โผ12์ธ ์๋์ ๋์์ผ๋ก, ๊ทธ๋ค์ ์ฌ๋ฆฌ์ ํน์ฑ์ ํ์
ํ๊ณ ๊ตญ์ด, ์ํ, ๋ฏธ์ ๋ฑ ๋ค์ํ ๊ต๊ณผ ์ด๋ก ์ ์ ์ฉํ์ฌ ๊ต์ก ํ์ฅ์์ ๊ต์ฌ๋ก์ ์
๋ฌด๋ฅผ ์ํ ํ ์ ์๋ ๋ฐฉ๋ฒ๋ค์ ํ์ตํ๋ค. ๋ฐ๋ผ์ ์ด๋ฑํ๊ต ๊ต๊ณผ์ ๋ํ ๊ธฐ์ด ์ง์๊ณผ ๋ค์ํ ๊ต์ก ๋ฐฉ๋ฒ์ ๊ณ ์ํ ์ ์๋ ์ฐฝ์๋ ฅ, ์ด๋ฆฐ์ด์ ๋ฐ๋ฌ์ ๋ํ ๋์ ์ดํด ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ์ด๋ฑ ๊ต์ก์ ๋ํ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.\n",
|
| 117 |
+
"์ค๋ฑ๊ต์ก์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 118 |
+
"๋ต๋ณ: ์ค๋ฑ๊ต์ก ์ ๊ณต์์๋ ๋ง 13โผ18์ธ ์คยท๊ณ ๋ฑํ๊ต ํ์๋ค์ ๋์์ผ๋ก ๊ทธ๋ค์ ๊ฐ๋ฅด์น ๊ธฐ ์ํด ํน์ ๊ต๊ณผ๋ชฉ์ ์ ๊ณตํ๊ณ , ๊ต์ก ํ์ฅ์์ ๊ต์ฌ๋ก์ ์
๋ฌด๋ฅผ ์ํํ ์ ์๋ ๋ฐฉ๋ฒ ๋ค์ ํ์ตํ๋ค. ์ด์ ์คยท๊ณ ๋ฑํ๊ต ๊ต๊ณผ์ ๋ํ ์ง์, ํํ ๊ต์ก๊ณผ์ ๋ฐ ๊ต์ก์ ์ฑ
์ ๋ํ ์ดํด, ํ์ ์ง๋ ๋ฅ๋ ฅ, ๊ต์ก ํ์์ ์ข
ํฉ์ ์ผ๋ก ์ดํดํ๊ณ ๋นํํ ์ ์๋ ๋ฅ๋ ฅ์ด ํ์ํ ๊ณ , ์คยท๊ณ ๋ฑํ๊ต ๊ต์ก์ ๋ํ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.\n",
|
| 119 |
+
"ํน์๊ต์ก์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 120 |
+
"๋ต๋ณ: ํน์๊ต์ก ์ ๊ณต์์๋ ์ ์ฒด์ , ์ ์ ์ , ์ฌํ์ ๋ฐ๋ฌ ์ฅ์ ๋ฅผ ์ง๋ ํ์๋ค์๊ฒ ์ ํฉํ ๊ต์ก์ ์ ๊ณตํ์ฌ ๊ทธ๋ค์ด ์ฌํ ๊ตฌ์ฑ์์ผ๋ก์ ์ํํ ์ ์๋๋ก ๊ต์กํ๋ ํน์๊ต์ก ์ ๋ฌธ ๊ฐ๋ฅผ ์์ฑํ๋ค. ๋ฐ๋ผ์ ํน์๊ต์ก์ ๋ํ ์ ๋ฌธ์ ์ธ ์ง์์ ๋ฐํ์ผ๋ก ๋ค์ํ ์ํฉ์์์ ๋์ฒ ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ์ฅ์ ํ์์ ๋ํ ์ฌ๋, ๊ต์ฌ๋ก์์ ์๋ช
์์๊ณผ ๋ด์ฌ ๋ฐ ํฌ์ ์ ์ ์ด ์์ด์ผ ํ๋ค.\n",
|
| 121 |
+
"์ํ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 122 |
+
"๋ต๋ณ: ์ํ ์ ๊ณต์์๋ ๋ค์ํ ๋ฌผ์ฒด๋ ๊ณต๊ฐ, ํ์์ ๊ณ๋ํํ์ฌ ๋ถ์ยท์ค๋ช
ํ๊ณ , ์์ ์ฒด๊ณ ์ ์ฑ์ง์ ์ฐ๊ตฌํ๋ค. ์ํ ์ ๊ณต์ ๋
ผ๋ฆฌ์ ์ธ ์ฌ๊ณ ์ ๋ถ์๋ ฅ, ์ถ๋ฆฌ๋ ฅ์ด ํ์ํ๊ณ , ๋ฌธ์ ํด ๊ฒฐ ๊ณผ์ ์ ์ค์ํ๊ฒ ์๊ฐํ๋ฉฐ ์์ ์ฑ์ง์ ๋ํ ํฅ๋ฏธ๊ฐ ์์ด์ผ ํ๋ค.\n",
|
| 123 |
+
"ํต๊ณ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 124 |
+
"๋ต๋ณ: ํต๊ณ ์ ๊ณต์์๋ ์ฐ๊ตฌ๋ชฉ์ ์ ๋ถํฉํ๋ ์๋ฃ๋ฅผ ์์ง, ์์ฝํจ์ผ๋ก์จ ์ฌํ ๊ฐ ๋ถ์ผ์ ๋ค์ํ ํน์ฑ ๋ฐ ์ ๋ณด๋ฅผ ๋ถ์ํ๊ณ ํด์ํ๋ ์ด๋ก ๊ณผ ๋ฐฉ๋ฒ์ ์ฐ๊ตฌํ๋ค. ํต๊ณ ์ ๊ณต์ ๋ค์ํ ํต๊ณ ๋ฐฉ๋ฒ๋ก ์ ์ ์ฉํ ์ ์๋ ์์ฉ๋ ฅ๊ณผ ์ ๋ณด๋ฅผ ๋ถ์ํ๊ณ ์ถ๋ฆฌํ ์ ์๋ ๋ฅ๋ ฅ์ด ํ์ํ ๊ณ , ์ํ์ ๋ํ ํฅ๋ฏธ๊ฐ ์์ด์ผ ํ๋ค.\n",
|
| 125 |
+
"๋ฌผ๋ฆฌยท์ฒ๋ฌธ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 126 |
+
"๋ต๋ณ: ๋ฌผ๋ฆฌยท์ฒ๋ฌธ ์ ๊ณต์์๋ ๋ชจ๋ ์์ฐํ์์ ์กด์ฌํ๋ ํ์ ์๋ฆฌ์ ์ฐ์ฃผ์์ ์ผ์ด๋๋ ๋ค์ํ ํ์์ ๊ด์ธกํ์ฌ ์ฐ์ฃผ์ ์ง์์ ๋ฒ์น์ ํ๊ตฌํ๋ค. ๋ฌผ๋ฆฌยท์ฒ๋ฌธ ์ ๊ณต์ ๋์ ๋ณด์ด ์ง ์๋ ์์ ์ธ๊ณ๋ฅผ ํฌํจํด ์ง๊ตฌ์ ์ฐ์ฃผ์ ๋ํ ์ฐฝ์์ ์ธ ์ฌ๊ณ ๋ฅ๋ ฅ, ์ฒ์ฒด๋ ์ง๊ตฌ์์ ์ผ์ด๋๋ ํ์์ ๋ถ์ํ๋ ๋
ผ๋ฆฌ์ ์ธ ์ฌ๊ณ ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ๋ค์ํ ์์ฐ ํ์์ ๋ํ ๊ด ์ฌ์ด ์์ด์ผ ํ๋ค.\n",
|
| 127 |
+
"์ง์งยท๋๊ธฐยทํด์์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 128 |
+
"๋ต๋ณ: ์ง์งยท๋๊ธฐยทํด์ ์ ๊ณต์์๋ ์ง๊ตฌ๋ฅผ ๊ตฌ์ฑํ๋ ๋ฌผ์ง์ ์ฑ๋ถ, ๊ตฌ์กฐ, ํ์ฑ ๋ฐ ๋ณํ ๊ณผ์ ๋ฑ์ ์ฐ๊ตฌํ๊ณ ์ง๊ตฌ๋ฅผ ๋๋ฌ์ผ ๋๊ธฐ์ ํด์์ ์์ฐ ํ์์ ๋ํด ํ๊ตฌํ๋ค. ์ง์งยท๋๊ธฐยทํด ์ ์ ๊ณต์ ๊ณผํ ๊ต๊ณผ ์ ๋ฐ์ ๋ํ ๊ธฐ๋ณธ ์ง์ ๊ทธ๋ฆฌ๊ณ ์์ฐ ํ๊ฒฝ์ ๋ํ ๊ด์ฐฐ๋ ฅ๊ณผ ํ๊ตฌ๋ ฅ ์ด ํ์ํ๊ณ , ์์ฐ, ๋๊ธฐ, ๋ฐ๋ค์์ ๋ฐ์ํ๋ ์์ฐ ํ์์ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.\n",
|
| 129 |
+
"๋๋ฆผยท์ถ์ฐยท์์ฐ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 130 |
+
"๋ต๋ณ: ๋๋ฆผยท์ถ์ฐยท์์ฐ ์ ๊ณต์์๋ ๋ยท์ถยท์์ฐ๋ฌผ์ ํจ์จ์ ์ผ๋ก ๊ด๋ฆฌํ๊ณ ๊ฐ๋ฐ, ์์ฐํ๊ธฐ ์ ํ ์ฐ๊ตฌ๋ฅผ ์ํํ๋ฉฐ, ๋ยท์ถยท์์ฐ ์ํ์ ์์ฐ, ๊ฐ๊ณต์ฒ๋ฆฌ, ์ ํต ๋ฑ์ ๋ํ ์ง์์ ์ต๋ํ ์ฌ ๊ด๋ จ๋ ๋ฌธ์ ๋ค์ ํ์ํ๊ณ ํด๊ฒฐ๋ฐฉ๋ฒ์ ๋ชจ์ํ๋ค. ๋๋ฆผยท์ถ์ฐยท์์ฐ ์ ๊ณต์ ์ํํ์ ๊ธฐ์ด์ ์๋ช
๊ณผํ ๋ฐ ํํ ์ง์์ด ํ์ํ๊ณ , ๋์ด์ด ํ๊ฒฝ๊ณผ ๋ยท์๋ฌผ์ ๋ํ ๊ด์ฌ์ด ์์ด ์ผ ํ๋ค.\n",
|
| 131 |
+
"์ํ์์์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 132 |
+
"๋ต๋ณ: ์ํ์์ ์ ๊ณต์์๋ ๊ฐ์ธ, ๊ฐ์ , ์ง๋จ์ ์์ ์ญ์ทจ์ ์์ํ ๋ฌธ์ ๋ฅผ ๋ถ์ํ๊ณ , ๊ฑด๊ฐ ํ ์ถ์ ์ ์ง๋ฅผ ๋ชฉ์ ์ผ๋ก ํ๋ ์์ํ์ ์ง์ ํฅ์์ ์ํด ๊ฑด๊ฐ ๋ฐ ์์์ ๊ด๋ฆฌยท์ฆ์งํ ๋ ๋ฐฉ์์ ๋ํ์ฌ ์ฐ๊ตฌํ๋ค. ์ํ์์ ์ ๊ณต์ ์ํ, ์์์ ๋ํ ์ค์์ฑ์ ์ดํดํ๊ณ ์ ํ ๊ฐ๋ฐ์ด๋ ์ฒจ๋จ๊ธฐ์ ์ ์ฉ์ ์ํ ์ฐฝ์๋ ฅ, ์์ฉ๋ ฅ์ด ํ์ํ๋ฉฐ, ๋ค์ํ ์ํ์ ์ฑ๋ถ๊ณผ ์์์์ ๋ํด ํฅ๋ฏธ๊ฐ ์์ด์ผ ํ๋ค.\n",
|
| 133 |
+
"๊ธฐ๊ณ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 134 |
+
"๋ต๋ณ: ๊ธฐ๊ณ ์ ๊ณต์์๋ ๋ค์ํ ์ฐ์
๋ถ์ผ์ ํ ๋๊ฐ ๋๋ ๊ฐ์ข
๊ธฐ๊ณ์ ์ฅ๋น์ ์ค๊ณ, ์ ์, ์ด ์ฉ, ๊ด๋ฆฌ ๋ฑ์ ๋ํ ์ด๋ก ๊ณผ ์์ฉ์ ๋ํด ์ฐ๊ตฌํ๋ค. ๊ธฐ๊ณ ์ ๊ณต์์๊ฒ๋ ์ํ, ๋ฌผ๋ฆฌ ๋ฑ ๊ธฐ์ด ๊ณผํ ์ง์์ด ํ์ํ๊ณ , ๊ธฐ๊ณ ์๋ ์๋ฆฌ๋ฅผ ์ดํดํ๊ธฐ ์ํ ๋
ผ๋ฆฌ๋ ฅ๊ณผ ์ง์ ๋ฐ ์ด๋ก ์ ์์ฉ ํ ์ ์๋ ๋ฅ๋ ฅ์ด ํ์ํ๋ฉฐ ๊ธฐ๊ณ, ์๋์ฐจ, ์ ๊ธฐ, ์ ์ ๋ฑ์ ๋ํ ํฅ๋ฏธ๊ฐ ์์ด์ผ ํ๋ค.\n",
|
| 135 |
+
"์ ๊ธฐยท์ ์์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 136 |
+
"๋ต๋ณ: ์ ๊ธฐยท์ ์ ์ ๊ณต์์๋ ์ ๊ธฐ ๋ฐ ์๊ธฐ์ ํ๋ฆ๊ณผ ๊ณ ์ฒด, ๊ธฐ์ฒด, ์ง๊ณต ๋ด์์์ ์ ์ ์ด๋ ์ ํ๊ตฌํ๊ณ ์ด๊ฒ์ ์ค์ํ์ ์์ฉํ๋ ๋ฐฉ๋ฒ์ ์ฐ๊ตฌํ๋ ํ๋ฌธ์ด๋ค. ์ ๊ธฐยท์ ์ ์ ๊ณต์์ ๊ฒ๋ ์๋ก์ด ๊ธฐ๋ฅ์ ์ ๊ธฐยท์ ์์ ํ์ผ๋ก ๊ตฌํํ ์ ์๋ ์ฐฝ์๋ ฅ, ๋ถ์์ ์ฌ๊ณ ๊ฐ ํ์ํ ๊ณ , ์ ๊ธฐ์ ์ ์์คํ
์ดํด๋ฅผ ์ํ ๋
ผ๋ฆฌ์ ์ฌ๊ณ ๊ฐ ํ์ํ๋ฉฐ, ์ํ, ๋ฌผ๋ฆฌํ์ ๋ํ ํฅ๋ฏธ๊ฐ ์์ด์ผ ํ๋ค.\n",
|
| 137 |
+
"์ปดํจํฐยท์ํํธ์จ์ด์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 138 |
+
"๋ต๋ณ: ์ปดํจํฐยท์ํํธ์จ์ด ์ ๊ณต์์๋ ์ปดํจํฐ์ ํ๋์จ์ด์ ์ํํธ์จ์ด, ๋ฉํฐ๋ฏธ๋์ด ๋ฑ ์ปดํจํฐ์ ๊ด๋ จ๋ ๊ธฐ์ ๊ณผ ์ง์์ ์ต๋ํ๊ณ ์ด๋ฅผ ๋ฐํ์ผ๋ก ๋ค์ํ ๋ถ์ผ์ ์ ์ฉํ๋ ํ๋ฌธ ์ด๋ค. ์ปดํจํฐยท์ํํธ์จ์ด ์ ๊ณต์์๊ฒ๋ ์๋ก์ด ์ํํธ์จ์ด ๊ฐ๋ฐ์ ํ์ํ ์ปดํจํฐ ํ๋ก ๊ทธ๋๋ฐ ์ธ์ด์ ๋ํ ์ง์๊ณผ ๋
ผ๋ฆฌ์ ์ฌ๊ณ ๋ฅ๋ ฅ ๋ฐ ์ฐฝ์๋ ฅ์ด ํ์ํ๊ณ , ์ํ์ ๋ํ ํฅ๋ฏธ์ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.\n",
|
| 139 |
+
"์ ๋ณดยทํต์ ยท๋ณด์์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 140 |
+
"๋ต๋ณ: ์ ๋ณดยทํต์ ยท๋ณด์ ์ ๊ณต์์๋ ์ปดํจํฐ๊ณตํ๊ธฐ๋ฐ ์ง์์ ๋ฐํ์ผ๋ก ICT(์ ๋ณดํต์ ๊ธฐ์ ) ๋ฐ ๋ค์ํ ์ตํฉ์ฐ์
๋ถ์ผ์ ์ ๋ณดยทํต์ ยท๋ณด์์ ๊ดํ ์ด๋ก ์ ์ต๋ํ๊ณ ์ด๋ฅผ ์ ์ฉํ๋ ํ๋ฌธ์ด ๋ค. ์ ๋ณดยทํต์ ยท๋ณด์ ์ ๊ณต์์๊ฒ๋ ์ํ, ํต๊ณ, ๋ฌผ๋ฆฌ ๋ฑ์ ๋ํ ๊ธฐ๋ณธ ์ง์๊ณผ ์ ๊ธฐ, ์ ์, ์ปด ํจํฐ์ ๋ํ ๊ธฐ์ด์ง์, ๋
ผ๋ฆฌ์ ์ฌ๊ณ ๋ ฅ, ์ฐฝ์์ ์ด๋ฉฐ ์ ์ฐํ ์ฌ๊ณ , ์ ํํ ํ๋จ๋ ฅ์ด ํ์ ํ๊ณ , ๋ค์ํ ์ฐ์
์ ์ ๋ณดยทํต์ ๋ถ์ผ์ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.\n",
|
| 141 |
+
"์ฐ์
๊ณตํ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 142 |
+
"๋ต๋ณ: ์ฐ์
๊ณตํ ์ ๊ณต์์๋ ์ ํ ์์ฐ๊ณผ ์ ๋ฌ์ ์ ๊ณผ์ ์ ํ์ํ ๊ธฐ๊ณ, ๊ธฐ์ , ์ธ์ ์์ ๋ฑ ์ ๊ดํ ์ต์ ์ ์์คํ
์ ์ฐ๊ตฌํ๋ค. ์ฐ์
๊ณตํ ์ ๊ณต์์๊ฒ๋ ๋ฐ์ดํฐ์ ๊ธฐ๋ฐํ์ฌ ์์คํ
์ ํจ์จ์ฑ์ ๋์ผ ์ ์๋ ๋ฌธ์ ํด๊ฒฐ๋ฅ๋ ฅ๊ณผ ์ฌ๊ณ ๋ ฅ์ด ํ์ํ๊ณ , ๊ณตํ๋ฟ ์๋๋ผ ๊ฒฝ์ํ ๋ฑ ์ ์ฌํ๊ณผํ์ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.\n",
|
| 143 |
+
"๊ฑด์ถยทํ ๋ชฉ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 144 |
+
"๋ต๋ณ: ๊ฑด์ถยทํ ๋ชฉ ์ ๊ณต์์๋ ๊ฑด์ถ๋ฌผ ๋ฐ ๋๋ก, ํญ๊ตฌ, ๋ ๋ฑ์ ์ฌํ๊ธฐ๋ฐ์์ค์ ์ค๊ณํ๊ณ ๊ฑด์ถ ํ๊ธฐ ์ํ ์ด๋ก ์ ์ฐ๊ตฌํ๋ค. ๊ฑด์ถยทํ ๋ชฉ ์ ๊ณต์์๊ฒ๋ ์ํ, ๊ณผํ ๋ฑ ๊ธฐ์ด๋ถ์ผ๋ฅผ ๋น๋กฏํ ์ฌ ์ปดํจํฐ ํ์ฉ ๋ฅ๋ ฅ, ๊ณต๊ฐ ์ง๊ฐ๋ ฅ ๋ฐ ๊ณต๊ฐ ์ดํด๋ ฅ์ด ํ์ํ๊ณ , ์๋ฆ๋ค์ด ๊ฑด์ถ ์ค๊ณ๋ฅผ ์ ํ ๋ฌธํ์ ยท๋ฏธ์ ๊ฐ๊ฐ์ด ์์ด์ผ ํ๋ค.\n",
|
| 145 |
+
"ํ๊ฒฝยท์๋์ง์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 146 |
+
"๋ต๋ณ: ํ๊ฒฝยท์๋์ง ์ ๊ณต์์๋ ์์ฐ์ ๊ตฌ์ฑํ๋ ๋ฌผ, ๋๊ธฐ, ํ ์๊ณผ ์ํ๊ณ์์ ์ผ์ด๋๋ ํ ๊ฒฝ ๋ณํ, ์๋์ง์ ์์ฐ ๊ณผ์ ๋ฐ ๋ณํ ๊ณผ์ ๋ฑ์ ํ๊ตฌํ์ฌ ์ธ๊ฐ์ ์ถ์ ์ง ํฅ์์ ๋ชฉ์ ์ผ ๋ก ํ๋ ํ๋ฌธ์ด๋ค. ํ๊ฒฝยท์๋์ง ์ ๊ณต์์๊ฒ๋ ์คํ ์ฐ๊ตฌ๋ฅผ ์ํ ๋ถ์์ ์ฌ๊ณ ์ ์ง์ค๋ ฅ, ํํ, ๋ฌผ๋ฆฌ, ์ํ ๋ฑ ๊ธฐ์ด๊ณผํ ๋ถ์ผ์ ๋ํ ์ดํด ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ํ๊ฒฝ๋ฌธ์ ๋ฐ ์์ฐ์ ๋ ํ ๋์ ์ดํด์ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.\n",
|
| 147 |
+
"์๋ช
๊ณผํยท์๋ช
๊ณตํ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 148 |
+
"๋ต๋ณ: ์๋ช
๊ณผํยท์๋ช
๊ณตํ ์ ๊ณต์์๋ ์๋ช
์ฒด์ ๋ํ ์ง์๊ณผ ์ดํด๋ฅผ ๋ฐํ์ผ๋ก ๋ค์ํ ์ ๋ช
ํ์์ ๊ณผ์ ์ ํ๊ตฌํ๋ฉฐ, ์๋ฃ, ํ๊ฒฝ ๋ฑ์ ์ฐ๊ตฌ์ ์ฐ์
์ ๊ธฐ์ด๊ฐ ๋๋ ํ๋ฌธ์ด๋ค. ์๋ช
๊ณผํยท์๋ช
๊ณตํ ์ ๊ณต์์๊ฒ๋ ์์ฐ๊ณผํ ๋ถ์ผ์ ๋ํ ๊น์ด ์๋ ์ดํด์ ์ง์ ๊ทธ๋ฆฌ๊ณ ๊ด์ฐฐ ๋ ฅ, ๋
ผ๋ฆฌ์ ์ฌ๊ณ , ๋ถ์๋ ฅ, ํต์ฐฐ๋ ฅ์ด ํ์ํ๊ณ , ๋ค์ํ ์๋ช
ํ์๊ณผ ์๋ช
์ฒด์ ๊ด์ฌ์ด ์์ด ์ผ ํ๋ค.\n",
|
| 149 |
+
"ํํยทํํ๊ณตํ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 150 |
+
"๋ต๋ณ: ํํยทํํ๊ณตํ ์ ๊ณต์์๋ ํํ ๋ฐ์์ ์๋ฆฌ๋ฅผ ์ค์ํ์ ์์ฉํ๋ ๊ธฐ์ ๊ณผ ๋ฐฉ๋ฒ์ ์ฐ๊ตฌํ๋ ํ๋ฌธ์ผ๋ก, ๊ณ ๋ถ์, ์ ์๋์ง, ํ๊ฒฝ๊ธฐ์ ๋ฑ ํํ ๊ณต์ ์ด ํ์ํ ์ฐ์
์ ํ์ฉ๋ ๋ ํ๋ฌธ์ด๋ค. ํํยทํํ๊ณตํ ์ ๊ณต์์๊ฒ๋ ์ํ, ๋ฌผ๋ฆฌ ๋ฑ ๊ธฐ์ด๊ณผํ๋ถ์ผ์ ๋ํ ์ง์๊ณผ ๋ถ์์ ์ธ ์ฌ๊ณ ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ๊ผผ๊ผผํ๊ณ ์ฃผ์ ๊น์ ์ฑ๊ฒฉ๊ณผ ๋ฌผ์ง ๋ณํ์ ๋ํ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.\n",
|
| 151 |
+
"์ฌ๋ฃยท์ ์์ฌ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 152 |
+
"๋ต๋ณ: ์ฌ๋ฃยท์ ์์ฌ ์ ๊ณต์์๋ ๋ค์ํ ์ฌ๋ฃ์ ๊ตฌ์กฐ์ ํน์ง์ ์ดํดํ๊ณ ์ฐ์
์ ํ์ํ ์ ์์ฌ, ์์ฒด์ฌ๋ฃ, ๊ธฐ๋ฅ์ฌ๋ฃ ๋ฑ์ ์ฒจ๋จ ์ฌ๋ฃ์ ๋ํด ์ฐ๊ตฌํ๊ณ ๊ฐ๋ฐํ๋ ํ๋ฌธ์ด๋ค. ์ฌ๋ฃยท์ ์์ฌ ์ ๊ณต์์๊ฒ๋ ๋ค์ํ ์์ฌ์ ๋ํ ์ดํด์ ์ด๋ฅผ ์ค์ํ์ ์ ์ฉ์ํฌ ์ ์๋ ์์ฉ๋ ฅ, ์คํยท์ค์ต์ ์ํ ๋ถ์์ ์ฌ๊ณ ๋ ฅ์ด ํ์ํ๊ณ , ์ํ, ํํ, ๋ฌผ๋ฆฌ ๋ฑ ๊ธฐ์ด๊ณผํ์ ๊ด์ฌ์ด ์ ์ด์ผ ํ๋ค.\n",
|
| 153 |
+
"์ํ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 154 |
+
"๋ต๋ณ: ์ํ ์ ๊ณต์ ์ธ์ฒด์์ ๋ฐ์ํ๋ ์ง๋ณ์ ์๋ฐฉยท์ง๋จยท์น๋ฃ์ ๊ด๋ จ๋ ์ง์์ ์ต๋ํ๊ณ , ์ด๋ฅผ ํ์ฉํ์ฌ ํ์๋ฅผ ์ง๋ฃํ๋ฉฐ, ์ํ ์ด๋ก ์ ํ๊ตฌํจ์ผ๋ก์จ ์ง๋ณ ์๋ฐฉ ๋ฐ ์น๋ฃ ๋ฐฉ์์ ๋ํ ์ฐ๊ตฌ๋ฅผ ์ํํ๋ค. ์ํ ์ ๊ณต์๋ ํํ ๋ฐ ์๋ช
๊ณผํ์ ๊ธฐ์ด ์ง์๊ณผ ๊ณผํ์ ์ฌ๊ณ ๋ฐฉ ์, ์ํ์ ์ง๋จ๊ณผ ์ฒ์น๋ฅผ ์ํ ์ถ๋ก ๋ ฅ์ด ํ์ํ๊ณ , ํ์์ ๊ณ ํต์ ์ดํดํ๋ฉฐ ์๋ช
์ ๊ตฌ ํ๋ ์ผ์ ๋ณด๋์ ๋๋ผ๊ณ , ์ธ์ฒด์ ๊ตฌ์กฐ์ ๊ธฐ๋ฅ์ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.\n",
|
| 155 |
+
"์ฝํ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 156 |
+
"๋ต๋ณ: ์ฝํ ์ ๊ณต์ ์ง๋ณ์ ์๋ฐฉ ๋ฐ ์น๋ฃ์ ์ฌ์ฉ๋๋ ์์ฝํ์ ๊ดํ ๊ธฐ์ด ์ด๋ก ๊ณผ ์์ฝํ์ ๊ฐ๋ฐ, ์์ฐ, ์กฐ์ ์ ๊ด๋ จ๋ ์ง์์ ์ต๋ํ๊ณ ์ด์ ๋ํด ์ฐ๊ตฌํ๋ค. ์ฝํ ์ ๊ณต์๋ ์คํ ๋ฐ ์ค์ต์์์ ์นจ์ฐฉํจ, ๊น์ ์ฃผ์๋ ฅ ๊ทธ๋ฆฌ๊ณ ๋ถ์์ ์ธ ์ฌ๊ณ ๋ ฅ์ด ํ์ํ๊ณ , ํํ, ์๋ช
๊ณผํ์ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.\n",
|
| 157 |
+
"๊ฐํธ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 158 |
+
"๋ต๋ณ: ๊ฐํธ ์ ๊ณต์ ์ธ๊ฐ์ด ๊ฑด๊ฐ์ ์ ์ง ๋ฐ ์ฆ์งํ๊ณ ์ง๋ณ์ผ๋ก๋ถํฐ ํ๋ณตํ ์ ์๋๋ก ํ๋ ์ด๋ก ๊ณผ ์ค๋ฌด ๊ทธ๋ฆฌ๊ณ ๊ฐํธ ๋์์์ ๊ฐํธ ๋ฌธ์ ๋ฅผ ์ง๋จ, ์ค์ฌ, ํ๊ฐํ ์ ์๋ ๋ฅ๋ ฅ์ ๊ฐ๋ฐ ํ๊ณ ์ ์ฉํ๋ค. ๊ฐํธ ์ ๊ณต์๋ ํํ ๋ฐ ์๋ช
๊ณผํ์ ๋ํ ์ดํด์ ํ์ ๏ฟฝ๏ฟฝํธ๋ฅผ ์ํ ์์ฌ ์ํต ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ์ง๋ณ๊ณผ ํ์์ ๊ณ ํต์ ๊ด์ฌ๊ณผ ๋ฐฐ๋ ค๊ฐ ์์ด์ผ ํ๋ค.\n",
|
| 159 |
+
"๋ณด๊ฑดยท์์์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 160 |
+
"๋ต๋ณ: ๋ณด๊ฑดยท์์ ์ ๊ณต์ ๊ณต์ค๋ณด๊ฑด, ๊ฐ์ธ ์์ ๋ฑ ๊ฑด๊ฐ์ ํฅ์์ํค๊ธฐ ์ํ ๋ฐฉ์์ ํ์ํ๊ณ , ์ ์ฒด์ ยท์ ์ ์ ์ฅ์ ๋ฅผ ๊ฐ์ง ์ฌ๋๋ค์ ๋ณด์กฐํ๋ ์น๋ฃ ์์ค๊ณผ ์ฒจ๋จ ์๋ฃ ์ฅ๋น๋ฅผ ๋ค๋ฃจ๊ณ ๋ง๋๋ ๊ธฐ์ ์ ๋ํด ์ฐ๊ตฌํ๋ค. ๋ณด๊ฑดยท์์ ์ ๊ณต์๋ ๊ธฐ์ด๊ณผํ์ ๋ํ ์ง์์ ๋ฐํ์ผ๋ก ๊ณต์ค๋ณด๊ฑด๊ณผ ๊ฐ์ธ ์์์ ๋ํ ์ดํด ๋ฅ๋ ฅ, ํ์ ์ํ์ ๋ฐ๋ผ ์ ์ ํ ์น๋ฃ ๋ฐฉ๋ฒ์ ์ํํ ๋ ์คํ ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ๋ณด๊ฑด์๋ฃ์ธ์ผ๋ก์์ ์ฌ๋ช
๊ฐ์ด ์์ด์ผ ํ๋ค.\n",
|
| 161 |
+
"๋ฌด์ฉ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 162 |
+
"๋ต๋ณ: ๋ฌด์ฉ ์ ๊ณต์์๋ ๋ฌด์ฉ ์ด๋ก ๊ณผ ๊ด๋ จ๋ ์ ๋ฌธ ์ง์์ ์ต๋ํ๊ณ . ์ค๊ธฐ ๊ต์ก์ ํตํด ๊ท ํ ์๋ ์ ์ฒด์ ์์ง์์ ์ฐ๋งํ์ฌ ๋ฌด์ฉ ์ํ์ ๋ํ ํด์๋ ฅ๊ณผ ์์ ์ ํํ๋ ฅ ๋ฑ์ ํจ์ํ ๋ค. ๋ฐ๋ผ์ ์์
๊ณผ ์ฃผ์ ์ ๋ง์ถฐ ๋ชธ์ผ๋ก ํํํ ์ ์๋ ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ๊ฐํ ์ฒด๋ ฅ๊ณผ ์ ์ฐํ ์ ์ฒด์กฐ๊ฑด์ ๊ฐ์ถ๊ธฐ ์ํ ๋๊ธฐ์ ์ธ๋ด์ฌ์ด ์์ด์ผ ํ๋ค.\n",
|
| 163 |
+
"์ฒด์ก์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 164 |
+
"๋ต๋ณ: ์ฒด์ก ์ ๊ณต์์๋ ์ ์ฒดํ๋์ ๋ฐํ์ผ๋ก ํ๋ ์ด๋, ๊ฑด๊ฐ, ์คํฌ์ธ ๊ณผํ, ์ฌ๊ฐ์ ๋ํ ์ด๋ก ์ ๋ฐฐ์ฐ๊ณ ์ด์ ๊ด๋ จํ ํน์ ์ค๊ธฐ ์ค์ต์ ์ํํ์ฌ ์ ์ฒด์ ๋ฅ๋ ฅ์ ๊ณ๋ฐํ๋ค. ์ฒด์ก์ ์ ๊ณตํ๊ธฐ ์ํด์๋ ๊ฐ์ธํ ์ฒด๋ ฅ ๋ฐ ์ ์ ๋ ฅ ๊ทธ๋ฆฌ๊ณ ๊ฒฝ๊ธฐ๋ฅผ ์ ๋นํ๊ฒ ์น๋ฅด๊ธฐ ์ํ ์คํฌ์ธ ๋งจ์ญ์ด ํ์ํ๊ณ , ์ด๋ํ๊ธฐ๋ฅผ ์ข์ํ๋ ๋ง์๊ณผ ์คํฌ์ธ ์ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.\n",
|
| 165 |
+
"์ฐ์ยท์ํยท์ฐ๊ทน์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 166 |
+
"๋ต๋ณ: ์ฐ์ยท์ํยท์ฐ๊ทน ์ ๊ณต์ ๋ค์ํ ๋งค์ฒด๋ฅผ ํ์ฉํ์ฌ ์๋ก์ด ์์ ์ธ์ด๋ฅผ ์ฐฝ์ถํ๊ณ ์ํ, ์ฐ๊ทน ๋ฑ์ ๋ถ์ผ์์ ์ธ๊ฐ์ ์ ์๋ฅผ ์ ๋ฌํ ์ ์๋ ํ๋์ ์ํํ๋ค. ๋ฐ๋ผ์ ๋ฐฐ์ญ์ ๋ ํ ๋ถ์๋ ฅ, ์ฐ๊ธฐ๋ ฅ, ์์ ์ ์ง์์ด ํ์ํ๊ณ , ์๊ฐ์ ๋งค์ฒด๋ฅผ ํตํด ํํํ๋ ๋ฅ๋ ฅ์ด ํ์ ํ๋ฉฐ ๋ฏธ์ , ์์
, ๋ฌด์ฉ, ์ดํ, ์ฐ๊ทน, ์ฌํํ, ์ธ๋ก ํ ๋ฑ ๋ค์ํ ๋ถ์ผ์ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.\n",
|
| 167 |
+
"์์
์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 168 |
+
"๋ต๋ณ: ์์
์ ๊ณต์์๋ ์์
์ ๋ฐ์ ๊ดํ ์ด๋ก ํ์ต๊ณผ ๋ค์ํ ์์
๊ด๋ จ ๊ธฐ๊ต์ ํ๋ จ์ ํต ํด ๊ธฐ์กด ์์
์ ์ฐ์ฃผํ๊ฑฐ๋ ์๋ก์ด ์์
์ ์ฐฝ์ํ๋ฉฐ, ์ ๋ฌธ์ ์ผ๋ก ๊ฐ์ ๋ฐ ๋นํํ๋ ๋ฅ๋ ฅ ์ ๊ณ๋ฐํ๋ค. ๊ทธ๋ฌ๋ฏ๋ก ์ฒญ์๋ฅ๋ ฅ, ์ฐฝ์๋ ฅ, ์์
์ ์๋ฆฌ๋ ์
๊ธฐ๋ก ํํํ ์ ์๋ ์ฐ์ฃผ ๋ฅ ๋ ฅ์ด ํ์ํ๊ณ , ์๊ฐ๊ณผ ์์
์ ๊ฐ์์ฑ์ด ์์ด์ผ ํ๋ค.\n",
|
| 169 |
+
"๋ฏธ์ ยท๊ณต์ยท๋์์ธ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 170 |
+
"๋ต๋ณ: ๋ฏธ์ ยท๊ณต์ยท๋์์ธ ์ ๊ณต์ ๋ฏธ์ ์ ๋ฐ์ ๋ํ ์ง์๊ณผ ์ค๊ธฐ ๊ต์ก์ ํตํด ๋ฏธ์ ์ ์์๋ ฅ ๊ณผ ์ฐฝ์์ฑ ๋ฐ ์ฌ๋ฌ ๊ฐ์ง ํํ ๋ฐฉ๋ฒ์ ์ต๋ํ๊ณ , ๊ธฐ๋ฅ์ ยท์ฌ๋ฏธ์ ์กฐ๊ฑด์ ๊ณ ๋ คํ์ฌ ๊ฐ์ข
์ ํ์ ๋์์ธ์ ๊ฐ๋ฐํ๊ณ ์ฐ๊ตฌํ๋ค. ๋ฐ๋ผ์ ๋ฏธ์ ์ํ์ ๋ง๋ค๊ธฐ ์ํ ๋
์ฐฝ์ฑ๊ณผ ์ฐฝ์์ฑ, ์ ๊ตํ ๊ธฐ๋ฒ ํ์ฉ ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ์์ ๋ถ์ผ์์์ ๋ณํ์ ์ ํ์ ๋ฏผ๊ฐํด์ผ ํ๋ค.\n",
|
| 171 |
+
"๋์งํธ์ฝํ
์ธ ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 172 |
+
"๋ต๋ณ: ๋์งํธ์ฝํ
์ธ ์ ๊ณต์์๋ ๋ด๋ฏธ๋์ด ๋งค์ฒด์ธ ๋์งํธ ๊ฒ์, ๋์งํธ ์์, ์ ๋๋ฉ์ด์
, ์ธํฐ๋ํฐ๋ธ ๋์์ธ์ฝํ
์ธ ๋ฑ์ ๊ธฐํ๊ณผ ์ฐ์ถ์ ์ํํ๊ณ , ๋ค์ํ ์๊ฐ์ ํํ๊ณผ ๋์์ธ ์ ํ์ ๋ฐ ์ฐฝ์ถํ๋ค. ๊ทธ๋ฌ๋ฏ๋ก ๋์งํธ์ฝํ
์ธ ์ ๊ณต์๊ฐ ๋๊ธฐ ์ํด์๋ ๋์งํธ ๊ธฐ์ ์ ๋ ํ ์ง์, ๋์งํธ์ฝํ
์ธ ์ ์ ๋ฅ๋ ฅ, ํธ๋ ๋๋ฅผ ํ์
ํ๋ ๋ฅ๋ ฅ, ์ฐฝ์๋ ฅ, ๊ธฐํ๋ ฅ์ด ํ์ํ๊ณ , ๋ค์ํ ๋งค์ฒด์ ๋ํ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.\n"
|
| 173 |
+
]
|
| 174 |
+
},
|
| 175 |
+
{
|
| 176 |
+
"name": "stderr",
|
| 177 |
+
"output_type": "stream",
|
| 178 |
+
"text": [
|
| 179 |
+
"\n"
|
| 180 |
+
]
|
| 181 |
+
}
|
| 182 |
+
],
|
| 183 |
+
"source": [
|
| 184 |
+
"tokenized_dataset = dataset.map(tokenize)"
|
| 185 |
+
]
|
| 186 |
+
},
|
| 187 |
+
{
|
| 188 |
+
"cell_type": "code",
|
| 189 |
+
"execution_count": 10,
|
| 190 |
+
"id": "6fda32f9-7048-446f-8fc8-818aaff3c904",
|
| 191 |
+
"metadata": {},
|
| 192 |
+
"outputs": [
|
| 193 |
+
{
|
| 194 |
+
"name": "stderr",
|
| 195 |
+
"output_type": "stream",
|
| 196 |
+
"text": [
|
| 197 |
+
"/opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages/torch/utils/data/dataloader.py:683: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, then device pinned memory won't be used.\n",
|
| 198 |
+
" warnings.warn(warn_msg)\n"
|
| 199 |
+
]
|
| 200 |
+
},
|
| 201 |
+
{
|
| 202 |
+
"data": {
|
| 203 |
+
"text/html": [
|
| 204 |
+
"\n",
|
| 205 |
+
" <div>\n",
|
| 206 |
+
" \n",
|
| 207 |
+
" <progress value='5' max='250' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
|
| 208 |
+
" [ 5/250 07:56 < 10:48:26, 0.01 it/s, Epoch 0.08/5]\n",
|
| 209 |
+
" </div>\n",
|
| 210 |
+
" <table border=\"1\" class=\"dataframe\">\n",
|
| 211 |
+
" <thead>\n",
|
| 212 |
+
" <tr style=\"text-align: left;\">\n",
|
| 213 |
+
" <th>Step</th>\n",
|
| 214 |
+
" <th>Training Loss</th>\n",
|
| 215 |
+
" </tr>\n",
|
| 216 |
+
" </thead>\n",
|
| 217 |
+
" <tbody>\n",
|
| 218 |
+
" </tbody>\n",
|
| 219 |
+
"</table><p>"
|
| 220 |
+
],
|
| 221 |
+
"text/plain": [
|
| 222 |
+
"<IPython.core.display.HTML object>"
|
| 223 |
+
]
|
| 224 |
+
},
|
| 225 |
+
"metadata": {},
|
| 226 |
+
"output_type": "display_data"
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"ename": "KeyboardInterrupt",
|
| 230 |
+
"evalue": "",
|
| 231 |
+
"output_type": "error",
|
| 232 |
+
"traceback": [
|
| 233 |
+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
| 234 |
+
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
|
| 235 |
+
"Cell \u001b[0;32mIn[10], line 17\u001b[0m\n\u001b[1;32m 1\u001b[0m training_args \u001b[38;5;241m=\u001b[39m TrainingArguments(\n\u001b[1;32m 2\u001b[0m output_dir\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m./kcmii_definition_ft_mps\u001b[39m\u001b[38;5;124m\"\u001b[39m, \n\u001b[1;32m 3\u001b[0m per_device_train_batch_size\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m, \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 8\u001b[0m use_mps_device\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m \n\u001b[1;32m 9\u001b[0m )\n\u001b[1;32m 11\u001b[0m trainer \u001b[38;5;241m=\u001b[39m Trainer(\n\u001b[1;32m 12\u001b[0m model\u001b[38;5;241m=\u001b[39mmodel,\n\u001b[1;32m 13\u001b[0m args\u001b[38;5;241m=\u001b[39mtraining_args,\n\u001b[1;32m 14\u001b[0m train_dataset\u001b[38;5;241m=\u001b[39mtokenized_dataset\n\u001b[1;32m 15\u001b[0m )\n\u001b[0;32m---> 17\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 19\u001b[0m model\u001b[38;5;241m.\u001b[39msave_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m./kcmii_definition_ft_mps\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 20\u001b[0m tokenizer\u001b[38;5;241m.\u001b[39msave_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m./kcmii_definition_ft_mps\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
|
| 236 |
+
"File \u001b[0;32m/opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages/transformers/trainer.py:2245\u001b[0m, in \u001b[0;36mTrainer.train\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 2243\u001b[0m hf_hub_utils\u001b[38;5;241m.\u001b[39menable_progress_bars()\n\u001b[1;32m 2244\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 2245\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43minner_training_loop\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2246\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2247\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2248\u001b[0m \u001b[43m \u001b[49m\u001b[43mtrial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2249\u001b[0m \u001b[43m \u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2250\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
|
| 237 |
+
"File \u001b[0;32m/opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages/transformers/trainer.py:2611\u001b[0m, in \u001b[0;36mTrainer._inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 2607\u001b[0m grad_norm \u001b[38;5;241m=\u001b[39m _grad_norm\n\u001b[1;32m 2609\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcallback_handler\u001b[38;5;241m.\u001b[39mon_pre_optimizer_step(args, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol)\n\u001b[0;32m-> 2611\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptimizer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstep\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2613\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcallback_handler\u001b[38;5;241m.\u001b[39mon_optimizer_step(args, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol)\n\u001b[1;32m 2615\u001b[0m \u001b[38;5;66;03m# get leaning rate before update\u001b[39;00m\n",
|
| 238 |
+
"File \u001b[0;32m/opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages/accelerate/optimizer.py:178\u001b[0m, in \u001b[0;36mAcceleratedOptimizer.step\u001b[0;34m(self, closure)\u001b[0m\n\u001b[1;32m 176\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_accelerate_step_called \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 177\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 178\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptimizer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstep\u001b[49m\u001b[43m(\u001b[49m\u001b[43mclosure\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 179\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maccelerator_state\u001b[38;5;241m.\u001b[39mdistributed_type \u001b[38;5;241m==\u001b[39m DistributedType\u001b[38;5;241m.\u001b[39mXLA:\n\u001b[1;32m 180\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgradient_state\u001b[38;5;241m.\u001b[39mis_xla_gradients_synced \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n",
|
| 239 |
+
"File \u001b[0;32m/opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:124\u001b[0m, in \u001b[0;36mLRScheduler.__init__.<locals>.patch_track_step_called.<locals>.wrap_step.<locals>.wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 122\u001b[0m opt \u001b[38;5;241m=\u001b[39m opt_ref()\n\u001b[1;32m 123\u001b[0m opt\u001b[38;5;241m.\u001b[39m_opt_called \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m \u001b[38;5;66;03m# type: ignore[union-attr]\u001b[39;00m\n\u001b[0;32m--> 124\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__get__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mopt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mopt\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;18;43m__class__\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
|
| 240 |
+
"File \u001b[0;32m/opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages/torch/optim/optimizer.py:485\u001b[0m, in \u001b[0;36mOptimizer.profile_hook_step.<locals>.wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 480\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 481\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[1;32m 482\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfunc\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m must return None or a tuple of (new_args, new_kwargs), but got \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresult\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 483\u001b[0m )\n\u001b[0;32m--> 485\u001b[0m out \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 486\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_optimizer_step_code()\n\u001b[1;32m 488\u001b[0m \u001b[38;5;66;03m# call optimizer step post hooks\u001b[39;00m\n",
|
| 241 |
+
"File \u001b[0;32m/opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages/torch/optim/optimizer.py:79\u001b[0m, in \u001b[0;36m_use_grad_for_differentiable.<locals>._use_grad\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 77\u001b[0m torch\u001b[38;5;241m.\u001b[39mset_grad_enabled(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefaults[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdifferentiable\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n\u001b[1;32m 78\u001b[0m torch\u001b[38;5;241m.\u001b[39m_dynamo\u001b[38;5;241m.\u001b[39mgraph_break()\n\u001b[0;32m---> 79\u001b[0m ret \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 80\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 81\u001b[0m torch\u001b[38;5;241m.\u001b[39m_dynamo\u001b[38;5;241m.\u001b[39mgraph_break()\n",
|
| 242 |
+
"File \u001b[0;32m/opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages/torch/optim/adam.py:246\u001b[0m, in \u001b[0;36mAdam.step\u001b[0;34m(self, closure)\u001b[0m\n\u001b[1;32m 234\u001b[0m beta1, beta2 \u001b[38;5;241m=\u001b[39m group[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbetas\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 236\u001b[0m has_complex \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_init_group(\n\u001b[1;32m 237\u001b[0m group,\n\u001b[1;32m 238\u001b[0m params_with_grad,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 243\u001b[0m state_steps,\n\u001b[1;32m 244\u001b[0m )\n\u001b[0;32m--> 246\u001b[0m \u001b[43madam\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 247\u001b[0m \u001b[43m \u001b[49m\u001b[43mparams_with_grad\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 248\u001b[0m \u001b[43m \u001b[49m\u001b[43mgrads\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 249\u001b[0m \u001b[43m \u001b[49m\u001b[43mexp_avgs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 250\u001b[0m \u001b[43m \u001b[49m\u001b[43mexp_avg_sqs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 251\u001b[0m \u001b[43m \u001b[49m\u001b[43mmax_exp_avg_sqs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 252\u001b[0m \u001b[43m \u001b[49m\u001b[43mstate_steps\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 253\u001b[0m \u001b[43m \u001b[49m\u001b[43mamsgrad\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgroup\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mamsgrad\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 254\u001b[0m \u001b[43m \u001b[49m\u001b[43mhas_complex\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhas_complex\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 255\u001b[0m \u001b[43m \u001b[49m\u001b[43mbeta1\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbeta1\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 256\u001b[0m 
\u001b[43m \u001b[49m\u001b[43mbeta2\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbeta2\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 257\u001b[0m \u001b[43m \u001b[49m\u001b[43mlr\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgroup\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlr\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 258\u001b[0m \u001b[43m \u001b[49m\u001b[43mweight_decay\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgroup\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mweight_decay\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 259\u001b[0m \u001b[43m \u001b[49m\u001b[43meps\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgroup\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43meps\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 260\u001b[0m \u001b[43m \u001b[49m\u001b[43mmaximize\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgroup\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmaximize\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 261\u001b[0m \u001b[43m \u001b[49m\u001b[43mforeach\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgroup\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mforeach\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 262\u001b[0m \u001b[43m \u001b[49m\u001b[43mcapturable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgroup\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcapturable\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 263\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mdifferentiable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgroup\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdifferentiable\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 264\u001b[0m \u001b[43m \u001b[49m\u001b[43mfused\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgroup\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mfused\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 265\u001b[0m \u001b[43m \u001b[49m\u001b[43mgrad_scale\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mgrad_scale\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 266\u001b[0m \u001b[43m \u001b[49m\u001b[43mfound_inf\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mfound_inf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 267\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecoupled_weight_decay\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgroup\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdecoupled_weight_decay\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 268\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 270\u001b[0m 
\u001b[38;5;28;01mreturn\u001b[39;00m loss\n",
|
| 243 |
+
"File \u001b[0;32m/opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages/torch/optim/optimizer.py:147\u001b[0m, in \u001b[0;36m_disable_dynamo_if_unsupported.<locals>.wrapper.<locals>.maybe_fallback\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 145\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m disabled_func(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 146\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 147\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
|
| 244 |
+
"File \u001b[0;32m/opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages/torch/optim/adam.py:933\u001b[0m, in \u001b[0;36madam\u001b[0;34m(params, grads, exp_avgs, exp_avg_sqs, max_exp_avg_sqs, state_steps, foreach, capturable, differentiable, fused, grad_scale, found_inf, has_complex, decoupled_weight_decay, amsgrad, beta1, beta2, lr, weight_decay, eps, maximize)\u001b[0m\n\u001b[1;32m 930\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 931\u001b[0m func \u001b[38;5;241m=\u001b[39m _single_tensor_adam\n\u001b[0;32m--> 933\u001b[0m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 934\u001b[0m \u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 935\u001b[0m \u001b[43m \u001b[49m\u001b[43mgrads\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 936\u001b[0m \u001b[43m \u001b[49m\u001b[43mexp_avgs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 937\u001b[0m \u001b[43m \u001b[49m\u001b[43mexp_avg_sqs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 938\u001b[0m \u001b[43m \u001b[49m\u001b[43mmax_exp_avg_sqs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 939\u001b[0m \u001b[43m \u001b[49m\u001b[43mstate_steps\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 940\u001b[0m \u001b[43m \u001b[49m\u001b[43mamsgrad\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mamsgrad\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 941\u001b[0m \u001b[43m \u001b[49m\u001b[43mhas_complex\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhas_complex\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 942\u001b[0m \u001b[43m \u001b[49m\u001b[43mbeta1\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbeta1\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 943\u001b[0m \u001b[43m \u001b[49m\u001b[43mbeta2\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbeta2\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 944\u001b[0m \u001b[43m \u001b[49m\u001b[43mlr\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlr\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 
945\u001b[0m \u001b[43m \u001b[49m\u001b[43mweight_decay\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mweight_decay\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 946\u001b[0m \u001b[43m \u001b[49m\u001b[43meps\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43meps\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 947\u001b[0m \u001b[43m \u001b[49m\u001b[43mmaximize\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmaximize\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 948\u001b[0m \u001b[43m \u001b[49m\u001b[43mcapturable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcapturable\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 949\u001b[0m \u001b[43m \u001b[49m\u001b[43mdifferentiable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdifferentiable\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 950\u001b[0m \u001b[43m \u001b[49m\u001b[43mgrad_scale\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgrad_scale\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 951\u001b[0m \u001b[43m \u001b[49m\u001b[43mfound_inf\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfound_inf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 952\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecoupled_weight_decay\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecoupled_weight_decay\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 953\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
|
| 245 |
+
"File \u001b[0;32m/opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages/torch/optim/adam.py:456\u001b[0m, in \u001b[0;36m_single_tensor_adam\u001b[0;34m(params, grads, exp_avgs, exp_avg_sqs, max_exp_avg_sqs, state_steps, grad_scale, found_inf, amsgrad, has_complex, beta1, beta2, lr, weight_decay, eps, maximize, capturable, differentiable, decoupled_weight_decay)\u001b[0m\n\u001b[1;32m 454\u001b[0m exp_avg_sq\u001b[38;5;241m.\u001b[39mmul_(beta2)\u001b[38;5;241m.\u001b[39maddcmul_(grad, grad, value\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m \u001b[38;5;241m-\u001b[39m beta2)\n\u001b[1;32m 455\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 456\u001b[0m \u001b[43mexp_avg_sq\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmul_\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbeta2\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39maddcmul_(grad, grad, value\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m \u001b[38;5;241m-\u001b[39m beta2)\n\u001b[1;32m 458\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m capturable \u001b[38;5;129;01mor\u001b[39;00m differentiable:\n\u001b[1;32m 459\u001b[0m step \u001b[38;5;241m=\u001b[39m step_t\n",
|
| 246 |
+
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
|
| 247 |
+
]
|
| 248 |
+
}
|
| 249 |
+
],
|
| 250 |
+
"source": [
|
| 251 |
+
"training_args = TrainingArguments(\n",
|
| 252 |
+
" output_dir=\"./kcmii_definition_ft_mps\", \n",
|
| 253 |
+
" per_device_train_batch_size=1, \n",
|
| 254 |
+
" num_train_epochs=5,\n",
|
| 255 |
+
" logging_steps=10,\n",
|
| 256 |
+
" save_strategy=\"epoch\",\n",
|
| 257 |
+
" optim=\"adamw_torch\",\n",
|
| 258 |
+
" use_mps_device=True \n",
|
| 259 |
+
")\n",
|
| 260 |
+
"\n",
|
| 261 |
+
"trainer = Trainer(\n",
|
| 262 |
+
" model=model,\n",
|
| 263 |
+
" args=training_args,\n",
|
| 264 |
+
" train_dataset=tokenized_dataset\n",
|
| 265 |
+
")\n",
|
| 266 |
+
"\n",
|
| 267 |
+
"trainer.train()\n",
|
| 268 |
+
"\n",
|
| 269 |
+
"model.save_pretrained(\"./kcmii_definition_ft_mps\")\n",
|
| 270 |
+
"tokenizer.save_pretrained(\"./kcmii_definition_ft_mps\")"
|
| 271 |
+
]
|
| 272 |
+
},
|
| 273 |
+
{
|
| 274 |
+
"cell_type": "markdown",
|
| 275 |
+
"id": "3771df36-b3a5-40d4-870b-3e182c6f4a36",
|
| 276 |
+
"metadata": {},
|
| 277 |
+
"source": [
|
| 278 |
+
"# 학습 시간이 너무 오래 걸림...."
|
| 279 |
+
]
|
| 280 |
+
},
|
| 281 |
+
{
|
| 282 |
+
"cell_type": "code",
|
| 283 |
+
"execution_count": null,
|
| 284 |
+
"id": "eb74a226-d7de-47cd-965c-b60b5872353c",
|
| 285 |
+
"metadata": {},
|
| 286 |
+
"outputs": [],
|
| 287 |
+
"source": []
|
| 288 |
+
}
|
| 289 |
+
],
|
| 290 |
+
"metadata": {
|
| 291 |
+
"kernelspec": {
|
| 292 |
+
"display_name": "Python 3 (ipykernel)",
|
| 293 |
+
"language": "python",
|
| 294 |
+
"name": "python3"
|
| 295 |
+
},
|
| 296 |
+
"language_info": {
|
| 297 |
+
"codemirror_mode": {
|
| 298 |
+
"name": "ipython",
|
| 299 |
+
"version": 3
|
| 300 |
+
},
|
| 301 |
+
"file_extension": ".py",
|
| 302 |
+
"mimetype": "text/x-python",
|
| 303 |
+
"name": "python",
|
| 304 |
+
"nbconvert_exporter": "python",
|
| 305 |
+
"pygments_lexer": "ipython3",
|
| 306 |
+
"version": "3.10.16"
|
| 307 |
+
}
|
| 308 |
+
},
|
| 309 |
+
"nbformat": 4,
|
| 310 |
+
"nbformat_minor": 5
|
| 311 |
+
}
|
prototype/kcmii_lm_rag.ipynb
ADDED
|
@@ -0,0 +1,483 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"id": "9d6fc17a-d548-4ab6-b5d3-51e4b60960bb",
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"source": [
|
| 8 |
+
"### full-fine-tuning 하기엔 학습 시간이 너무 오래 걸림\n",
|
| 9 |
+
"### RAG 검색 기반 활용해 개발해보는 것으로 진행"
|
| 10 |
+
]
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"cell_type": "markdown",
|
| 14 |
+
"id": "33f8253d-352f-4ac6-9205-59b0e773be77",
|
| 15 |
+
"metadata": {},
|
| 16 |
+
"source": [
|
| 17 |
+
"### 키워드 기반의 RAG 구축 \n",
|
| 18 |
+
"\n",
|
| 19 |
+
"### 흐름\n",
|
| 20 |
+
"[입력: 전공명 3개] \n",
|
| 21 |
+
" โ \n",
|
| 22 |
+
"[CSV ๋ฌธ์์์ 3๊ฐ ์ ๊ณต ์ ๋ณด ์ถ์ถ] โ (Keyword-based Retriever) \n",
|
| 23 |
+
" โ \n",
|
| 24 |
+
"[์ ๊ณต ์ ๋ณด๋ค์ ์ฐ๊ฒฐํ์ฌ ํ๋กฌํํธ ์์ฑ] โ (Prompt Composer) \n",
|
| 25 |
+
" โ \n",
|
| 26 |
+
"[LLM์๊ฒ ์ ๋ฌํ์ฌ ์๊ธฐ๋ถ ๋ฌธ๊ตฌ ์์ฑ] โ (Generator)"
|
| 27 |
+
]
|
| 28 |
+
},
|
| 29 |
+
{
|
| 30 |
+
"cell_type": "code",
|
| 31 |
+
"execution_count": 4,
|
| 32 |
+
"id": "cbe0926d-ccec-4942-8a9e-bf5f940d1a0b",
|
| 33 |
+
"metadata": {},
|
| 34 |
+
"outputs": [
|
| 35 |
+
{
|
| 36 |
+
"data": {
|
| 37 |
+
"text/plain": [
|
| 38 |
+
"(61,\n",
|
| 39 |
+
" '[์ ๊ณต๋ช
: ์ด๋ฌธํ]\\n์ ๊ณต์ค๋ช
: ๋์์ ์ธ์ด์ ๊ตฌ์กฐ์ ๋ฌธํ์ ํํ์ ์ฒด๊ณ์ ์ผ๋ก ํ๊ตฌํ๊ณ , ๋ค์ํ ๋ฌธํ์ ๋งฅ๋ฝ์ ์ดํดํ๋ ํ๋ฌธ์ด๋ค.\\n์๊ตฌ์ญ๋: ์ธ์ด์ ๊ฐ๊ฐ, ๋
ผ๋ฆฌ์ ์ฌ๊ณ ๋ ฅ, ๋นํ์ ๋ถ์ ๋ฅ๋ ฅ\\n์ ๊ณต๊ด์ฌ: ์ธ์ด์ ๋ฌธํ, ๋ค์ํ ๊ตญ๊ฐ์ ์ฌํยท๋ฌธํ์ ํน์ฑ\\n๊ด๋ จ์ง๋ก: ์์ค๊ฐ, ๋ฐฉ์ก์๊ฐ, ๋ฒ์ญ๊ฐ, ๋ฌธํ๋นํ๊ฐ, ํต์ญ์ฌ, ์ธ์ดํ์')"
|
| 40 |
+
]
|
| 41 |
+
},
|
| 42 |
+
"execution_count": 4,
|
| 43 |
+
"metadata": {},
|
| 44 |
+
"output_type": "execute_result"
|
| 45 |
+
}
|
| 46 |
+
],
|
| 47 |
+
"source": [
|
| 48 |
+
"# ์ฌ์ฉ์๋ก๋ถํฐ ์ ๊ณต๋ช
3๊ฐ ์
๋ ฅ ๋ฐ๊ธฐ\n",
|
| 49 |
+
"# csv ๋ฐ์ดํฐ์์ ํด๋น ์ ๊ณต 3๊ฐ์ ์ ๋ณด ์ถ์ถ\n",
|
| 50 |
+
"# ์ถ์ถ๋ ์ ๋ณด๋ฅผ ๊ธฐ๋ฐ์ผ๋ก ํ๋กฌํํธ ๊ตฌ์ฑ\n",
|
| 51 |
+
"import pandas as pd\n",
|
| 52 |
+
"\n",
|
| 53 |
+
"file_path = \"dataset/kcmii_major_rag.csv\"\n",
|
| 54 |
+
"df = pd.read_csv(file_path)\n",
|
| 55 |
+
"\n",
|
| 56 |
+
"def make_major_prompt(df, majors : list) :\n",
|
| 57 |
+
" parts = []\n",
|
| 58 |
+
" for i, major in enumerate(majors, 1) : # enumerate(major, 1) ์คํํ
๋๋ฒ ์ค์ \n",
|
| 59 |
+
" row = df[df['์ ๊ณต๋ช
']==major]\n",
|
| 60 |
+
" if row.empty:\n",
|
| 61 |
+
" continue\n",
|
| 62 |
+
" r = row.iloc[0]\n",
|
| 63 |
+
" \n",
|
| 64 |
+
"์ ๊ณต์ค๋ช
: {row['์ ๊ณต์ค๋ช
']}\n",
|
| 65 |
+
"์๊ตฌ์ญ๋: {row['์๊ตฌ์ญ๋']}\n",
|
| 66 |
+
"์ ๊ณต๊ด์ฌ: {row['์ ๊ณต๊ด์ฌ']}\n",
|
| 67 |
+
"๊ด๋ จ์ง๋ก: {row['๊ด๋ จ์ง๋ก']}\"\"\"\n",
|
| 68 |
+
"\n",
|
| 69 |
+
"# ๋ฌธ์ ๋ฆฌ์คํธ ์์ฑ\n",
|
| 70 |
+
"documents = df.apply(format_row_as_document, axis=1).tolist()\n",
|
| 71 |
+
"\n",
|
| 72 |
+
"# ๋ฌธ์ ์ ํ์ธ ๋ฐ ์ํ ์ถ๋ ฅ\n",
|
| 73 |
+
"len(documents), documents[0]"
|
| 74 |
+
]
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"cell_type": "code",
|
| 78 |
+
"execution_count": 35,
|
| 79 |
+
"id": "dbd6f8a1-aca2-44df-9d37-7377025c7126",
|
| 80 |
+
"metadata": {},
|
| 81 |
+
"outputs": [
|
| 82 |
+
{
|
| 83 |
+
"data": {
|
| 84 |
+
"text/plain": [
|
| 85 |
+
"pandas.core.series.Series"
|
| 86 |
+
]
|
| 87 |
+
},
|
| 88 |
+
"execution_count": 35,
|
| 89 |
+
"metadata": {},
|
| 90 |
+
"output_type": "execute_result"
|
| 91 |
+
}
|
| 92 |
+
],
|
| 93 |
+
"source": []
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"cell_type": "code",
|
| 97 |
+
"execution_count": 5,
|
| 98 |
+
"id": "52fe7db0-e1e2-4f86-9eb7-55bf794ecb3d",
|
| 99 |
+
"metadata": {},
|
| 100 |
+
"outputs": [],
|
| 101 |
+
"source": [
|
| 102 |
+
"import pandas as pd\n",
|
| 103 |
+
"file_path = \"dataset/kcmii_major_rag.csv\"\n",
|
| 104 |
+
"df = pd.read_csv(file_path)\n",
|
| 105 |
+
"\n",
|
| 106 |
+
"majors = [\"์ฌ๋ฆฌ\", \"๊ต์ก\", \"์ฌํ๋ณต์ง\"]\n",
|
| 107 |
+
"parts = []\n",
|
| 108 |
+
"for i, major in enumerate(majors, 1):\n",
|
| 109 |
+
" row = df[df['์ ๊ณต๋ช
']==major]\n",
|
| 110 |
+
" r = row.iloc[0] # ์๋ฆฌ์ฆ ๋ณํ\n",
|
| 111 |
+
" part = f\"\"\"{i}. {major}\n",
|
| 112 |
+
"์ ๊ณต์ค๋ช
: {r['์ ๊ณต์ค๋ช
']}\n",
|
| 113 |
+
"์๊ตฌ์ญ๋: {r['์๊ตฌ์ญ๋']}\n",
|
| 114 |
+
"์ ๊ณต๊ด์ฌ: {r['์ ๊ณต๊ด์ฌ']}\n",
|
| 115 |
+
"๊ด๋ จ์ง๋ก: {r['๊ด๋ จ์ง๋ก']}\n",
|
| 116 |
+
"\"\"\"\n",
|
| 117 |
+
" parts.append(part)\n",
|
| 118 |
+
"\n",
|
| 119 |
+
"majors_str = \", \".join(majors)\n",
|
| 120 |
+
"prompt = f\"\"\"OO ํ์์ ์๋์ ์ธ ์ ๊ณต ๋ถ์ผ({majors_str})์ ๋ํด ํฅ๋ฏธ ์์ค์ด ๋์ต๋๋ค.\n",
|
| 121 |
+
"๊ฐ ์ ๊ณต์ ๋ํ ์ ๋ณด๋ฅผ ๋ฐํ์ผ๋ก, ๊ณ ๋ฑํ์ ์ํ๊ธฐ๋ก๋ถ ๋ฌธ๊ตฌ๋ฅผ ์์ฑํด์ฃผ์ธ์:\n",
|
| 122 |
+
"\n",
|
| 123 |
+
"{chr(10).join(parts)}\n",
|
| 124 |
+
"\"\"\""
|
| 125 |
+
]
|
| 126 |
+
},
|
| 127 |
+
{
|
| 128 |
+
"cell_type": "code",
|
| 129 |
+
"execution_count": 6,
|
| 130 |
+
"id": "17933069-cd0d-4b15-b424-410b83063dbc",
|
| 131 |
+
"metadata": {},
|
| 132 |
+
"outputs": [
|
| 133 |
+
{
|
| 134 |
+
"data": {
|
| 135 |
+
"text/plain": [
|
| 136 |
+
"['1. ์ฌ๋ฆฌ\\n์ ๊ณต์ค๋ช
: ์ธ๊ฐ์ ํ๋๊ณผ ์ ์ ๊ณผ์ ์ ๊ณผํ์ ์ผ๋ก ํ๊ตฌํ๋ ํ๋ฌธ์ด๋ค.\\n์๊ตฌ์ญ๋: ๊ณต๊ฐ ๋ฅ๋ ฅ, ๋ถ์๋ ฅ, ๊ด์ฐฐ๋ ฅ\\n์ ๊ณต๊ด์ฌ: ์ธ๊ฐ ํ๋, ๊ฐ์ , ์ธ์ง ๊ธฐ๋ฅ\\n๊ด๋ จ์ง๋ก: ์์์ฌ๋ฆฌ์ฌ, ์๋ด์ฌ, ์กฐ์ง์ฌ๋ฆฌ์ ๋ฌธ๊ฐ, ์ฐ๊ตฌ์, ๊ต์\\n',\n",
|
| 137 |
+
" '2. ๊ต์ก\\n์ ๊ณต์ค๋ช
: ์ธ๊ฐ์ ํ์ต๊ณผ ๋ฐ๋ฌ์ ์ฐ๊ตฌํ๊ณ ํจ๊ณผ์ ์ธ ๊ต์๋ฒ๊ณผ ๊ต์ก ์ ๋๋ฅผ ํ๊ตฌํ๋ ํ๋ฌธ์ด๋ค.\\n์๊ตฌ์ญ๋: ์ํต ๋ฅ๋ ฅ, ์ธ๋ด์ฌ, ๊ด์ฐฐ๋ ฅ\\n์ ๊ณต๊ด์ฌ: ๊ต์ก๊ณผ์ , ์๋ ๋ฐ๋ฌ, ๊ต์ ํ์ต ๋ฐฉ๋ฒ\\n๊ด๋ จ์ง๋ก: ๊ต์ฌ, ๊ต์กํ์ ๊ฐ, ๊ต์ก์ฐ๊ตฌ์, ๊ต์, ๊ต์ก์ปจ์คํดํธ\\n',\n",
|
| 138 |
+
" '3. ์ฌํ๋ณต์ง\\n์ ๊ณต์ค๋ช
: ์ฌํ๋ณต์ง๋ ๋ค์ํ ์ฌํ ๊ตฌ์ฑ์์ ๋ณต์ง์ ์ถ์ ์ง ํฅ์์ ์ํ ์ ๋์ ์ค์ฒ ๋ฐฉ๋ฒ์ ์ฐ๊ตฌํ๋ ํ๋ฌธ์ด๋ค.\\n์๊ตฌ์ญ๋: ๊ณต๊ฐ ๋ฅ๋ ฅ, ๋์ธ๊ด๊ณ๋ฅ๋ ฅ, ๋ฌธ์ ํด๊ฒฐ๋ ฅ\\n์ ๊ณต๊ด์ฌ: ์ฌํ์ ์ฝ์ ์ง์, ๋ณต์ง ์ ์ฑ
, ์ธ๊ฐ ์กด์์ฑ\\n๊ด๋ จ์ง๋ก: ์ฌํ๋ณต์ง์ฌ, ์ฒญ์๋
์ง๋์ฌ, ๋ณต์งํ์ ๊ฐ, ๊ฐ์กฑ์๋ด์ฌ, ๋ค๋ฌธํ์ ๋ฌธ๊ฐ\\n']"
|
| 139 |
+
]
|
| 140 |
+
},
|
| 141 |
+
"execution_count": 6,
|
| 142 |
+
"metadata": {},
|
| 143 |
+
"output_type": "execute_result"
|
| 144 |
+
}
|
| 145 |
+
],
|
| 146 |
+
"source": [
|
| 147 |
+
"parts"
|
| 148 |
+
]
|
| 149 |
+
},
|
| 150 |
+
{
|
| 151 |
+
"cell_type": "code",
|
| 152 |
+
"execution_count": 7,
|
| 153 |
+
"id": "09768d88-b7a6-4ef6-bd3d-e58f23f8e82b",
|
| 154 |
+
"metadata": {},
|
| 155 |
+
"outputs": [
|
| 156 |
+
{
|
| 157 |
+
"data": {
|
| 158 |
+
"text/plain": [
|
| 159 |
+
"'OO ํ์์ ์๋์ ์ธ ์ ๊ณต ๋ถ์ผ(์ฌ๋ฆฌ, ๊ต์ก, ์ฌํ๋ณต์ง)์ ๋ํด ํฅ๋ฏธ ์์ค์ด ๋์ต๋๋ค.\\n๊ฐ ์ ๊ณต์ ๋ํ ์ ๋ณด๋ฅผ ๋ฐํ์ผ๋ก, ๊ณ ๋ฑํ์ ์ํ๊ธฐ๋ก๋ถ ๋ฌธ๊ตฌ๋ฅผ ์์ฑํด์ฃผ์ธ์:\\n\\n1. ์ฌ๋ฆฌ\\n์ ๊ณต์ค๋ช
: ์ธ๊ฐ์ ํ๋๊ณผ ์ ์ ๊ณผ์ ์ ๊ณผํ์ ์ผ๋ก ํ๊ตฌํ๋ ํ๋ฌธ์ด๋ค.\\n์๊ตฌ์ญ๋: ๊ณต๊ฐ ๋ฅ๋ ฅ, ๋ถ์๋ ฅ, ๊ด์ฐฐ๋ ฅ\\n์ ๊ณต๊ด์ฌ: ์ธ๊ฐ ํ๋, ๊ฐ์ , ์ธ์ง ๊ธฐ๋ฅ\\n๊ด๋ จ์ง๋ก: ์์์ฌ๋ฆฌ์ฌ, ์๋ด์ฌ, ์กฐ์ง์ฌ๋ฆฌ์ ๋ฌธ๊ฐ, ์ฐ๊ตฌ์, ๊ต์\\n\\n2. ๊ต์ก\\n์ ๊ณต์ค๋ช
: ์ธ๊ฐ์ ํ์ต๊ณผ ๋ฐ๋ฌ์ ์ฐ๊ตฌํ๊ณ ํจ๊ณผ์ ์ธ ๊ต์๋ฒ๊ณผ ๊ต์ก ์ ๋๋ฅผ ํ๊ตฌํ๋ ํ๋ฌธ์ด๋ค.\\n์๊ตฌ์ญ๋: ์ํต ๋ฅ๋ ฅ, ์ธ๋ด์ฌ, ๊ด์ฐฐ๋ ฅ\\n์ ๊ณต๊ด์ฌ: ๊ต์ก๊ณผ์ , ์๋ ๋ฐ๋ฌ, ๊ต์ ํ์ต ๋ฐฉ๋ฒ\\n๊ด๋ จ์ง๋ก: ๊ต์ฌ, ๊ต์กํ์ ๊ฐ, ๊ต์ก์ฐ๊ตฌ์, ๊ต์, ๊ต์ก์ปจ์คํดํธ\\n\\n3. ์ฌํ๋ณต์ง\\n์ ๊ณต์ค๋ช
: ์ฌํ๋ณต์ง๋ ๋ค์ํ ์ฌํ ๊ตฌ์ฑ์์ ๋ณต์ง์ ์ถ์ ์ง ํฅ์์ ์ํ ์ ๋์ ์ค์ฒ ๋ฐฉ๋ฒ์ ์ฐ๊ตฌํ๋ ํ๋ฌธ์ด๋ค.\\n์๊ตฌ์ญ๋: ๊ณต๊ฐ ๋ฅ๋ ฅ, ๋์ธ๊ด๊ณ๋ฅ๋ ฅ, ๋ฌธ์ ํด๊ฒฐ๋ ฅ\\n์ ๊ณต๊ด์ฌ: ์ฌํ์ ์ฝ์ ์ง์, ๋ณต์ง ์ ์ฑ
, ์ธ๊ฐ ์กด์์ฑ\\n๊ด๋ จ์ง๋ก: ์ฌํ๋ณต์ง์ฌ, ์ฒญ์๋
์ง๋์ฌ, ๋ณต์งํ์ ๊ฐ, ๊ฐ์กฑ์๋ด์ฌ, ๋ค๋ฌธํ์ ๋ฌธ๊ฐ\\n\\n'"
|
| 160 |
+
]
|
| 161 |
+
},
|
| 162 |
+
"execution_count": 7,
|
| 163 |
+
"metadata": {},
|
| 164 |
+
"output_type": "execute_result"
|
| 165 |
+
}
|
| 166 |
+
],
|
| 167 |
+
"source": [
|
| 168 |
+
"prompt\n",
|
| 169 |
+
"# ๋๋ฌด ๊ธธ๊ฒ๋์์ ํ์ต์ํฌ๋ ๋ถ์ํจ..."
|
| 170 |
+
]
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"cell_type": "code",
|
| 174 |
+
"execution_count": 17,
|
| 175 |
+
"id": "b178159f-cf48-4989-be66-85af3e7ec510",
|
| 176 |
+
"metadata": {},
|
| 177 |
+
"outputs": [],
|
| 178 |
+
"source": [
|
| 179 |
+
"# ์ ํธ ์ ๊ณต 3๊ฐ์ ๊ฐ๋ตํ ์ ๋ณด๋ง ์ถ์ถํด ๋์ค๋ ํ๋กฌํํธ๋ก ์์ \n",
|
| 180 |
+
"file_path = \"dataset/kcmii_major_rag_summarized.csv\"\n",
|
| 181 |
+
"df = pd.read_csv(file_path)\n",
|
| 182 |
+
"\n",
|
| 183 |
+
"import pandas as pd\n",
|
| 184 |
+
"\n",
|
| 185 |
+
"def make_major_prompt(df, majors: list) -> str:\n",
|
| 186 |
+
" parts = []\n",
|
| 187 |
+
" for i, major in enumerate(majors, 1):\n",
|
| 188 |
+
" row = df[df['์ ๊ณต๋ช
'] == major]\n",
|
| 189 |
+
" if row.empty:\n",
|
| 190 |
+
" continue\n",
|
| 191 |
+
" summary = row.iloc[0]['์์ฝ']\n",
|
| 192 |
+
" parts.append(f\"{i}. {major}: {summary}\")\n",
|
| 193 |
+
"\n",
|
| 194 |
+
" majors_str = \", \".join(majors)\n",
|
| 195 |
+
" prompt = f\"\"\"OO ํ์์ ์๋์ ์ธ ์ ๊ณต ๋ถ์ผ({majors_str})์ ํฅ๋ฏธ๋ฅผ ๊ฐ์ง๊ณ ์์ต๋๋ค.\n",
|
| 196 |
+
"์ด ์ ๊ณต๋ค์ ๋ํ ์์ฝ ์ ๋ณด๋ฅผ ์ฐธ๊ณ ํ์ฌ, ํ์์ ํ๋ ์์์ ๋๋ฌ๋ ๊ด์ฌ๊ณผ ์ญ๋ ๋๋ ์ตํฉ ๊ฐ๋ฅ์ฑ์ ๊ณ ๋ คํ์ฌ ๊ณ ๋ฑํ์ ์ํ๊ธฐ๋ก๋ถ ๋ฌธ๊ตฌ๋ฅผ ์์ ํ์ผ๋ก ์์ฑํด์ฃผ์ธ์:\n",
|
| 197 |
+
"\n",
|
| 198 |
+
"{chr(10).join(parts)}\n",
|
| 199 |
+
"\"\"\"\n",
|
| 200 |
+
" return prompt"
|
| 201 |
+
]
|
| 202 |
+
},
|
| 203 |
+
{
|
| 204 |
+
"cell_type": "code",
|
| 205 |
+
"execution_count": 18,
|
| 206 |
+
"id": "6c24ded3-2a88-47ad-89c3-40b5e7c0dc01",
|
| 207 |
+
"metadata": {},
|
| 208 |
+
"outputs": [
|
| 209 |
+
{
|
| 210 |
+
"name": "stdout",
|
| 211 |
+
"output_type": "stream",
|
| 212 |
+
"text": [
|
| 213 |
+
"OO ํ์์ ์๋์ ์ธ ์ ๊ณต ๋ถ์ผ(์ฌ๋ฆฌ, ๊ต์ก, ์ฌํ๋ณต์ง)์ ํฅ๋ฏธ๋ฅผ ๊ฐ์ง๊ณ ์์ต๋๋ค.\n",
|
| 214 |
+
"์ด ์ ๊ณต๋ค์ ๋ํ ์์ฝ ์ ๋ณด๋ฅผ ์ฐธ๊ณ ํ์ฌ, ํ์์ ํ๋ ์์์ ๋๋ฌ๋ ๊ด์ฌ๊ณผ ์ญ๋ ๋๋ ์ตํฉ ๊ฐ๋ฅ์ฑ์ ๊ณ ๋ คํ์ฌ ๊ณ ๋ฑํ์ ์ํ๊ธฐ๋ก๋ถ ๋ฌธ๊ตฌ๋ฅผ ์์ ํ์ผ๋ก ์์ฑํด์ฃผ์ธ์:\n",
|
| 215 |
+
"\n",
|
| 216 |
+
"1. ์ฌ๋ฆฌ: ์ธ๊ฐ์ ํ๋๊ณผ ์ ์ ๊ณผ์ ์ ๊ณผํ์ ์ผ๋ก ํ๊ตฌํ๋ ํ๋ฌธ์ด๋ค. (์ฃผ์ ์ญ๋: ๊ณต๊ฐ ๋ฅ๋ ฅ, ๊ด์ฌ ๋ถ์ผ: ์ธ๊ฐ ํ๋)\n",
|
| 217 |
+
"2. ๊ต์ก: ์ธ๊ฐ์ ํ์ต๊ณผ ๋ฐ๋ฌ์ ์ฐ๊ตฌํ๊ณ ํจ๊ณผ์ ์ธ ๊ต์๋ฒ๊ณผ ๊ต์ก ์ ๋๋ฅผ ํ๊ตฌํ๋ ํ๋ฌธ์ด๋ค. (์ฃผ์ ์ญ๋: ์ํต ๋ฅ๋ ฅ, ๊ด์ฌ ๋ถ์ผ: ๊ต์ก๊ณผ์ )\n",
|
| 218 |
+
"3. ์ฌํ๋ณต์ง: ์ฌํ๋ณต์ง๋ ๋ค์ํ ์ฌํ ๊ตฌ์ฑ์์ ๋ณต์ง์ ์ถ์ ์ง ํฅ์์ ์ํ ์ ๋์ ์ค์ฒ ๋ฐฉ๋ฒ์ ์ฐ๊ตฌํ๋ ํ๋ฌธ์ด๋ค. (์ฃผ์ ์ญ๋: ๊ณต๊ฐ ๋ฅ๋ ฅ, ๊ด์ฌ ๋ถ์ผ: ์ฌํ์ ์ฝ์ ์ง์)\n",
|
| 219 |
+
"\n"
|
| 220 |
+
]
|
| 221 |
+
}
|
| 222 |
+
],
|
| 223 |
+
"source": [
|
| 224 |
+
"example_majors = [\"์ฌ๋ฆฌ\", \"๊ต์ก\", \"์ฌํ๋ณต์ง\"]\n",
|
| 225 |
+
"prompt = make_major_prompt(df, example_majors)\n",
|
| 226 |
+
"print(prompt)"
|
| 227 |
+
]
|
| 228 |
+
},
|
| 229 |
+
{
|
| 230 |
+
"cell_type": "markdown",
|
| 231 |
+
"id": "6467b751-3228-4948-9042-de65f1a3f118",
|
| 232 |
+
"metadata": {},
|
| 233 |
+
"source": [
|
| 234 |
+
"### 50개 전공은 너무 많으니 공통적으로 묶이는 전공(유사분야) 케이스 10개만 분리해 학습데이터 생성\n",
|
| 235 |
+
"### LoRA 경량 학습으로 먼저 테스트"
|
| 236 |
+
]
|
| 237 |
+
},
|
| 238 |
+
{
|
| 239 |
+
"cell_type": "code",
|
| 240 |
+
"execution_count": 24,
|
| 241 |
+
"id": "bc8b0f50-c6e4-49fe-98a4-3ee8d8ef4b83",
|
| 242 |
+
"metadata": {},
|
| 243 |
+
"outputs": [
|
| 244 |
+
{
|
| 245 |
+
"name": "stdout",
|
| 246 |
+
"output_type": "stream",
|
| 247 |
+
"text": [
|
| 248 |
+
"์ฌ์ฉ ์ค์ธ ๋๋ฐ์ด์ค: mps\n"
|
| 249 |
+
]
|
| 250 |
+
},
|
| 251 |
+
{
|
| 252 |
+
"name": "stdin",
|
| 253 |
+
"output_type": "stream",
|
| 254 |
+
"text": [
|
| 255 |
+
"The repository for LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct contains custom code which must be executed to correctly load the model. You can inspect the repository content at https://hf.co/LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct.\n",
|
| 256 |
+
"You can avoid this prompt in future by passing the argument `trust_remote_code=True`.\n",
|
| 257 |
+
"\n",
|
| 258 |
+
"Do you wish to run the custom code? [y/N] y\n"
|
| 259 |
+
]
|
| 260 |
+
},
|
| 261 |
+
{
|
| 262 |
+
"name": "stderr",
|
| 263 |
+
"output_type": "stream",
|
| 264 |
+
"text": [
|
| 265 |
+
"A new version of the following files was downloaded from https://huggingface.co/LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct:\n",
|
| 266 |
+
"- configuration_exaone.py\n",
|
| 267 |
+
". Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.\n"
|
| 268 |
+
]
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"name": "stdin",
|
| 272 |
+
"output_type": "stream",
|
| 273 |
+
"text": [
|
| 274 |
+
"The repository for LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct contains custom code which must be executed to correctly load the model. You can inspect the repository content at https://hf.co/LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct.\n",
|
| 275 |
+
"You can avoid this prompt in future by passing the argument `trust_remote_code=True`.\n",
|
| 276 |
+
"\n",
|
| 277 |
+
"Do you wish to run the custom code? [y/N] y\n"
|
| 278 |
+
]
|
| 279 |
+
},
|
| 280 |
+
{
|
| 281 |
+
"name": "stderr",
|
| 282 |
+
"output_type": "stream",
|
| 283 |
+
"text": [
|
| 284 |
+
"A new version of the following files was downloaded from https://huggingface.co/LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct:\n",
|
| 285 |
+
"- modeling_exaone.py\n",
|
| 286 |
+
". Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.\n",
|
| 287 |
+
"Fetching 2 files: 100%|โโโโโโโโโโ| 2/2 [02:39<00:00, 79.72s/it] \n",
|
| 288 |
+
"Loading checkpoint shards: 100%|โโโโโโโโโโ| 2/2 [00:00<00:00, 37.62it/s]\n"
|
| 289 |
+
]
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"name": "stdout",
|
| 293 |
+
"output_type": "stream",
|
| 294 |
+
"text": [
|
| 295 |
+
"\n",
|
| 296 |
+
"๐ ์์ฑ๋ ๋ฌธ์ฅ:\n",
|
| 297 |
+
"\n",
|
| 298 |
+
"### ๋ช
๋ น:\n",
|
| 299 |
+
"๋ค์ ์ ๊ณต ์ ๋ณด๋ฅผ ๋ฐํ์ผ๋ก ํ์์ ํฅ๋ฏธ์ ํ๋์ ๋ฐ์ํ ์๊ธฐ๋ถ ๋ฌธ๊ตฌ๋ฅผ ์์ฑํ์ธ์.\n",
|
| 300 |
+
"\n",
|
| 301 |
+
"### ์
๋ ฅ:\n",
|
| 302 |
+
"1. ์ฌ๋ฆฌ: ์ธ๊ฐ์ ํ๋๊ณผ ์ ์ ๊ณผ์ ์ ๊ณผํ์ ์ผ๋ก ํ๊ตฌํ๋ ํ๋ฌธ์ด๋ค. (์ฃผ์ ์ญ๋: ๊ณต๊ฐ๋ฅ๋ ฅ, ๊ด์ฌ ๋ถ์ผ: ๊ฐ์ )\n",
|
| 303 |
+
"2. ๊ต์ก: ์ธ๊ฐ์ ํ์ต๊ณผ ๋ฐ๋ฌ์ ์ฐ๊ตฌํ๊ณ ํจ๊ณผ์ ์ธ ๊ต์๋ฒ์ ํ์ํ๋ ํ๋ฌธ์ด๋ค. (์ฃผ์ ์ญ๋: ์ํต๋ฅ๋ ฅ, ๊ด์ฌ ๋ถ์ผ: ์๋ ๋ฐ๋ฌ)\n",
|
| 304 |
+
"3. ์ฌํ๋ณต์ง: ๋ค์ํ ์ฌํ ๊ตฌ์ฑ์์ ์ถ์ ์ง ํฅ์์ ์ํ ์ ๋์ ์ค์ฒ ๋ฐฉ๋ฒ์ ์ฐ๊ตฌํ๋ ํ๋ฌธ์ด๋ค. (์ฃผ์ ์ญ๋: ๋์ธ๊ด๊ณ๋ฅ๋ ฅ, ๊ด์ฌ ๋ถ์ผ: ์ฌํ์ ์ฝ์ ์ง์)\n",
|
| 305 |
+
"\n",
|
| 306 |
+
"### ์ถ๋ ฅ:\n",
|
| 307 |
+
"\"์ฌ๋ฆฌํ์ ๋ฐ๋ปํจ ์์์ ๊น์ด ์๊ฒ ๊ฐ์ ์ ๊ท ๊ธฐ์ธ์ด๋ฉฐ, ๊ต์ก ํ์ฅ์์๋ ์์ด๋ค์ ์ฑ์ฅ์ ์ด๋๋ ์งํ๋ก์ด ๋ฉํ ๊ฐ ๋๊ณ ์ถ์ต๋๋ค. ๋ํ, ์ฌํ์ ์ฝ์๋ค์๊ฒ ํฌ๋ง์ ๋น์ ์ ํด์ฃผ๊ธฐ ์ํด ๋
ธ๋ ฅํ๋ ๋ฐ๋ปํ ์ฌํ๋ณต์ง์ฌ๊ฐ ๋์ด, ๋ชจ๋ ์ฌ๋์ด ์กด์ค๋ฐ๊ณ ํ๋ณตํ ์ ์๋๋ก ๋๊ณ ์ถ์ต๋๋ค.\"\n"
|
| 308 |
+
]
|
| 309 |
+
}
|
| 310 |
+
],
|
| 311 |
+
"source": [
|
| 312 |
+
"# ํ๊น
ํ์ด์ค์ ์๋ ๋ชจ๋ธ์ ์ํฌํธ ๋ฐ ํ ํฌ๋์ด์ ๋ก๋ฉ์ฉ ๋๊ตฌ ๋ถ๋ฌ์ ํ
์คํธ\n",
|
| 313 |
+
"import torch\n",
|
| 314 |
+
"from transformers import AutoTokenizer, AutoModelForCausalLM\n",
|
| 315 |
+
"\n",
|
| 316 |
+
"device = torch.device(\"mps\" if torch.backends.mps.is_available() else \"cpu\")\n",
|
| 317 |
+
"print(\"์ฌ์ฉ ์ค์ธ ๋๋ฐ์ด์ค:\", device)\n",
|
| 318 |
+
"\n",
|
| 319 |
+
"# ๋ชจ๋ธ ๋ถ๋ฌ์ค๊ธฐ\n",
|
| 320 |
+
"model_name = \"LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct\"\n",
|
| 321 |
+
"tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
|
| 322 |
+
"model = AutoModelForCausalLM.from_pretrained(model_name).to(device)\n",
|
| 323 |
+
"\n",
|
| 324 |
+
"# ์ฌ์ฉ์๊ฐ ์
๋ ฅํ ํ
์คํธ ๋๋ ์
๋ ฅ๊ฐ\n",
|
| 325 |
+
"prompt = \"\"\"### ๋ช
๋ น:\n",
|
| 326 |
+
"๋ค์ ์ ๊ณต ์ ๋ณด๋ฅผ ๋ฐํ์ผ๋ก ํ์์ ํฅ๋ฏธ์ ํ๋์ ๋ฐ์ํ ์๊ธฐ๋ถ ๋ฌธ๊ตฌ๋ฅผ ์์ฑํ์ธ์.\n",
|
| 327 |
+
"\n",
|
| 328 |
+
"### ์
๋ ฅ:\n",
|
| 329 |
+
"1. ์ฌ๋ฆฌ: ์ธ๊ฐ์ ํ๋๊ณผ ์ ์ ๊ณผ์ ์ ๊ณผํ์ ์ผ๋ก ํ๊ตฌํ๋ ํ๋ฌธ์ด๋ค. (์ฃผ์ ์ญ๋: ๊ณต๊ฐ๋ฅ๋ ฅ, ๊ด์ฌ ๋ถ์ผ: ๊ฐ์ )\n",
|
| 330 |
+
"2. ๊ต์ก: ์ธ๊ฐ์ ํ์ต๊ณผ ๋ฐ๋ฌ์ ์ฐ๊ตฌํ๊ณ ํจ๊ณผ์ ์ธ ๊ต์๋ฒ์ ํ์ํ๋ ํ๋ฌธ์ด๋ค. (์ฃผ์ ์ญ๋: ์ํต๋ฅ๋ ฅ, ๊ด์ฌ ๋ถ์ผ: ์๋ ๋ฐ๋ฌ)\n",
|
| 331 |
+
"3. ์ฌํ๋ณต๏ฟฝ๏ฟฝ: ๋ค์ํ ์ฌํ ๊ตฌ์ฑ์์ ์ถ์ ์ง ํฅ์์ ์ํ ์ ๋์ ์ค์ฒ ๋ฐฉ๋ฒ์ ์ฐ๊ตฌํ๋ ํ๋ฌธ์ด๋ค. (์ฃผ์ ์ญ๋: ๋์ธ๊ด๊ณ๋ฅ๋ ฅ, ๊ด์ฌ ๋ถ์ผ: ์ฌํ์ ์ฝ์ ์ง์)\n",
|
| 332 |
+
"\n",
|
| 333 |
+
"### ์ถ๋ ฅ:\n",
|
| 334 |
+
"\"\"\"\n",
|
| 335 |
+
"\n",
|
| 336 |
+
"# ์
๋ ฅ๊ฐ์ ํ ํฌ๋์ด์ ๋ฅผ ํตํด ์ซ์ ํ ํฐ์ผ๋ก ๋ณํ\n",
|
| 337 |
+
"# return_tensors=\"pt\" ์
๋ ฅ๊ฐ์ ์ซ์ ํ ํฐํํ์ฌ ํ์ดํ ์ง ํ
์ ํํ๋ก ๋ฆฌํด\n",
|
| 338 |
+
"inputs = tokenizer(prompt, return_tensors=\"pt\").to(device)\n",
|
| 339 |
+
"\n",
|
| 340 |
+
"# gpt๋ ์
๋ ฅ ์์ฒด๊ฐ ํญ์ ํ๋์ ์ฐ์๋ ๋ฌธ์ฅ ์คํ์ค์ด๊ธฐ ๋๋ฌธ์ ๋ฌธ์ฅ ๊ตฌ๋ถ์ด ํ์์์\n",
|
| 341 |
+
"if 'token_type_ids' in inputs:\n",
|
| 342 |
+
" inputs.pop('token_type_ids')\n",
|
| 343 |
+
"\n",
|
| 344 |
+
"# torch.no_grad() ์ถ๋ก ์์๋ ๊ธฐ์ธ๊ธฐ ๊ฒ์ฐํ์ง ์๊ฒํ์ฌ ์๋/๋ฉ๋ชจ๋ฆฌ ์ ์ฝ -> ์ ์ด๋ ๊ฒ ํด์ผํ๋?\n",
|
| 345 |
+
"with torch.no_grad():\n",
|
| 346 |
+
" # model.generate() ์ฃผ์ด์ง ์
๋ ฅ์ ๋ํด ํ
์คํธ๋ฅผ ์์ฑํจ\n",
|
| 347 |
+
" ## ์์ธํ ๋ด์ฉ๊ณผ ์ดํด๋ ๋
ธ์
์ฐธ๊ณ \n",
|
| 348 |
+
" outputs = model.generate(\n",
|
| 349 |
+
" **inputs,\n",
|
| 350 |
+
" # ์ต๋ 80๊ฐ์ ์๋ก์ด ํ ํฐ ์์ฑ\n",
|
| 351 |
+
" ## ๋ชจ๋ธ์ด ์๋ก ์์ฑํ ์ต๋ ํ ํฐ์ ๊ฐ์\n",
|
| 352 |
+
" max_new_tokens=150,\n",
|
| 353 |
+
" # ํ๋ฅ ๊ธฐ๋ฐ ์ํ๋ง ํ์ฑํ\n",
|
| 354 |
+
" ## ๋ชจ๋ธ์ด ๋ค์ ํ ํฐ์ ์ ํํ ๋ ๊ฐ์ฅ ํ๋ฅ ๋์ ๋จ์ด๋ฅผ ๋ฌด์กฐ๊ฑด ๊ณ ๋ฅด๋ ๊ฒ์ด ์๋๋ผ ํ๋ฅ ๋ถํฌ์์ ๋ฌด์์๋ก ํ๋ ์ํ๋ง\n",
|
| 355 |
+
" do_sample=True,\n",
|
| 356 |
+
" # ํ๋ฅ ๋์ ์์ 50๊ฐ ์ค์์ ์ ํ\n",
|
| 357 |
+
" ## ์ํ๋งํ ๋ ์์ k๊ฐ์ ๋จ์ด๋ง์ผ๋ก ํ๋ณด๋ฅผ ์ ํ\n",
|
| 358 |
+
" top_k=50,\n",
|
| 359 |
+
" # ๋์ ํ๋ฅ 95%๊น์ง ํฌํจํ ํ๋ณด๊ตฐ์์ ์ ํ\n",
|
| 360 |
+
" ## ์์ ๋จ์๋ค์ ๋์ ํ๋ฅ ์ด 95% ๋์ ๋๊น์ง ํ๋ณด๊ตฐ์ ๋์ ์ํด, ๊ทธ ์์์ ์ํ๋ง\n",
|
| 361 |
+
" top_p=0.95,\n",
|
| 362 |
+
" # ์ฐฝ์์ฑ ์กฐ์ ๊ฐ\n",
|
| 363 |
+
" temperature=0.8,\n",
|
| 364 |
+
" repetition_penalty=1.1\n",
|
| 365 |
+
" )\n",
|
| 366 |
+
"\n",
|
| 367 |
+
"# ํ ํฐ์ ํ
์คํธ๋ก ๋์ฝ๋ฉ\n",
|
| 368 |
+
"# ๋ชจ๋ธ์ด ์์ฑํ ํ ํฐ ์ํ์ค ์ค ์ฒซ๋ฒ ์งธ ๊ฒฐ๊ณผ๋ฅผ\n",
|
| 369 |
+
"# skip_special_tokens=True ํน์ํ ํฐ ์ ๊ฑฐ\n",
|
| 370 |
+
"result = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
|
| 371 |
+
"print(\"\\n๐ ์์ฑ๋ ๋ฌธ์ฅ:\\n\")\n",
|
| 372 |
+
"print(result)"
|
| 373 |
+
]
|
| 374 |
+
},
|
| 375 |
+
{
|
| 376 |
+
"cell_type": "code",
|
| 377 |
+
"execution_count": 25,
|
| 378 |
+
"id": "382ea67d-298b-4d78-b16c-9f66f3052e21",
|
| 379 |
+
"metadata": {},
|
| 380 |
+
"outputs": [
|
| 381 |
+
{
|
| 382 |
+
"name": "stdout",
|
| 383 |
+
"output_type": "stream",
|
| 384 |
+
"text": [
|
| 385 |
+
"\n",
|
| 386 |
+
"๐ ์์ฑ๋ ๋ฌธ์ฅ:\n",
|
| 387 |
+
"\n",
|
| 388 |
+
"### ๋ช
๋ น:\n",
|
| 389 |
+
"๋ค์ ์ ๊ณต ์ ๋ณด๋ฅผ ๋ฐํ์ผ๋ก ํ์์ ํฅ๋ฏธ์ ํ๋์ ๋ฐ์ํ ์๊ธฐ๋ถ ๋ฌธ๊ตฌ๋ฅผ ์์ฑํ์ธ์.\n",
|
| 390 |
+
"\n",
|
| 391 |
+
"### ์
๋ ฅ:\n",
|
| 392 |
+
"1. ์ฌ๋ฆฌ: ์ธ๊ฐ์ ํ๋๊ณผ ์ ์ ๊ณผ์ ์ ๊ณผํ์ ์ผ๋ก ํ๊ตฌํ๋ ํ๋ฌธ์ด๋ค. (์ฃผ์ ์ญ๋: ๊ณต๊ฐ๋ฅ๋ ฅ, ๊ด์ฌ ๋ถ์ผ: ๊ฐ์ )\n",
|
| 393 |
+
"2. ๊ต์ก: ์ธ๊ฐ์ ํ์ต๊ณผ ๋ฐ๋ฌ์ ์ฐ๊ตฌํ๊ณ ํจ๊ณผ์ ์ธ ๊ต์๋ฒ์ ํ์ํ๋ ํ๋ฌธ์ด๋ค. (์ฃผ์ ์ญ๋: ์ํต๋ฅ๋ ฅ, ๊ด์ฌ ๋ถ์ผ: ์๋ ๋ฐ๋ฌ)\n",
|
| 394 |
+
"3. ์ฌํ๋ณต์ง: ๋ค์ํ ์ฌํ ๊ตฌ์ฑ์์ ์ถ์ ์ง ํฅ์์ ์ํ ์ ๋์ ์ค์ฒ ๋ฐฉ๋ฒ์ ์ฐ๊ตฌํ๋ ํ๋ฌธ์ด๋ค. (์ฃผ์ ์ญ๋: ๋์ธ๊ด๊ณ๋ฅ๋ ฅ, ๊ด์ฌ ๋ถ์ผ: ์ฌํ์ ์ฝ์ ์ง์)\n",
|
| 395 |
+
"\n",
|
| 396 |
+
"### ์ถ๋ ฅ:\n",
|
| 397 |
+
"\"์ฌ๋ฆฌํ์ ํตํด ์ฌ์ธํ ๊ฐ์ ์ ๊น์ด๋ฅผ ๋๋ผ๊ณ , ๊ต์ก์ ์์ด๋ค์ ์ ์ฌ๋ ฅ์ ๊นจ์ฐ๋ฉฐ ์ฑ์ฅ์ํค๋ ๋ฐ ํฐ ๊ธฐ์จ์ ๋๊ผ์ต๋๋ค. ์ฌํ๋ณต์ง์์๋ ๋ณต์กํ ์ฌํ ๋ฌธ์ ์์์๋ ๋ฐ๋ปํ ์๊ธธ๋ก ์ด๋ ค์์ ๊ฒช๋ ์ด๋ค์ ๋๋ ๊ฒ์ด ํฐ ๋ณด๋์ด์์ต๋๋ค.\"\n"
|
| 398 |
+
]
|
| 399 |
+
}
|
| 400 |
+
],
|
| 401 |
+
"source": [
|
| 402 |
+
"# ์ฌ์ฉ์๊ฐ ์
๋ ฅํ ํ
์คํธ ๋๋ ์
๋ ฅ๊ฐ\n",
|
| 403 |
+
"prompt = \"\"\"### ๋ช
๋ น:\n",
|
| 404 |
+
"๋ค์ ์ ๊ณต ์ ๋ณด๋ฅผ ๋ฐํ์ผ๋ก ํ์์ ํฅ๋ฏธ์ ํ๋์ ๋ฐ์ํ ์๊ธฐ๋ถ ๋ฌธ๊ตฌ๋ฅผ ์์ฑํ์ธ์.\n",
|
| 405 |
+
"\n",
|
| 406 |
+
"### ์
๋ ฅ:\n",
|
| 407 |
+
"1. ์ฌ๋ฆฌ: ์ธ๊ฐ์ ํ๋๊ณผ ์ ์ ๊ณผ์ ์ ๊ณผํ์ ์ผ๋ก ํ๊ตฌํ๋ ํ๋ฌธ์ด๋ค. (์ฃผ์ ์ญ๋: ๊ณต๊ฐ๋ฅ๋ ฅ, ๊ด์ฌ ๋ถ์ผ: ๊ฐ์ )\n",
|
| 408 |
+
"2. ๊ต์ก: ์ธ๊ฐ์ ํ์ต๊ณผ ๋ฐ๋ฌ์ ์ฐ๊ตฌํ๊ณ ํจ๊ณผ์ ์ธ ๊ต์๋ฒ์ ํ์ํ๋ ํ๋ฌธ์ด๋ค. (์ฃผ์ ์ญ๋: ์ํต๋ฅ๋ ฅ, ๊ด์ฌ ๋ถ์ผ: ์๋ ๋ฐ๋ฌ)\n",
|
| 409 |
+
"3. ์ฌํ๋ณต์ง: ๋ค์ํ ์ฌํ ๊ตฌ์ฑ์์ ์ถ์ ์ง ํฅ์์ ์ํ ์ ๋์ ์ค์ฒ ๋ฐฉ๋ฒ์ ์ฐ๊ตฌํ๋ ํ๋ฌธ์ด๋ค. (์ฃผ์ ์ญ๋: ๋์ธ๊ด๊ณ๋ฅ๋ ฅ, ๊ด์ฌ ๋ถ์ผ: ์ฌํ์ ์ฝ์ ์ง์)\n",
|
| 410 |
+
"\n",
|
| 411 |
+
"### ์ถ๋ ฅ:\n",
|
| 412 |
+
"\"\"\"\n",
|
| 413 |
+
"\n",
|
| 414 |
+
"# ์
๋ ฅ๊ฐ์ ํ ํฌ๋์ด์ ๋ฅผ ํตํด ์ซ์ ํ ํฐ์ผ๋ก ๋ณํ\n",
|
| 415 |
+
"# return_tensors=\"pt\" ์
๋ ฅ๊ฐ์ ์ซ์ ํ ํฐํํ์ฌ ํ์ดํ ์ง ํ
์ ํํ๋ก ๋ฆฌํด\n",
|
| 416 |
+
"inputs = tokenizer(prompt, return_tensors=\"pt\").to(device)\n",
|
| 417 |
+
"\n",
|
| 418 |
+
"# gpt๋ ์
๋ ฅ ์์ฒด๊ฐ ํญ์ ํ๋์ ์ฐ์๋ ๋ฌธ์ฅ ์คํ์ค์ด๊ธฐ ๋๋ฌธ์ ๋ฌธ์ฅ ๊ตฌ๋ถ์ด ํ์๏ฟฝ๏ฟฝ์\n",
|
| 419 |
+
"if 'token_type_ids' in inputs:\n",
|
| 420 |
+
" inputs.pop('token_type_ids')\n",
|
| 421 |
+
"\n",
|
| 422 |
+
"# torch.no_grad() ์ถ๋ก ์์๋ ๊ธฐ์ธ๊ธฐ ๊ฒ์ฐํ์ง ์๊ฒํ์ฌ ์๋/๋ฉ๋ชจ๋ฆฌ ์ ์ฝ -> ์ ์ด๋ ๊ฒ ํด์ผํ๋?\n",
|
| 423 |
+
"with torch.no_grad():\n",
|
| 424 |
+
" # model.generate() ์ฃผ์ด์ง ์
๋ ฅ์ ๋ํด ํ
์คํธ๋ฅผ ์์ฑํจ\n",
|
| 425 |
+
" ## ์์ธํ ๋ด์ฉ๊ณผ ์ดํด๋ ๋
ธ์
์ฐธ๊ณ \n",
|
| 426 |
+
" outputs = model.generate(\n",
|
| 427 |
+
" **inputs,\n",
|
| 428 |
+
" # ์ต๋ 80๊ฐ์ ์๋ก์ด ํ ํฐ ์์ฑ\n",
|
| 429 |
+
" ## ๋ชจ๋ธ์ด ์๋ก ์์ฑํ ์ต๋ ํ ํฐ์ ๊ฐ์\n",
|
| 430 |
+
" max_new_tokens=150,\n",
|
| 431 |
+
" # ํ๋ฅ ๊ธฐ๋ฐ ์ํ๋ง ํ์ฑํ\n",
|
| 432 |
+
" ## ๋ชจ๋ธ์ด ๋ค์ ํ ํฐ์ ์ ํํ ๋ ๊ฐ์ฅ ํ๋ฅ ๋์ ๋จ์ด๋ฅผ ๋ฌด์กฐ๊ฑด ๊ณ ๋ฅด๋ ๊ฒ์ด ์๋๋ผ ํ๋ฅ ๋ถํฌ์์ ๋ฌด์์๋ก ํ๋ ์ํ๋ง\n",
|
| 433 |
+
" do_sample=True,\n",
|
| 434 |
+
" # ํ๋ฅ ๋์ ์์ 50๊ฐ ์ค์์ ์ ํ\n",
|
| 435 |
+
" ## ์ํ๋งํ ๋ ์์ k๊ฐ์ ๋จ์ด๋ง์ผ๋ก ํ๋ณด๋ฅผ ์ ํ\n",
|
| 436 |
+
" top_k=50,\n",
|
| 437 |
+
" # ๋์ ํ๋ฅ 95%๊น์ง ํฌํจํ ํ๋ณด๊ตฐ์์ ์ ํ\n",
|
| 438 |
+
" ## ์์ ๋จ์๋ค์ ๋์ ํ๋ฅ ์ด 95% ๋์ ๋๊น์ง ํ๋ณด๊ตฐ์ ๋์ ์ํด, ๊ทธ ์์์ ์ํ๋ง\n",
|
| 439 |
+
" top_p=0.95,\n",
|
| 440 |
+
" # ์ฐฝ์์ฑ ์กฐ์ ๊ฐ\n",
|
| 441 |
+
" temperature=0.8,\n",
|
| 442 |
+
" repetition_penalty=1.1\n",
|
| 443 |
+
" )\n",
|
| 444 |
+
"\n",
|
| 445 |
+
"# ํ ํฐ์ ํ
์คํธ๋ก ๋์ฝ๋ฉ\n",
|
| 446 |
+
"# ๋ชจ๋ธ์ด ์์ฑํ ํ ํฐ ์ํ์ค ์ค ์ฒซ๋ฒ ์งธ ๊ฒฐ๊ณผ๋ฅผ\n",
|
| 447 |
+
"# skip_special_tokens=True ํน์ํ ํฐ ์ ๊ฑฐ\n",
|
| 448 |
+
"result = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
|
| 449 |
+
"print(\"\\n๐ ์์ฑ๋ ๋ฌธ์ฅ:\\n\")\n",
|
| 450 |
+
"print(result)"
|
| 451 |
+
]
|
| 452 |
+
},
|
| 453 |
+
{
|
| 454 |
+
"cell_type": "code",
|
| 455 |
+
"execution_count": null,
|
| 456 |
+
"id": "7f17e54f-824f-4c2c-9318-bfcf92c03315",
|
| 457 |
+
"metadata": {},
|
| 458 |
+
"outputs": [],
|
| 459 |
+
"source": []
|
| 460 |
+
}
|
| 461 |
+
],
|
| 462 |
+
"metadata": {
|
| 463 |
+
"kernelspec": {
|
| 464 |
+
"display_name": "Python [conda env:base] *",
|
| 465 |
+
"language": "python",
|
| 466 |
+
"name": "conda-base-py"
|
| 467 |
+
},
|
| 468 |
+
"language_info": {
|
| 469 |
+
"codemirror_mode": {
|
| 470 |
+
"name": "ipython",
|
| 471 |
+
"version": 3
|
| 472 |
+
},
|
| 473 |
+
"file_extension": ".py",
|
| 474 |
+
"mimetype": "text/x-python",
|
| 475 |
+
"name": "python",
|
| 476 |
+
"nbconvert_exporter": "python",
|
| 477 |
+
"pygments_lexer": "ipython3",
|
| 478 |
+
"version": "3.12.7"
|
| 479 |
+
}
|
| 480 |
+
},
|
| 481 |
+
"nbformat": 4,
|
| 482 |
+
"nbformat_minor": 5
|
| 483 |
+
}
|
prototype/kcmii_lm_test.ipynb
ADDED
|
@@ -0,0 +1,1104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 2,
|
| 6 |
+
"id": "89eeae89-a93f-4afd-be82-7299362efd13",
|
| 7 |
+
"metadata": {},
|
| 8 |
+
"outputs": [
|
| 9 |
+
{
|
| 10 |
+
"name": "stdout",
|
| 11 |
+
"output_type": "stream",
|
| 12 |
+
"text": [
|
| 13 |
+
"Collecting torch\n",
|
| 14 |
+
" Downloading torch-2.7.0-cp310-none-macosx_11_0_arm64.whl.metadata (29 kB)\n",
|
| 15 |
+
"Collecting transformers\n",
|
| 16 |
+
" Downloading transformers-4.51.3-py3-none-any.whl.metadata (38 kB)\n",
|
| 17 |
+
"Requirement already satisfied: filelock in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from torch) (3.18.0)\n",
|
| 18 |
+
"Requirement already satisfied: typing-extensions>=4.10.0 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from torch) (4.12.2)\n",
|
| 19 |
+
"Collecting sympy>=1.13.3 (from torch)\n",
|
| 20 |
+
" Downloading sympy-1.14.0-py3-none-any.whl.metadata (12 kB)\n",
|
| 21 |
+
"Collecting networkx (from torch)\n",
|
| 22 |
+
" Downloading networkx-3.4.2-py3-none-any.whl.metadata (6.3 kB)\n",
|
| 23 |
+
"Requirement already satisfied: jinja2 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from torch) (3.1.6)\n",
|
| 24 |
+
"Requirement already satisfied: fsspec in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from torch) (2025.3.0)\n",
|
| 25 |
+
"Requirement already satisfied: huggingface-hub<1.0,>=0.30.0 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from transformers) (0.30.2)\n",
|
| 26 |
+
"Requirement already satisfied: numpy>=1.17 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from transformers) (2.2.5)\n",
|
| 27 |
+
"Requirement already satisfied: packaging>=20.0 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from transformers) (24.2)\n",
|
| 28 |
+
"Requirement already satisfied: pyyaml>=5.1 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from transformers) (6.0.2)\n",
|
| 29 |
+
"Collecting regex!=2019.12.17 (from transformers)\n",
|
| 30 |
+
" Downloading regex-2024.11.6-cp310-cp310-macosx_11_0_arm64.whl.metadata (40 kB)\n",
|
| 31 |
+
"Requirement already satisfied: requests in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from transformers) (2.32.3)\n",
|
| 32 |
+
"Collecting tokenizers<0.22,>=0.21 (from transformers)\n",
|
| 33 |
+
" Downloading tokenizers-0.21.1-cp39-abi3-macosx_11_0_arm64.whl.metadata (6.8 kB)\n",
|
| 34 |
+
"Collecting safetensors>=0.4.3 (from transformers)\n",
|
| 35 |
+
" Downloading safetensors-0.5.3-cp38-abi3-macosx_11_0_arm64.whl.metadata (3.8 kB)\n",
|
| 36 |
+
"Requirement already satisfied: tqdm>=4.27 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from transformers) (4.67.1)\n",
|
| 37 |
+
"Collecting mpmath<1.4,>=1.1.0 (from sympy>=1.13.3->torch)\n",
|
| 38 |
+
" Downloading mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)\n",
|
| 39 |
+
"Requirement already satisfied: MarkupSafe>=2.0 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from jinja2->torch) (3.0.2)\n",
|
| 40 |
+
"Requirement already satisfied: charset-normalizer<4,>=2 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from requests->transformers) (3.3.2)\n",
|
| 41 |
+
"Requirement already satisfied: idna<4,>=2.5 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from requests->transformers) (3.7)\n",
|
| 42 |
+
"Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from requests->transformers) (2.3.0)\n",
|
| 43 |
+
"Requirement already satisfied: certifi>=2017.4.17 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from requests->transformers) (2025.4.26)\n",
|
| 44 |
+
"Downloading torch-2.7.0-cp310-none-macosx_11_0_arm64.whl (68.6 MB)\n",
|
| 45 |
+
"\u001b[2K \u001b[90mโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ\u001b[0m \u001b[32m68.6/68.6 MB\u001b[0m \u001b[31m58.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
|
| 46 |
+
"Downloading transformers-4.51.3-py3-none-any.whl (10.4 MB)\n",
|
| 47 |
+
"\u001b[2K \u001b[90mโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ\u001b[0m \u001b[32m10.4/10.4 MB\u001b[0m \u001b[31m55.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 48 |
+
"\u001b[?25hDownloading regex-2024.11.6-cp310-cp310-macosx_11_0_arm64.whl (284 kB)\n",
|
| 49 |
+
"Downloading safetensors-0.5.3-cp38-abi3-macosx_11_0_arm64.whl (418 kB)\n",
|
| 50 |
+
"Downloading sympy-1.14.0-py3-none-any.whl (6.3 MB)\n",
|
| 51 |
+
"\u001b[2K \u001b[90mโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ\u001b[0m \u001b[32m6.3/6.3 MB\u001b[0m \u001b[31m54.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 52 |
+
"Downloading tokenizers-0.21.1-cp39-abi3-macosx_11_0_arm64.whl (2.7 MB)\n",
|
| 53 |
+
"\u001b[2K \u001b[90mโโโโโโโโโโโโโโโโ๏ฟฝ๏ฟฝ๏ฟฝโโโโโโโโโโโโโโโโโโโโโโโ\u001b[0m \u001b[32m2.7/2.7 MB\u001b[0m \u001b[31m49.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 54 |
+
"\u001b[?25hDownloading networkx-3.4.2-py3-none-any.whl (1.7 MB)\n",
|
| 55 |
+
"\u001b[2K \u001b[90mโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m45.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 56 |
+
"Downloading mpmath-1.3.0-py3-none-any.whl (536 kB)\n",
|
| 57 |
+
"\u001b[2K \u001b[90mโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ\u001b[0m \u001b[32m536.2/536.2 kB\u001b[0m \u001b[31m21.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 58 |
+
"Installing collected packages: mpmath, sympy, safetensors, regex, networkx, torch, tokenizers, transformers\n",
|
| 59 |
+
"Successfully installed mpmath-1.3.0 networkx-3.4.2 regex-2024.11.6 safetensors-0.5.3 sympy-1.14.0 tokenizers-0.21.1 torch-2.7.0 transformers-4.51.3\n"
|
| 60 |
+
]
|
| 61 |
+
}
|
| 62 |
+
],
|
| 63 |
+
"source": [
|
| 64 |
+
"# ๋ชจ๋ธ ์ํฌํธ์ ํ
์คํธ์ ๊ธฐ๋ณธ ํจํค์ง ์ค์น\n",
|
| 65 |
+
"!pip install torch transformers"
|
| 66 |
+
]
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"cell_type": "code",
|
| 70 |
+
"execution_count": 1,
|
| 71 |
+
"id": "e710baec-6288-410c-9ca3-28ab64064613",
|
| 72 |
+
"metadata": {},
|
| 73 |
+
"outputs": [
|
| 74 |
+
{
|
| 75 |
+
"name": "stdout",
|
| 76 |
+
"output_type": "stream",
|
| 77 |
+
"text": [
|
| 78 |
+
"MPS ์ฌ์ฉ ๊ฐ๋ฅ ์ฌ๋ถ: True\n",
|
| 79 |
+
"MPS ์ค๋น ์๋ฃ ์ฌ๋ถ: True\n"
|
| 80 |
+
]
|
| 81 |
+
}
|
| 82 |
+
],
|
| 83 |
+
"source": [
|
| 84 |
+
"import torch\n",
|
| 85 |
+
"print(\"MPS ์ฌ์ฉ ๊ฐ๋ฅ ์ฌ๋ถ:\", torch.backends.mps.is_available())\n",
|
| 86 |
+
"print(\"MPS ์ค๋น ์๋ฃ ์ฌ๋ถ:\", torch.backends.mps.is_built())"
|
| 87 |
+
]
|
| 88 |
+
},
|
| 89 |
+
{
|
| 90 |
+
"cell_type": "code",
|
| 91 |
+
"execution_count": 1,
|
| 92 |
+
"id": "00c17b86-2c03-4d87-99aa-47511bab5715",
|
| 93 |
+
"metadata": {},
|
| 94 |
+
"outputs": [
|
| 95 |
+
{
|
| 96 |
+
"name": "stderr",
|
| 97 |
+
"output_type": "stream",
|
| 98 |
+
"text": [
|
| 99 |
+
"/opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
| 100 |
+
" from .autonotebook import tqdm as notebook_tqdm\n"
|
| 101 |
+
]
|
| 102 |
+
},
|
| 103 |
+
{
|
| 104 |
+
"ename": "NameError",
|
| 105 |
+
"evalue": "name 'torch' is not defined",
|
| 106 |
+
"output_type": "error",
|
| 107 |
+
"traceback": [
|
| 108 |
+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
| 109 |
+
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
|
| 110 |
+
"Cell \u001b[0;32mIn[1], line 7\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mtransformers\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m AutoTokenizer, AutoModelForCausalLM\n\u001b[1;32m 4\u001b[0m \u001b[38;5;66;03m# ๋๋ฐ์ด์ค ์ค์ (ํด๋น ๊ธฐ๊ธฐ์์ mps๊ฐ ์ฌ์ฉ๊ฐ๋ฅํ์ง ํ์ธ)\u001b[39;00m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# ํ์ธ ํ ๋ชจ๋ธ/๋ฐ์ดํฐ๋ฅผ gpu / cpu๋ก ํ ์ง ๊ฒฐ์ \u001b[39;00m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;66;03m# torch.backends.mps.is_available() -> mps ์ฌ์ฉ ์ฌ๋ถ ํ์ธ\u001b[39;00m\n\u001b[0;32m----> 7\u001b[0m device \u001b[38;5;241m=\u001b[39m \u001b[43mtorch\u001b[49m\u001b[38;5;241m.\u001b[39mdevice(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmps\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m torch\u001b[38;5;241m.\u001b[39mbackends\u001b[38;5;241m.\u001b[39mmps\u001b[38;5;241m.\u001b[39mis_available() \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcpu\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m์ฌ์ฉ ์ค์ธ ๋๋ฐ์ด์ค:\u001b[39m\u001b[38;5;124m\"\u001b[39m, device)\n\u001b[1;32m 10\u001b[0m \u001b[38;5;66;03m# ๋ชจ๋ธ ๋ถ๋ฌ์ค๊ธฐ\u001b[39;00m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;66;03m# model_name ํ๊น
ํ์ด์ค์์ ๋ถ๋ฌ์ฌ ๋ชจ๋ธ ์ง์ \u001b[39;00m\n",
|
| 111 |
+
"\u001b[0;31mNameError\u001b[0m: name 'torch' is not defined"
|
| 112 |
+
]
|
| 113 |
+
}
|
| 114 |
+
],
|
| 115 |
+
"source": [
|
| 116 |
+
"# ํ๊น
ํ์ด์ค์ ์๋ ๋ชจ๋ธ์ ์ํฌํธ ๋ฐ ํ ํฌ๋์ด์ ๋ก๋ฉ์ฉ ๋๊ตฌ ๋ถ๋ฌ์ค๊ธฐ\n",
|
| 117 |
+
"from transformers import AutoTokenizer, AutoModelForCausalLM\n",
|
| 118 |
+
"\n",
|
| 119 |
+
"# ๋๋ฐ์ด์ค ์ค์ (ํด๋น ๊ธฐ๊ธฐ์์ mps๊ฐ ์ฌ์ฉ๊ฐ๋ฅํ์ง ํ์ธ)\n",
|
| 120 |
+
"# ํ์ธ ํ ๋ชจ๋ธ/๋ฐ์ดํฐ๋ฅผ gpu / cpu๋ก ํ ์ง ๊ฒฐ์ \n",
|
| 121 |
+
"# torch.backends.mps.is_available() -> mps ์ฌ์ฉ ์ฌ๋ถ ํ์ธ\n",
|
| 122 |
+
"device = torch.device(\"mps\" if torch.backends.mps.is_available() else \"cpu\")\n",
|
| 123 |
+
"print(\"์ฌ์ฉ ์ค์ธ ๋๋ฐ์ด์ค:\", device)\n",
|
| 124 |
+
"\n",
|
| 125 |
+
"# ๋ชจ๋ธ ๋ถ๋ฌ์ค๊ธฐ\n",
|
| 126 |
+
"# model_name ํ๊น
ํ์ด์ค์์ ๋ถ๋ฌ์ฌ ๋ชจ๋ธ ์ง์ \n",
|
| 127 |
+
"model_name = \"EleutherAI/polyglot-ko-1.3b\"\n",
|
| 128 |
+
"# ํด๋น ๋ชจ๋ธ๊ณผ ํธํ๋๋ ํ ํฌ๋์ด์ (๋ฌธ์๏ฟฝ๏ฟฝ๏ฟฝ -> ์ซ์ ๋ณํ๊ธฐ) ๋ถ๋ฌ์ค๊ธฐ ## ํ ํฌ๋์ด์ ์ ๋ํด์ ์๊ธฐ\n",
|
| 129 |
+
"## ์์ฐ์ด๋ฅผ ๋ชจ๋ธ์ด ์ดํดํ ์ ์๋ ์ซ์ ํ ํฐ์ผ๋ก ๋ฐ๊พธ๋ ๋๊ตฌ\n",
|
| 130 |
+
"## ๋จ์ง ํ ํฌ๋์ด์ ๋ ๋จ์ด๋ฅผ ์ซ์ํํด์ ๋ช
๋ช
ํ ๋๊ตฌ์ผ ๋ฟ ์ด๋ ํ ์๋ฏธ๋ ์์\n",
|
| 131 |
+
"## +) ์๋ฒ ๋ฉ์ ๋จ์ด์ ์๋ฏธ๊ฐ ๋ด๊ฒจ ์๋ ๋ฒกํฐ ๊ณต๊ฐ\n",
|
| 132 |
+
"tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
|
| 133 |
+
"# ํ
์คํธ ์์ฑ์ฉ ๋ชจ๋ธ ๋ถ๋ฌ์ค๊ธฐ / to(device) ๋ชจ๋ธ์ ์๊น ์ค์ ํ ๋๋ฐ์ด์ค๋ก ์ด๋\n",
|
| 134 |
+
"model = AutoModelForCausalLM.from_pretrained(model_name).to(device)\n",
|
| 135 |
+
"\n",
|
| 136 |
+
"# ์ฌ์ฉ์๊ฐ ์
๋ ฅํ ํ
์คํธ ๋๋ ์
๋ ฅ๊ฐ\n",
|
| 137 |
+
"prompt = \"์ด๋ฌธํ์ด๋ ๋ฌด์์ธ๊ฐ?\\nํด์:\"\n",
|
| 138 |
+
"\n",
|
| 139 |
+
"# ์
๋ ฅ๊ฐ์ ํ ํฌ๋์ด์ ๋ฅผ ํตํด ์ซ์ ํ ํฐ์ผ๋ก ๋ณํ\n",
|
| 140 |
+
"# return_tensors=\"pt\" ์
๋ ฅ๊ฐ์ ์ซ์ ํ ํฐํํ์ฌ ํ์ดํ ์ง ํ
์ ํํ๋ก ๋ฆฌํด\n",
|
| 141 |
+
"inputs = tokenizer(prompt, return_tensors=\"pt\").to(device)\n",
|
| 142 |
+
"\n",
|
| 143 |
+
"# GPT ๋ชจ๋ธ์ ๋ถํ์ํ token_type_ids ์ ๊ฑฐ\n",
|
| 144 |
+
"# ํ ํฐํํ๋ฉด ์ด๋ป๊ฒ ๋์ค๊ธธ๋ ๊ถ๊ธํ๊ธฐ๋ ํ๊ณ \n",
|
| 145 |
+
"# gpt ๋ชจ๋ธ๊ณผ bert๋ชจ๋ธ์ด๋ ๋จธ๊ฐ ๋ค๋ฅธ์ง๋ ํ์ธํด์ ์ ๋ถํ์ํ์ง ํ์ธ์ด ํ์ํจ\n",
|
| 146 |
+
"# {\n",
|
| 147 |
+
"# 'input_ids': tensor([[ 101, ... , 102]]),\n",
|
| 148 |
+
"# 'attention_mask': tensor([[1, 1, 1, ...]]),\n",
|
| 149 |
+
"# 'token_type_ids': tensor([[0, 0, 0, ...]])\n",
|
| 150 |
+
"# }\n",
|
| 151 |
+
"# input_ids : ๋จ์ด๋ฅผ ์ซ์๋ก ๋ฐ๊พผ ๊ฒฐ๊ณผ\n",
|
| 152 |
+
"# attetion_mask : ์ค์ ๋จ์ด(1) vs ํจ๋ฉ(0) ๊ตฌ๋ถ\n",
|
| 153 |
+
"# token_type_ids : ๋ ๋ฌธ์ฅ์ ๊ตฌ๋ถํ๊ธฐ ์ํ segment ID\n",
|
| 154 |
+
"\n",
|
| 155 |
+
"# [CLS] ๋๋ ์ฌ๊ณผ๋ฅผ ๋จน์๋ค [SEP] ํ์ง๋ง ๋ฐฐ๋ ์ ๋จน์๋ค [SEP]\n",
|
| 156 |
+
"# โ ๋ฌธ์ฅ A โ ๋ฌธ์ฅ B\n",
|
| 157 |
+
"# token_type_ids: [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]\n",
|
| 158 |
+
"\n",
|
| 159 |
+
"# gpt๋ ์
๋ ฅ ์์ฒด๊ฐ ํญ์ ํ๋์ ์ฐ์๋ ๋ฌธ์ฅ ์คํ์ค์ด๊ธฐ ๋๋ฌธ์ ๋ฌธ์ฅ ๊ตฌ๋ถ์ด ํ์์์\n",
|
| 160 |
+
"if 'token_type_ids' in inputs:\n",
|
| 161 |
+
" inputs.pop('token_type_ids')\n",
|
| 162 |
+
"\n",
|
| 163 |
+
"# torch.no_grad() ์ถ๋ก ์์๋ ๊ธฐ์ธ๊ธฐ ๊ฒ์ฐํ์ง ์๊ฒํ์ฌ ์๋/๋ฉ๋ชจ๋ฆฌ ์ ์ฝ -> ์ ์ด๋ ๊ฒ ํด์ผํ๋?\n",
|
| 164 |
+
"with torch.no_grad():\n",
|
| 165 |
+
" # model.generate() ์ฃผ์ด์ง ์
๋ ฅ์ ๋ํด ํ
์คํธ๋ฅผ ์์ฑํจ\n",
|
| 166 |
+
" ## ์์ธํ ๋ด์ฉ๊ณผ ์ดํด๋ ๋
ธ์
์ฐธ๊ณ \n",
|
| 167 |
+
" outputs = model.generate(\n",
|
| 168 |
+
" **inputs,\n",
|
| 169 |
+
" # ์ต๋ 80๊ฐ์ ์๋ก์ด ํ ํฐ ์์ฑ\n",
|
| 170 |
+
" ## ๋ชจ๋ธ์ด ์๋ก ์์ฑํ ์ต๋ ํ ํฐ์ ๊ฐ์\n",
|
| 171 |
+
" max_new_tokens=100,\n",
|
| 172 |
+
" # ํ๋ฅ ๊ธฐ๋ฐ ์ํ๋ง ํ์ฑํ\n",
|
| 173 |
+
" ## ๋ชจ๋ธ์ด ๋ค์ ํ ํฐ์ ์ ํํ ๋ ๊ฐ์ฅ ํ๋ฅ ๋์ ๋จ์ด๋ฅผ ๋ฌด์กฐ๊ฑด ๊ณ ๋ฅด๋ ๊ฒ์ด ์๋๋ผ ํ๋ฅ ๋ถํฌ์์ ๋ฌด์์๋ก ํ๋ ์ํ๋ง\n",
|
| 174 |
+
" do_sample=True,\n",
|
| 175 |
+
" # ํ๋ฅ ๋์ ์์ 50๊ฐ ์ค์์ ์ ํ\n",
|
| 176 |
+
" ## ์ํ๋งํ ๋ ์์ k๊ฐ์ ๋จ์ด๋ง์ผ๋ก ํ๋ณด๋ฅผ ์ ํ\n",
|
| 177 |
+
" top_k=50,\n",
|
| 178 |
+
" # ๋์ ํ๋ฅ 95%๊น์ง ํฌํจํ ํ๋ณด๊ตฐ์์ ์ ํ\n",
|
| 179 |
+
" ## ์์ ๋จ์๋ค์ ๋์ ํ๋ฅ ์ด 95% ๋์ ๋๊น์ง ํ๋ณด๊ตฐ์ ๋์ ์ํด, ๊ทธ ์์์ ์ํ๋ง\n",
|
| 180 |
+
" top_p=0.95,\n",
|
| 181 |
+
" # ์ฐฝ์์ฑ ์กฐ์ ๊ฐ\n",
|
| 182 |
+
" temperature=0.8\n",
|
| 183 |
+
" )\n",
|
| 184 |
+
"\n",
|
| 185 |
+
"# ํ ํฐ์ ํ
์คํธ๋ก ๋์ฝ๋ฉ\n",
|
| 186 |
+
"# ๋ชจ๋ธ์ด ์์ฑํ ํ ํฐ ์ํ์ค ์ค ์ฒซ๋ฒ ์งธ ๊ฒฐ๊ณผ๋ฅผ\n",
|
| 187 |
+
"# skip_special_tokens=True ํน์ํ ํฐ ์ ๊ฑฐ\n",
|
| 188 |
+
"result = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
|
| 189 |
+
"print(\"\\n๐ ์์ฑ๋ ๋ฌธ์ฅ:\\n\")\n",
|
| 190 |
+
"print(result)"
|
| 191 |
+
]
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"cell_type": "code",
|
| 195 |
+
"execution_count": 3,
|
| 196 |
+
"id": "ca568fec-565c-434f-98a2-4d3830768f61",
|
| 197 |
+
"metadata": {},
|
| 198 |
+
"outputs": [
|
| 199 |
+
{
|
| 200 |
+
"name": "stderr",
|
| 201 |
+
"output_type": "stream",
|
| 202 |
+
"text": [
|
| 203 |
+
"Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n"
|
| 204 |
+
]
|
| 205 |
+
},
|
| 206 |
+
{
|
| 207 |
+
"name": "stdout",
|
| 208 |
+
"output_type": "stream",
|
| 209 |
+
"text": [
|
| 210 |
+
"\n",
|
| 211 |
+
"๐ ์์ฑ๋ ๋ฌธ์ฅ:\n",
|
| 212 |
+
"\n",
|
| 213 |
+
"๋๋ ์ด๋ค ๋ชจ๋ธ์ด๋?\n",
|
| 214 |
+
"ํด์: ๋๋ ๊ทธ์ ๋ชธ์ ์จ๊ธฐ๊ณ ๊ทธ์ ๋ชธ์ ํ๊ณ ์๊ตฌ๋.๋ฒ์ญ: ๋๋ ๋์ ๋ชธ์ ํ๊ณ ์๊ตฌ๋. ืขึธืจึธืึธื ืึดืคึฐื ึตื ืึธืึนืช ืจึธืึธืึพืึธืึดื ืขึน๏ฟฝ\n"
|
| 215 |
+
]
|
| 216 |
+
}
|
| 217 |
+
],
|
| 218 |
+
"source": [
|
| 219 |
+
"# ์ฌ์ฉ์๊ฐ ์
๋ ฅํ ํ
์คํธ ๋๋ ์
๋ ฅ๊ฐ\n",
|
| 220 |
+
"prompt = \"๋๋ ์ด๋ค ๋ชจ๋ธ์ด๋?\\nํด์:\"\n",
|
| 221 |
+
"\n",
|
| 222 |
+
"# ์
๋ ฅ๊ฐ์ ํ ํฌ๋์ด์ ๋ฅผ ํตํด ์ซ์ ํ ํฐ์ผ๋ก ๋ณํ\n",
|
| 223 |
+
"inputs = tokenizer(prompt, return_tensors=\"pt\").to(device)\n",
|
| 224 |
+
"\n",
|
| 225 |
+
"# gpt๋ ์
๋ ฅ ์์ฒด๊ฐ ํญ์ ํ๋์ ์ฐ์๋ ๋ฌธ์ฅ ์คํ์ค์ด๊ธฐ ๋๋ฌธ์ ๋ฌธ์ฅ ๊ตฌ๋ถ์ด ํ์์์\n",
|
| 226 |
+
"if 'token_type_ids' in inputs:\n",
|
| 227 |
+
" inputs.pop('token_type_ids')\n",
|
| 228 |
+
"\n",
|
| 229 |
+
"# torch.no_grad() ์ถ๋ก ์์๋ ๊ธฐ์ธ๊ธฐ ๊ฒ์ฐํ์ง ์๊ฒํ์ฌ ์๋/๋ฉ๋ชจ๋ฆฌ ์ ์ฝ -> ์ ์ด๋ ๊ฒ ํด์ผํ๋?\n",
|
| 230 |
+
"with torch.no_grad():\n",
|
| 231 |
+
" # model.generate() ์ฃผ์ด์ง ์
๋ ฅ์ ๋ํด ํ
์คํธ๋ฅผ ์์ฑํจ\n",
|
| 232 |
+
" ## ์์ธํ ๋ด์ฉ๊ณผ ์ดํด๋ ๋
ธ์
์ฐธ๊ณ \n",
|
| 233 |
+
" outputs = model.generate(\n",
|
| 234 |
+
" **inputs,\n",
|
| 235 |
+
" # ์ต๋ 80๊ฐ์ ์๋ก์ด ํ ํฐ ์์ฑ\n",
|
| 236 |
+
" ## ๋ชจ๋ธ์ด ์๋ก ์์ฑํ ์ต๋ ํ ํฐ์ ๊ฐ์\n",
|
| 237 |
+
" max_new_tokens=100,\n",
|
| 238 |
+
" # ํ๋ฅ ๊ธฐ๋ฐ ์ํ๋ง ํ์ฑํ\n",
|
| 239 |
+
" ## ๋ชจ๋ธ์ด ๋ค์ ํ ํฐ์ ์ ํํ ๋ ๊ฐ์ฅ ํ๋ฅ ๋์ ๋จ์ด๋ฅผ ๋ฌด์กฐ๊ฑด ๊ณ ๋ฅด๋ ๊ฒ์ด ์๋๋ผ ํ๋ฅ ๋ถํฌ์์ ๋ฌด์์๋ก ํ๋ ์ํ๋ง\n",
|
| 240 |
+
" do_sample=True,\n",
|
| 241 |
+
" # ํ๋ฅ ๋์ ์์ 50๊ฐ ์ค์์ ์ ํ\n",
|
| 242 |
+
" ## ์ํ๋งํ ๋ ์์ k๊ฐ์ ๋จ์ด๋ง์ผ๋ก ํ๋ณด๋ฅผ ์ ํ\n",
|
| 243 |
+
" top_k=50,\n",
|
| 244 |
+
" # ๋์ ํ๋ฅ 95%๊น์ง ํฌํจํ ํ๋ณด๊ตฐ์์ ์ ํ\n",
|
| 245 |
+
" ## ์์ ๋จ์๋ค์ ๋์ ํ๋ฅ ์ด 95% ๋์ ๋๊น์ง ํ๋ณด๊ตฐ์ ๋์ ์ํด, ๊ทธ ์์์ ์ํ๋ง\n",
|
| 246 |
+
" top_p=0.95,\n",
|
| 247 |
+
" # ์ฐฝ์์ฑ ์กฐ์ ๊ฐ\n",
|
| 248 |
+
" temperature=0.8\n",
|
| 249 |
+
" )\n",
|
| 250 |
+
"\n",
|
| 251 |
+
"# ํ ํฐ์ ํ
์คํธ๋ก ๋์ฝ๋ฉ\n",
|
| 252 |
+
"# ๋ชจ๋ธ์ด ์์ฑํ ํ ํฐ ์ํ์ค ์ค ์ฒซ๋ฒ ์งธ ๊ฒฐ๊ณผ๋ฅผ\n",
|
| 253 |
+
"# skip_special_tokens=True ํน์ํ ํฐ ์ ๊ฑฐ\n",
|
| 254 |
+
"result = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
|
| 255 |
+
"print(\"\\n๐ ์์ฑ๋ ๋ฌธ์ฅ:\\n\")\n",
|
| 256 |
+
"print(result)"
|
| 257 |
+
]
|
| 258 |
+
},
|
| 259 |
+
{
|
| 260 |
+
"cell_type": "markdown",
|
| 261 |
+
"id": "c01a9f6f-9eb4-4917-bc18-3c33674b7a57",
|
| 262 |
+
"metadata": {},
|
| 263 |
+
"source": [
|
| 264 |
+
"### kcmii ์ง์นจ์๋ก ๊ฒฝ๋ ํ์ต ํ ์ฑ๋ฅ ํ
์คํธ\n",
|
| 265 |
+
"kcmii์ ์ ๊ณต์ ์๊ฐ 50๊ฐ ์ ๋๋๊ธฐ ๋๋ฌธ์ \n",
|
| 266 |
+
"์ ๊ณต ํฅ๋ฏธ์ฒ๋ ์กฐํฉ๋ณ๋ก ์๊ธฐ๋ถ ๋ฌธ๊ตฌ๋ฅผ ํ์ต์ํค๊ธฐ์ ๋นํจ์จ์ ์ด๋ผ๊ณ ํ๋จ \n",
|
| 267 |
+
"๊ทธ๋ ๊ธฐ ๋๋ฌธ์ ์ ๊ณต์ ๋ํ ๊ฐ๋
๋จผ์ ํ์ต ์ํจ ํ ์๊ธฐ๋ถ ๋ฌธ๊ตฌ ํ์ต์ํค๋ ํ๋ฆ์ผ๋ก ๊ฒฐ์ "
|
| 268 |
+
]
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"cell_type": "code",
|
| 272 |
+
"execution_count": 30,
|
| 273 |
+
"id": "56bcb149-6237-487e-b93d-dd21889ce93f",
|
| 274 |
+
"metadata": {},
|
| 275 |
+
"outputs": [
|
| 276 |
+
{
|
| 277 |
+
"name": "stderr",
|
| 278 |
+
"output_type": "stream",
|
| 279 |
+
"text": [
|
| 280 |
+
"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
|
| 281 |
+
"To disable this warning, you can either:\n",
|
| 282 |
+
"\t- Avoid using `tokenizers` before the fork if possible\n",
|
| 283 |
+
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
|
| 284 |
+
]
|
| 285 |
+
},
|
| 286 |
+
{
|
| 287 |
+
"name": "stdout",
|
| 288 |
+
"output_type": "stream",
|
| 289 |
+
"text": [
|
| 290 |
+
"Requirement already satisfied: datasets in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (3.5.1)\n",
|
| 291 |
+
"Requirement already satisfied: peft in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (0.15.2)\n",
|
| 292 |
+
"Requirement already satisfied: accelerate in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (1.6.0)\n",
|
| 293 |
+
"Requirement already satisfied: filelock in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from datasets) (3.18.0)\n",
|
| 294 |
+
"Requirement already satisfied: numpy>=1.17 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from datasets) (2.2.5)\n",
|
| 295 |
+
"Requirement already satisfied: pyarrow>=15.0.0 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from datasets) (20.0.0)\n",
|
| 296 |
+
"Requirement already satisfied: dill<0.3.9,>=0.3.0 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from datasets) (0.3.8)\n",
|
| 297 |
+
"Requirement already satisfied: pandas in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from datasets) (2.2.3)\n",
|
| 298 |
+
"Requirement already satisfied: requests>=2.32.2 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from datasets) (2.32.3)\n",
|
| 299 |
+
"Requirement already satisfied: tqdm>=4.66.3 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from datasets) (4.67.1)\n",
|
| 300 |
+
"Requirement already satisfied: xxhash in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from datasets) (3.5.0)\n",
|
| 301 |
+
"Requirement already satisfied: multiprocess<0.70.17 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from datasets) (0.70.16)\n",
|
| 302 |
+
"Requirement already satisfied: fsspec<=2025.3.0,>=2023.1.0 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (2025.3.0)\n",
|
| 303 |
+
"Requirement already satisfied: aiohttp in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from datasets) (3.11.18)\n",
|
| 304 |
+
"Requirement already satisfied: huggingface-hub>=0.24.0 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from datasets) (0.30.2)\n",
|
| 305 |
+
"Requirement already satisfied: packaging in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from datasets) (24.2)\n",
|
| 306 |
+
"Requirement already satisfied: pyyaml>=5.1 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from datasets) (6.0.2)\n",
|
| 307 |
+
"Requirement already satisfied: psutil in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from peft) (5.9.0)\n",
|
| 308 |
+
"Requirement already satisfied: torch>=1.13.0 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from peft) (2.7.0)\n",
|
| 309 |
+
"Requirement already satisfied: transformers in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from peft) (4.51.3)\n",
|
| 310 |
+
"Requirement already satisfied: safetensors in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from peft) (0.5.3)\n",
|
| 311 |
+
"Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from aiohttp->datasets) (2.6.1)\n",
|
| 312 |
+
"Requirement already satisfied: aiosignal>=1.1.2 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from aiohttp->datasets) (1.3.2)\n",
|
| 313 |
+
"Requirement already satisfied: async-timeout<6.0,>=4.0 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from aiohttp->datasets) (5.0.1)\n",
|
| 314 |
+
"Requirement already satisfied: attrs>=17.3.0 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from aiohttp->datasets) (24.3.0)\n",
|
| 315 |
+
"Requirement already satisfied: frozenlist>=1.1.1 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from aiohttp->datasets) (1.6.0)\n",
|
| 316 |
+
"Requirement already satisfied: multidict<7.0,>=4.5 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from aiohttp->datasets) (6.4.3)\n",
|
| 317 |
+
"Requirement already satisfied: propcache>=0.2.0 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from aiohttp->datasets) (0.3.1)\n",
|
| 318 |
+
"Requirement already satisfied: yarl<2.0,>=1.17.0 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from aiohttp->datasets) (1.20.0)\n",
|
| 319 |
+
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from huggingface-hub>=0.24.0->datasets) (4.12.2)\n",
|
| 320 |
+
"Requirement already satisfied: charset-normalizer<4,>=2 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from requests>=2.32.2->datasets) (3.3.2)\n",
|
| 321 |
+
"Requirement already satisfied: idna<4,>=2.5 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from requests>=2.32.2->datasets) (3.7)\n",
|
| 322 |
+
"Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from requests>=2.32.2->datasets) (2.3.0)\n",
|
| 323 |
+
"Requirement already satisfied: certifi>=2017.4.17 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from requests>=2.32.2->datasets) (2025.4.26)\n",
|
| 324 |
+
"Requirement already satisfied: sympy>=1.13.3 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from torch>=1.13.0->peft) (1.14.0)\n",
|
| 325 |
+
"Requirement already satisfied: networkx in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from torch>=1.13.0->peft) (3.4.2)\n",
|
| 326 |
+
"Requirement already satisfied: jinja2 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from torch>=1.13.0->peft) (3.1.6)\n",
|
| 327 |
+
"Requirement already satisfied: python-dateutil>=2.8.2 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from pandas->datasets) (2.9.0.post0)\n",
|
| 328 |
+
"Requirement already satisfied: pytz>=2020.1 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from pandas->datasets) (2025.2)\n",
|
| 329 |
+
"Requirement already satisfied: tzdata>=2022.7 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from pandas->datasets) (2025.2)\n",
|
| 330 |
+
"Requirement already satisfied: regex!=2019.12.17 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from transformers->peft) (2024.11.6)\n",
|
| 331 |
+
"Requirement already satisfied: tokenizers<0.22,>=0.21 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from transformers->peft) (0.21.1)\n",
|
| 332 |
+
"Requirement already satisfied: six>=1.5 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.17.0)\n",
|
| 333 |
+
"Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from sympy>=1.13.3->torch>=1.13.0->peft) (1.3.0)\n",
|
| 334 |
+
"Requirement already satisfied: MarkupSafe>=2.0 in /opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages (from jinja2->torch>=1.13.0->peft) (3.0.2)\n"
|
| 335 |
+
]
|
| 336 |
+
}
|
| 337 |
+
],
|
| 338 |
+
"source": [
|
| 339 |
+
"!pip install datasets peft accelerate"
|
| 340 |
+
]
|
| 341 |
+
},
|
| 342 |
+
{
|
| 343 |
+
"cell_type": "code",
|
| 344 |
+
"execution_count": 1,
|
| 345 |
+
"id": "1f0a84f9-8f1d-4863-a25f-cf8349b3a67f",
|
| 346 |
+
"metadata": {},
|
| 347 |
+
"outputs": [
|
| 348 |
+
{
|
| 349 |
+
"name": "stderr",
|
| 350 |
+
"output_type": "stream",
|
| 351 |
+
"text": [
|
| 352 |
+
"/opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
| 353 |
+
" from .autonotebook import tqdm as notebook_tqdm\n",
|
| 354 |
+
"Loading checkpoint shards: 100%|โโโโโโโโโโ| 3/3 [00:01<00:00, 1.68it/s]\n"
|
| 355 |
+
]
|
| 356 |
+
}
|
| 357 |
+
],
|
| 358 |
+
"source": [
|
| 359 |
+
"# ํ๊น
ํ์ด์ค์์ ๋ฐ์ดํฐ์
์ ๋ถ๋ฌ์ค๊ฑฐ๋, ๋ก์ปฌ์์ ๋ฐ์ดํฐ๋ฅผ ์ฝ๊ฒ ํ์ต์ฉ์ผ๋ก ์
ํ
ํ๊ฒ ํ๋ ํจํค์ง\n",
|
| 360 |
+
"from datasets import load_dataset\n",
|
| 361 |
+
"# AutoTokenizer ํ
์คํธ๋ฅผ ์ซ์ id๋ก ๋ณํ\n",
|
| 362 |
+
"# AutoModelForCausalLM GPT๋ฅ ์์ฑ ๋ชจ๋ธ์ ๋ถ๋ฌ์ค๊ธฐ ์ํ ํด๋์ค\n",
|
| 363 |
+
"# TrainingArguments ํ์ต ์ค์ ์ ๋ด๋ ๊ฐ์ฒด\n",
|
| 364 |
+
"# Trainer ํ์ต ์ ์ฒด๋ฅผ ๋ด๋นํ๋ ํ์ต ์์ง\n",
|
| 365 |
+
"from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer\n",
|
| 366 |
+
"# peft ๊ฒฝ๋ํ๋ LoRA ๊ด๋ จ ๋๊ตฌ ์ํฌํธ\n",
|
| 367 |
+
"from peft import get_peft_model, LoraConfig, TaskType\n",
|
| 368 |
+
"\n",
|
| 369 |
+
"# 1. ๋ก์ปฌ์์ ํ์ต ๋ฐ์ดํฐ ๋ก๋\n",
|
| 370 |
+
"dataset = load_dataset(\"json\", data_files=\"dataset/kcmii_major_instruction_data_summarized.jsonl\", split=\"train\")\n",
|
| 371 |
+
"\n",
|
| 372 |
+
"# 2. ๋ชจ๋ธ ๋ฐ ํ ํฌ๋์ด์ ๋ก๋ฉ\n",
|
| 373 |
+
"## ๋ชจ๋ธ ์ํฌํธ\n",
|
| 374 |
+
"model_name = \"EleutherAI/polyglot-ko-1.3b\"\n",
|
| 375 |
+
"## ๋ชจ๋ธ์ ์๋ง์ ํ ํฌ๋์ด์ ์ํฌํธ\n",
|
| 376 |
+
"tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
|
| 377 |
+
"## ๋ถ๋ฌ์จ ๋ชจ๋ธ์ mps์ ์ฌ๋ ค ํ์ต ๋ฐ ์ถ๋ก ํ ์ค๋น\n",
|
| 378 |
+
"model = AutoModelForCausalLM.from_pretrained(model_name).to(\"mps\")\n",
|
| 379 |
+
"\n",
|
| 380 |
+
"# 3. LoRA ๊ฒฝ๋ ํ์ต ์ค์ \n",
|
| 381 |
+
"peft_config = LoraConfig(\n",
|
| 382 |
+
" ## causal_lm = GPT๋ฅ๋ ์ผ์ชฝ->์ค๋ฅธ์ชฝ์ผ๋ก ๋ฌธ์ฅ ์์ฑํ๋ ๊ตฌ์กฐ, ์ด์ ๋ง์ถฐ LoRA ์ฝ์
์์น๋ฅผ ๊ฒฐ์ ํ๊ธฐ ์ํด\n",
|
| 383 |
+
" ## ๋ชจ๋ธ ๊ตฌ์กฐ ๋ช
์\n",
|
| 384 |
+
" task_type=TaskType.CAUSAL_LM,\n",
|
| 385 |
+
" ## LoRA์์ ์ฝ์๋๋ ์ ์ฐจ์ ํ๋ ฌ์ rank\n",
|
| 386 |
+
" ## A, B๋ผ๋ ์์ ํ๋ ฌ ์ถ๊ฐ\n",
|
| 387 |
+
" ## ๊ธฐ์กด ๊ฐ์ค์น๊ฐ 4096 x 4096์ด๋ผ๋ฉด A : 4096 X 8, B : 8 X 4096 ํ๋ ฌ ์ถ๊ฐํ์ฌ\n",
|
| 388 |
+
" ## ์๋ก์ด ํ๋ผ๋ฏธํฐ๋ฅผ ํ์ต์ํด\n",
|
| 389 |
+
" r=8,\n",
|
| 390 |
+
" ## LoRA์์ ์ฌ์ฉ๋๋ ์ค์ผ์ผ๋ง ๊ณ์\n",
|
| 391 |
+
" ## ๋ชจ๋ธ์ ํ๋ผ๋ฏธํฐ์ ๋นํด ์๋์ ์ผ๋ก ์์ ๊ฐฏ์๋ฅผ ํ์ต์ํค๊ธฐ ๋๋ฌธ์ ์ํฅ์ด ์์\n",
|
| 392 |
+
" ## ์ํ๊ฐ์ ์ค์ ํด ์ํฅ๋ ฅ์ ์ฆํญ์์ผ ์ค\n",
|
| 393 |
+
" ## ๋๋ฌด ์์ผ๋ฉด ํ์ต ํจ๊ณผ๊ฐ ์๊ณ , ํฌ๋ฉด ๊ณผ์ ํ๋๋จ\n",
|
| 394 |
+
" lora_alpha=16,\n",
|
| 395 |
+
" ## LoRA layer์ ์ ์ฉํ ๋๋กญ์์ ํ๋ฅ \n",
|
| 396 |
+
" ## ํ์ต ์ค A ๋๋ B์ ์
๋ ฅ๋ ์ผ๋ถ ์ฐ๊ฒฐ์ 10% ํ๋ฅ ๋ก ๋์\n",
|
| 397 |
+
" ## ํจํด์ ๊ทธ๋๋ก ์ธ์๋ฒ๋ ค์ ์ผ๋ฐํ๊ฐ ์๋๋ ๊ทธ๋๊น ๊ณผ์ ํฉ์ ๋ฐฉ์ง\n",
|
| 398 |
+
" lora_dropout=0.1,\n",
|
| 399 |
+
" ## ๊ธฐ์กด ๋ชจ๋ธ์ bias ํ๋ผ๋ฏธํฐ๋ฅผ ๊ทธ๋๋ก ๋์ง ์ค์ \n",
|
| 400 |
+
" bias=\"none\"\n",
|
| 401 |
+
")\n",
|
| 402 |
+
"## ๊ธฐ์กด ๋ชจ๋ธ์ LoRA ๊ณ์ธต์ ์ฝ์
ํ์ฌ peft ํ์ต์ด ๊ฐ๋ฅํ๋๋ก ๋ฐ๊ฟ\n",
|
| 403 |
+
"model = get_peft_model(model, peft_config)\n",
|
| 404 |
+
"\n",
|
| 405 |
+
"# 4. ํ์ต ๋ฐ์ดํฐ์ ํ ํฐํ ํจ์ ์ ์\n",
|
| 406 |
+
"def tokenize(example):\n",
|
| 407 |
+
" # ํ์ต ๋ฐ์ดํฐ์ ํ
ํ๋ฆฟ(๊ตฌ์กฐ) ๊ณ ๋ คํด ํ๋กฌํํธ ์์ฑ\n",
|
| 408 |
+
" ## ์
๋ ฅ์ ํ๋์ ๊ธด ๋ฌธ์์ด๋ก ๋ง๋๋ ์ญํ \n",
|
| 409 |
+
" prompt = f\"{example['instruction']}\\n๋ต๋ณ: {example['response']}\"\n",
|
| 410 |
+
" # ํ๋กฌํํธ๋ฅผ ๋ชจ๋ธ์ด ์ดํดํ ์ ์๋๋ก ํ
์คํธ id๋ก ๋ณํ\n",
|
| 411 |
+
"    ## padding=\"max_length\": inputs shorter than max_length are padded up to it with the tokenizer's pad token (tokenizer.pad_token_id, which is not necessarily 0)\n",
|
| 412 |
+
" ## truncation=True ์
๋ ฅ์ด 512 ํ ํฐ๋ณด๋ค ๊ธธ ๊ฒฝ์ฐ ์๋ผ๋\n",
|
| 413 |
+
" ## max_length=512 ํ๋์ ํ์ต ๋ฐ์ดํฐ์ ์ต๋ ๊ธธ์ด๋ 512๋ก ์ ํ\n",
|
| 414 |
+
" encoded = tokenizer(prompt, padding=\"max_length\", truncation=True, max_length=512)\n",
|
| 415 |
+
" # ํ์ต์ ์ ๋ต์ ์
๋ ฅ๊ณผ ๋์ผํ๊ฒ ์ค์ \n",
|
| 416 |
+
" ## gpt ๊ตฌ์กฐ์์ ์
๋ ฅ ๋ค์ ๋์์ผํ ๋จ์ด๋ฅผ ์์ธกํ๋ ํ์ต์ ํจ\n",
|
| 417 |
+
" ## ์
๋ ฅ๊ณผ ์ ๋ต์ด ๊ฐ๊ณ , ๋ชจ๋ธ์ ํ ํ ํฐ์ฉ ๋ค์ ๋จ์ด๋ฅผ ๋ง์ถ๋ ๋ฐฉ์์ผ๋ก ํ์ตํจ\n",
|
| 418 |
+
" encoded[\"labels\"] = encoded[\"input_ids\"].copy()\n",
|
| 419 |
+
" return encoded"
|
| 420 |
+
]
|
| 421 |
+
},
|
| 422 |
+
{
|
| 423 |
+
"cell_type": "code",
|
| 424 |
+
"execution_count": 2,
|
| 425 |
+
"id": "4fa47208-c384-4d19-b713-216188185dff",
|
| 426 |
+
"metadata": {
|
| 427 |
+
"scrolled": true
|
| 428 |
+
},
|
| 429 |
+
"outputs": [
|
| 430 |
+
{
|
| 431 |
+
"name": "stdout",
|
| 432 |
+
"output_type": "stream",
|
| 433 |
+
"text": [
|
| 434 |
+
"{'instruction': '์ด๋ฌธํ์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '์ด๋ฌธํ ์ ๊ณต์์๋ ๋ยท์์ ์ธ์ด์ ์๋ฆฌ๋ฅผ ์ฒด๊ณ์ ์ผ๋ก ์ฐ๊ตฌํ๊ณ , ๊ฐ ๋๋ผ๏ฟฝ๏ฟฝ ๋ฌธํ๊ณผ ๋ฌธํ๋ฅผ ์ดํดํ์ฌ ์ธ๊ณํ ์๋์ ์๊ตฌ๋๋ ์ธ๋ฌธํ์ ๊ต์๊ณผ ์ธ์ด ๊ด๋ จ ์ค๋ฌด์ ๋ฅ๋ ฅ์ ๊ณ ๋ฐํ๋ค. ์ด๋ฌธํ ์ ๊ณต์ ์ธ์ด์ ๊ฐ๊ฐ, ๋
ผ๋ฆฌ์ ์ฌ๊ณ ๋ ฅ, ๋นํ์ ๋ถ์ ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ์ธ์ด ์ ๋ฌธํ ๊ทธ๋ฆฌ๊ณ ๋ค์ํ ๊ตญ๊ฐ์ ์ฌํยท๋ฌธํ์ ํน์ฑ์ ๋ํด ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.'}\n",
|
| 435 |
+
"{'instruction': '๋ฌธํยท์ธ๋ฅยท์ญ์ฌ์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '๋ฌธํยท์ธ๋ฅยท์ญ์ฌ ์ ๊ณต์ ์ธ๊ฐ์ด ์ถํํ ์๊ธฐ๋ถํฐ ํ์ฌ๊น์ง์ ๋ฌธํ์ ์ฌํ๋ฅผ ํ๊ตฌํ ๊ณ , ์๋๋ณ๋ก ๋ฌธํ ๋ฐ ์ฌํ์ ํน์ฑ์ ์ฐ๊ตฌํ์ฌ ์ญ์ฌ์ ์์๋ฅผ ์ดํดํ๋ ๋ถ์ผ์ด๋ค. ๋ฌธํยท ์ธ๋ฅยท์ญ์ฌ ์ ๊ณต์ ๊ณ ๋์ ๊ด์ฐฐ๋ ฅ ๋ฐ ๋ถ์๋ ฅ์ด ํ์ํ๊ณ , ํ๊ตญ์ฌ ๋ฐ ์ธ๊ณ์ฌ ๊ทธ๋ฆฌ๊ณ ๋ค์ ํ ์ฌํ์ ยท์ญ์ฌ์ ํ์์ ๋ํด ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.'}\n",
|
| 436 |
+
"{'instruction': '์ฒ ํ์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '์ฒ ํ ์ ๊ณต์์๋ ์์ ์ ๋๋ฌ์ผ ์์ฐ, ์ฐ์ฃผ, ์ฌํ์ ๋ํ ์๋ฏธ๋ฅผ ํต์ฐฐํ๊ณ ์ด๋ฅผ ๋
ผ๋ฆฌ ์ ์ผ๋ก ๋ถ์ํ์ฌ ์ธ๊ฐ ํ์์ ์ธ๊ณ์ ๋ํ ์๋ฏธ๋ฅผ ํ์ํ๋ค. ์ฒ ํ ์ ๊ณต์ ์์ฐ, ์ฐ์ฃผ, ์ฌ ํ๋ฅผ ๋
ผ๋ฆฌ์ ์ผ๋ก ๋ถ์ํ์ฌ ๊ทธ๊ฒ์ ์๋ฏธ๋ฅผ ํ์ํ๋ ํตํฉ์ ์ฌ๊ณ ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ์ธ๊ฐ ๋ณธ์ฑ๊ณผ ์กด์ฌ ๊ฐ์น, ์ถ์ ๋ณธ์ง์ ๋ํด ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.'}\n",
|
| 437 |
+
"{'instruction': '๋ฒํ์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '๋ฒํ ์ ๊ณต์ ๋ฒ๊ณผ ๊ด๋ จ๋ ๊ธฐ์ด์ ์ธ ์ด๋ก ์์๋ถํฐ ์ฌ๋ฒ, ๊ณต๋ฒ, ์ฌํ๋ฒ ๋ฐ ๊ตญ์ ๊ด๊ณ์ ๊ด๋ จ๋ ๋ค์ํ ๋ฒํ ์ด๋ก ๋ค์ ์ต๋ํ๊ณ ์ด๋ฅผ ๊ตฌ์ฒด์ ์ธ ์ฌ๋ก์ ์ ์ฉํ๋ค. ๋ฒํ ์ ๊ณต์๋ ์ฌ๊ฑด๊ณผ ์ํฉ์ ๋ถ์ํ์ฌ ํ๋นํ ๊ฒฐ๋ก ์ ์ด๋ฅผ ์ ์๋ ์ฌ๊ณ ๋ ฅ๊ณผ ๊ณต์ ํ ํ๋จ๋ ฅ์ด ํ์ํ ๊ณ , ๋ฒ๋ฅ ๊ณผ ๊ด๋ จ๋ ์ฌํํ์ ๋ฐ ์ฌํ๋ฌธ์ ์ ๋ํด ๊ด์ฌ๊ณผ ํฅ๋ฏธ๊ฐ ์์ด์ผ ํ๋ค.'}\n",
|
| 438 |
+
"{'instruction': '์ ์นยท๊ตญ์ ๊ด๊ณ์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '์ ์นยท๊ตญ์ ๊ด๊ณ ์ ๊ณต์ ํ ๊ตญ๊ฐ ๋ด ํน์ ์ฌ๋ฌ ๊ตญ๊ฐ ๊ฐ์ ์ ์น์ ํ์์ ๋ถ์ํ๊ณ ๋นํ ํ๋ฉฐ, ๊ตญ๋ดยท์ธ์ ์ ์น์ ํ์๊ณผ ๊ตญ์ ๊ด๊ณ์ ๋ํ ์ด๋ก ๊ณผ ์ค์ ๋ฅผ ์ฐ๊ตฌํ๋ค. ์ ์นยท๊ตญ์ ๊ด ๊ณ ์ ๊ณต์๋ ๊ตญ์ ์ ํ์๊ณผ ๊ตญ๊ฐ ๊ฐ ๊ด๊ณยท์ธ๋ ฅ ๋ณํ ๋ฑ์ ํ์
ํ ์ ์๋ ๋ถ์๋ ฅ๊ณผ ๋
ผ๋ฆฌ ๋ ฅ์ด ํ์ํ๊ณ , ๊ตญ๋ดยท์ธ์ ๋ค์ํ ์ ์นยท๊ฒฝ์ ยท์ฌํ์ ์ด์์ ๋ํ ๋ฌธ์ ์์๊ณผ ๊ด์ฌ, ํฅ๋ฏธ ๊ฐ ์์ด์ผ ํ๋ค.'}\n",
|
| 439 |
+
"{'instruction': 'ํ์ ์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': 'ํ์ ์ ๊ณต์ ๊ตญ๋ฏผ๊ณผ ์ ๋ถ ๊ฐ์ ์ํธ์์ฉ, ๋ค์ํ ๊ณต๊ณต๋ถ๋ฌธ์์์ ํ์ ๋ฐ ์ ์ฑ
๊ณผ ๊ด ๋ จ๋ ํ์์ ๋ถ์ํจ์ผ๋ก์จ ๊ณต๊ณต๋ถ์ผ ๊ด๋ฆฌ ๋ฑ ๊ตญ๊ฐ ์ด์์ ํจ์จ์ ์ผ๋ก ์ํํ ์ ์๋ ๋ฐฉ ์์ ๋ชจ์ํ๋ค. ํ์ ์ ๊ณต์๋ ์ฌํ ๋ฌธ์ ์ ๋ํ ํฉ๋ฆฌ์ ์ธ ํ๋จ ๋ฅ๋ ฅ๊ณผ ์ฌํ ๋ฌธ์ ๋ฅผ ํจ ์จ์ ์ผ๋ก ํด๊ฒฐํ ์ ์๋ ๋ฌธ์ ํด๊ฒฐ ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ๊ณต๊ณต์ ๋ฌธ์ ์ ์ ์ฑ
๊ทธ๋ฆฌ๊ณ ์ ๋ถ์ ๊ณต๊ณต ๊ธฐ๊ด์ ์ฑ๊ฒฉ, ์
๋ฌด ๋ฑ์ ๋ํด ๊ด์ฌ๊ณผ ํฅ๋ฏธ๊ฐ ์์ด์ผ ํ๋ค.'}\n",
|
| 440 |
+
"{'instruction': '์ฌํ์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '์ฌํ ์ ๊ณต์ ์ฌํ์ ๋ค์ํ ํ์๊ณผ ๋ฌธ์ ๋ฅผ ๊ด์ฐฐ, ๋ถ์ํ๊ณ ์ด๋ฅผ ํตํด ๊ทธ๊ฒ์ ์๋ฏธ๋ฅผ ํด์ํจ์ผ๋ก์จ ์ฌํ ๊ตฌ์ฑ์๋ค ๊ฐ์ ์ํธ์์ฉ ๋ฐ ์ฌํ๊ตฌ์กฐ์ ๋ํ์ฌ ํ๊ตฌํ๊ณ ํ๋ ์ฌํ ์ ๋ฌธ์ ๋ค์ ํด๊ฒฐํ๋ค. ์ฌํ ์ ๊ณต์๋ ์ฌํ ํ์์ ๊ฐ๊ด์ ์ผ๋ก ๊ด์ฐฐํ๊ณ ๊ณผํ์ ์ผ๋ก ๋ถ ์ํ ์ ์๋ ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ๊ฐ์ธ์ ยท์ฌํ์ ์์ค์ ๋ค์ํ ๋ฌธ์ ๋ฅผ ํ๊ตฌํ๋ ๊ฒ์ ๊ด ์ฌ๊ณผ ํฅ๋ฏธ๊ฐ ์์ด์ผ ํ๋ค.'}\n",
|
| 441 |
+
"{'instruction': '์ฌ๋ฆฌ์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '์ฌ๋ฆฌ ์ ๊ณต์ ์ธ๊ฐ์ ํ๋๊ณผ ์ ์ ๊ณผ์ , ๊ฐ์ธ ๊ฐ ์ํธ๊ด๊ณ ํ์ฑ์ ๊ดํ ์ฌ์ธต์ ์ฐ๊ตฌ๋ฅผ ํตํด ๊ฐ์ธ์ด ๊ฐ์ง ์ฌ๋ฆฌยท์ ์์ ๋ฌธ์ ๋ฅผ ํด๊ฒฐํ๊ณ ์กฐํ๋ก์ด ์ธ๊ฒฉ ํ์ฑ๊ณผ ์ธ์ฑ ํ๋ณต์ ์ ํ ๋ฐฉ์์ ๋ชจ์ํ๋ค. ์ฌ๋ฆฌ ์ ๊ณต์๋ ์ฌ๋ฆฌ ํ์์ ๋ํ ์คํ๊ณผ ์กฐ์ฌ๋ฅผ ๊ณผํ์ ์ผ๋ก ์ค์ํ ๊ณ , ์ธ๋ฐํ๊ฒ ๊ด์ฐฐํ๋ฉฐ, ๊ฒฐ๊ณผ๋ฅผ ๋
ผ๋ฆฌ์ ์ผ๋ก ํด์ํ ์ ์๋ ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ์ฌ๋๋ค์ ์ฌ๊ณ , ์ฑ๊ฒฉ, ํ๋ ๋ฐ ๊ทธ์ ๊ด๋ จ๋๋ ์ฌํ ํ์์ ๋ํ ๊ด์ฌ๊ณผ ํฅ๋ฏธ๊ฐ ์์ด์ผ ํ๋ค.'}\n",
|
| 442 |
+
"{'instruction': '์ฌํ๋ณต์ง์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '์ฌํ๋ณต์ง ์ ๊ณต์ ๊ฐ์กฑยท์๋ยท์ฒญ์๋
ยท๋
ธ์ธยท์ฌ์ฑ ๋ฑ ์ฌํ ๊ตฌ์ฑ์์ ์ผ์ ํ ์ํ ์์ค ๋ฐ ๋ณด๊ฑด ์ํ๋ฅผ ํ๋ณดํ๊ธฐ ์ํด ์ฌํ ์ ์ฑ
๋ฐ ์ ๋์ ๊ด๋ จ๋ ์ด๋ก ๊ณผ ๋ฐฉ๋ฒ์ ํ๊ตฌํ๋ค. ์ฌ ํ๋ณต์ง ์ ๊ณต์๋ ์ฌํ ๊ตฌ์ฑ์์ ์ผ์ ํ ์ํ ์์ค ๋ฐ ๋ณด๊ฑด ์ํ๋ฅผ ์ง์ํ๊ธฐ ์ํ ์ฌํ ์ ์ฑ
๋ฐ ์ ๋ ๋ฑ์ ํ๋ฌธ์ ์ผ๋ก ์ดํดํ ์ ์๋ ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ์ฃผ์ ์ด์์ ๋ํ ๊ด์ฌ ๊ณผ ๋ด์ฌ์ ์ ์ด ์์ด์ผ ํ๋ค.'}\n",
|
| 443 |
+
"{'instruction': '๋ฌธํ์ ๋ณด์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '๋ฌธํ์ ๋ณด ์ ๊ณต์ ๊ฐ์ข
์ง์ ํ๋์ ํ์ํ ์ ๋ณด ๋ฐ ๋ฌธํ์ ์์ฑ์ ์ดํดํ๊ณ , ๋ค์ํ ๋ฏธ๋์ด๋ฅผ ํต๏ฟฝ๏ฟฝ ์ ๋ณด๋ฅผ ํจ์จ์ ์ผ๋ก ์์ง, ์ ๋ฆฌ, ๊ฐ๊ณต, ๊ด๋ฆฌ, ๋ฐฐํฌํ๋ ์ง์ ์ ๋ณด ๊ด๋ฆฌ ๋ฅ๋ ฅ ์ ๊ณ๋ฐํ๋ค. ๋ฌธํ์ ๋ณด ์ ๊ณต์๋ ์ํฉ์ ๋ฐ๋ฅธ ์ ํฉํ ๋์ ๋ฐ ์๋ฃ์ ์์งยท์ ๋ฆฌยท๊ฐ๊ณต ๋ฅ ๋ ฅ, ๋์ ๋ฐ ์๋ฃ์ ๊ด๋ฆฌ ๋ฐ ํ์ฉ์ ์ํ ์ ์ฐ ์ฒ๋ฆฌ ๋ฐ ํต๊ณ ์ฒ๋ฆฌ ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ๋ค์ ํ ๋ถ์ผ์ ๋์์ ์๋ฃ์ ๋ํ ๊ด์ฌ๊ณผ ํฅ๋ฏธ๊ฐ ์์ด์ผ ํ๋ค.'}\n",
|
| 444 |
+
"{'instruction': '์ธ๋ก ยทํ๋ณดยท๋ฏธ๋์ด์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '์ธ๋ก ยทํ๋ณดยท๋ฏธ๋์ด ์ ๊ณต์ ์ฌํ์ ์ฌ๋ฌ ์์ญ์์ ์ด๋ฃจ์ด์ง๋ ๋์ค ๋งค์ฒด์ ์ํต ๊ณผ์ ์ ๋ถ์, ์ฐ๊ตฌํ์ฌ ๋ค์ํ ๋ฏธ๋์ด ์ฝํ
์ธ ์ ์๊ณผ ๋ฐ๋์งํ ๋ฏธ๋์ดยท๋์ค ๋งค์ฒด์ ๋ฐ์ ๋ฐฉ ์์ ๋ํด ์ฐ๊ตฌํ๋ค. ์ธ๋ก ยทํ๋ณดยท๋ฏธ๋์ด ์ ๊ณต์๋ ์ต์ ํธ๋ ๋๋ฅผ ํ์
ํ๋ ๋ฅ๋ ฅ, ์ ํํ ์ ๋ณด ์ ๋ฌ์ ์ํ ์ปค๋ฎค๋์ผ์ด์
๋ฅ๋ ฅ, ๊ทธ๋ฆฌ๊ณ ๋งค๋ ฅ์ ์ฝํ
์ธ ์์ฐ์ ์ํ ์์ ์ฑ ๋ฐ ์ฐฝ ์๋ ฅ์ด ํ์ํ๊ณ , ๋์ค ๋งค์ฒด์ ๋ค์ํ ์ฝํ
์ธ , ๋ด๋ฏธ๋์ด ๋ฑ์ ๋ํ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.'}\n",
|
| 445 |
+
"{'instruction': '๋์ยท์ง์ญ์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '๋์ยท์ง์ญ ์ ๊ณต์ ๋์ ๋ฐ ์ง์ญ๊ณผ ๊ด๋ จ๋ ๊ฐ์ข
๋ฌธ์ ๋ฅผ ๋ถ์ํ๊ณ ์ด๋ฅผ ํฉ๋ฆฌ์ ์ผ๋ก ํด ๊ฒฐํ ์ ์๋ ๋ฐฉ์์ ๋ชจ์ํ์ฌ ๊ตญํ ๋ฅผ ๊ฒฝ์ ์ , ์ฌํ์ , ๋ฌธํ์ ์ธก๋ฉด์์ ์กฐํ๋กญ๊ณ ๊ท ํ ์๊ฒ ๋ฐ์ ํ๋ ๋ฐฉ์์ ๋ํด ์ฐ๊ตฌํ๋ค. ๋์ยท์ง์ญ ์ ๊ณต์๋ ๋์์ ๊ธฐ๋ฅ๊ณผ ์ญํ , ๊ตํต ์ฒด ๊ณ, ์ง์ญ์ ํน์ฑ์ ์ ํฉํ ์ฌํ๊ธฐ๋ฐ์์ค ๋ฑ์ ๋ํ ์ดํด ๋ฅ๋ ฅ๊ณผ ์ฐฝ์๋ ฅ, ๊ณต๊ฐ์ง๊ฐ๋ ฅ์ด ํ์ํ๊ณ , ๊ณต๊ฐ์ ๋ํ ํธ๊ธฐ์ฌ์ด ์์ด์ผ ํ๋ค.'}\n",
|
| 446 |
+
"{'instruction': '๊ตฐ์ฌยท๊ตญ๋ฐฉยท์๋ณด์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '๊ตฐ์ฌยท๊ตญ๋ฐฉยท์๋ณด ์ ๊ณต์ ๊ตฐ์ฌ ์ ์ฑ
๋ฐ ์ ๋ต์ ํ์ํ ์ ๋ฌธ ์ง์์ ์ต๋ํ๊ณ , ๋ค์ํ ์ํฉ์์ ์ค์ง์ ์ผ๋ก ๋์ฒ ๊ฐ๋ฅํ ๊ตฐ์ฌ ์ด์ฉ ๋ฅ๋ ฅ์ ๊ณ๋ฐํ๋ฉฐ, ํฌ์ฒ ํ ๊ตญ๊ฐ๊ด์ ํจ์ํ ์ฌ ๊ตญ๊ฐ์ ์๋ณด์ ๊ธฐ์ฌํ๋ค. ๊ตฐ์ฌยท๊ตญ๋ฐฉยท์๋ณด ์ ๊ณต์๋ ๊ตฐ๋ณ๋ ฅ์ ํต์ํ๋ ๋ฆฌ๋์ญ๊ณผ ์ฌ ๋ฐ๋ฅธ ๊ตญ๊ฐ๊ด, ํ๋ จ ๋ฐ ์ค์ ์ ๋๋นํ ๊ฐํ ์ฒด๋ ฅ๊ณผ ์ ์ ๋ ฅ, ํต์ฐฐ๋ ฅ ๋ฐ ํ๋จ๋ ฅ์ด ํ์ํ๊ณ , ๊ตญ๊ฐ์ ์๋ณด ํ๊ฒฝ์ ๋ํ ์ดํด์ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.'}\n",
|
| 447 |
+
"{'instruction': '๊ฒฝ์ฐฐยท์๋ฐฉยท์์ ์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '๊ฒฝ์ฐฐยท์๋ฐฉยท์์ ์ ๊ณต์ ๊ฐ์ข
๋ฒ์ฃ ํ์์ ์์ธ๊ณผ ๋์ฑ
์ ์ฐ๊ตฌํ๊ฑฐ๋ ์์ ๊ด๋ฆฌ์ ๋ ํ ์ด๋ก ์ ์ง์ ๋ฐ ์ค๋ฌด ๋ฅ๋ ฅ์ ์ต๋ํจ์ผ๋ก์จ ๋ฒ์ฃ, ์ํ ๋ฐ ์ฌ๋์ผ๋ก๋ถํฐ ๊ตญ๋ฏผ์ ์๋ช
๊ณผ ์ฌ์ฐ์ ๋ณดํธํ๋ค. ๊ฒฝ์ฐฐยท์๋ฐฉยท์์ ์ ๊ณต์๋ ํ์ฅ์์์ ๋๋ฐ ์ํฉ์ ํด๊ฒฐํ ์ ์๋ ๋ฆฌ๋์ญ๊ณผ ์กฐ์ง์ ์ฌ๊ณ , ์ฑ
์๊ฐ, ์ํฉํ๋จ๋ ฅ์ด ํ์ํ๊ณ , ์ฌํ ๋ฌธ์ ๋ฐ ๋ฒ์ฃ ํด๊ฒฐ, ๊ฐ์ข
์ฌ ๊ฑดยท์ฌ๊ณ ์ ๊ด์ฌ๊ณผ ํฅ๋ฏธ๊ฐ ์์ด์ผ ํ๋ค.'}\n",
|
| 448 |
+
"{'instruction': '๊ฒฝ์์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '๊ฒฝ์ ๊ด๋ จ ์ ๊ณต์์๋ ๊ธฐ์
๊ฒฝ์์ ํ์ํ ์ฌ๋ฌ ๊ฐ์ง ์ด๋ก ๊ณผ ๊ธฐ๋ฒ์ ์ฐ๊ตฌํ๊ณ , ์ํ ์ ํ์ํ ๋ค์ํ ์ํ ๋๋ ์๋น์ค๋ฅผ ์์ฐ, ์ ํต, ํ๋งคํ๋ ์ ๊ณผ์ ์ ๋ํด ์ฐ๊ตฌํ๋ค. ๊ฒฝ ์ํ ๊ด๋ จ ์ ๊ณต์์๊ฒ๋ ํจ์จ์ ์ธ ๊ฒฝ์ ํ๋์ ๋ํ ๋ถ์ยท์คํ ๋ฅ๋ ฅ๊ณผ ํฉ๋ฆฌ์ ์์ฌ๊ฒฐ์ ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ์กฐ์ง์ ์ฑ๊ณผ๋ฅผ ๋์ด๊ธฐ ์ํ ๊ฒฝ์ ํ๋ ๋ฐ ๊ธ๋ก๋ฒ ๊ธฐ์
ํ๊ฒฝ์ ๋ณํ์ ๋ํ ๊ด์ฌ๊ณผ ํฅ๋ฏธ๊ฐ ์์ด์ผ ํ๋ค.'}\n",
|
| 449 |
+
"{'instruction': '๊ฒฝ์ ยท๊ธ์ตยท๋ถ๋์ฐ์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '๊ฒฝ์ ยท๊ธ์ตยท๋ถ๋์ฐ ์ ๊ณต์ ๊ตญ๊ฐ, ์กฐ์ง, ๊ฐ์ธ ๋ฑ์ ๊ฒฝ์ ์ํฉ์ ๋ถ์ยท์ง๋จํ์ฌ ๊ฒฝ์ ๊ด ๋ จ ๋ฌธ์ ์ ์ ์ ํ ๋์ํ ์ ์๋ ๋ฐฉ๋ฒ์ ์ฐ๊ตฌํ๋ค. ๋ฐ๋ผ์ ๊ธ์ต์ ํ๋ฆ์ ํ์
ํ๊ธฐ ์ ํ ๋ถ์๋ ฅ๊ณผ ์ํ ๋ฅ๋ ฅ, ์ ๋ณด์ฒ๋ฆฌ ๋ฐ ํ์ฉ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ๊ตญ๊ฐ ๋ฐ ์ธ๊ณ ๊ฒฝ์ ์ ํ๋ฆ์ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.'}\n",
|
| 450 |
+
"{'instruction': '๋ฌด์ญยท๋ฌผ๋ฅ์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '๋ฌด์ญยท๋ฌผ๋ฅ ์ ๊ณต์ ๋ฌผํยท์๋น์คยท๊ธฐ์ ยท์์ ๋ฑ์ ๊ตญ๊ฐ ๊ฐ ์ด๋๊ณผ ๋ฌผํ์ ์์ฐ์์ ๊ณต ๊ธ์ ์ด๋ฅด๋ ์ผ๋ จ์ ๊ณผ์ ์ ๊ฒฝ์ ์ ์ด๊ณ ํจ์จ์ ์ผ๋ก ๊ด๋ฆฌํ๋ ๋ฐฉ๋ฒ ๋ฐ ๊ตญ์ ๊ฒฝ์ ๊ต๋ฅ์ ๊ด๋ จ๋ ์ฌ์์ ๋ํด ์ฐ๊ตฌํ๋ค. ๊ทธ๋ฌ๋ฏ๋ก ๋ฌด์ญยท๋ฌผ๋ฅ ์ ๊ณต์๋ ์ธ๊ณ ์๋น๊ฒฝํฅ ๋ฐ ์์์ ํ๋ฆ์ ๋ถ์ํ๊ณ ์ดํดํ๋ ๋ฅ๋ ฅ๊ณผ ๊ด๋ จ ์ ๋ณด๋ฅผ ๊ฒฝ์, ๊ฒฝ์ , ๋ฒ, ๋ณดํ, ์ธ๊ตญ์ด ๋ฑ์ ์ธ์ ํ๋ฌธ๊ณผ ์ฐ๊ณํ ์ ์๋ ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ๊ตญ์ ๊ฒฝ์ ๊ต๋ฅ์ ๋ํด ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.'}\n",
|
| 451 |
+
"{'instruction': 'ํ๊ณยท์ธ๋ฌด์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': 'ํ๊ณยท์ธ๋ฌด ์ ๊ณต์ ๊ธฐ์
์ ํ๊ณ์ ๋ณด๋ฅผ ์ฒด๊ณ์ ์ธ ๋ฐฉ๋ฒ์ผ๋ก ์ฐ์ถยท๋ถ์ํ๊ณ , ์ธ๋ฌด์ ๊ด ๋ จ๋ ํจ์จ์ ์ธ ์์ฌ๊ฒฐ์ ๋ฐฉ์์ ํ์ํ๋ค. ๋ฐ๋ผ์ ์ดํด๊ด๊ณ๋ฅผ ๋ถ์ํ๊ณ ์์ธกํ ์ ์๋ ์๋ฆฌ ๋ฅ๋ ฅ ๋ฐ ๋
ผ๋ฆฌ๋ ฅ์ด ํ์ํ๊ณ , ๊ฒฝ์ ๋ฐ ๊ธฐ์
์์์ ํจ์จ์ ๋ฐฐ๋ถ์ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.'}\n",
|
| 452 |
+
"{'instruction': '๊ด๊ด์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '๊ด๊ด ์ ๊ณต์์๋ ๊ด๊ด์ ํตํ ์ถ์ ์ง ํฅ์์ ์ํด ๊ตญ๋ด์ธ ๊ด๊ด์ง๋ฅผ ํ์
ํ๊ณ , ๊ด๊ด ํ์์ ๋ํ ํ๋ฌธ์ ๋ถ์๊ณผ ์ค๋ฌด์ ์ธ ์ ์ฑ
์ ์๋ฆฝํ๋ค. ๊ทธ๋ฌ๋ฏ๋ก ๊ด๊ด ์ํ์ ๊ฐ๋ฐ ๋ฐ ๊ธฐํ ๋ฅ๋ ฅ, ์ธ๊ตญ์ด ๋ฅ๋ ฅ, ์ธ์ ๋คํธ์ํฌ ํ์ฑ ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ์ฌํ ๋ฐ ๊ด๊ด์ ๋ํ ํฅ๋ฏธ ์ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.'}\n",
|
| 453 |
+
"{'instruction': '๊ต์ก์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '๊ต์ก ์ ๊ณต์ ๊ต์ก ๊ธฐ๊ด ์ํ์ ๊ต์กํ๋์ ์ด์ฒด์ ์ผ๋ก ์ดํดํ๊ณ , ๊ต์ก๊ณผ ๊ด๋ จํ ๋ค ์ํ ๋ฌธ์ ์ ๋ํด ํด๊ฒฐ ๋ฐฉ๋ฒ์ ๋ชจ์ํ๋ค. ๋ฐ๋ผ์ ๊ต์ก ํ์ ๋ฐ ๊ต์ก ์ ์ฑ
๋ฑ์ ๋ํ ํ๋จ ๋ ฅ๊ณผ ๊ต์ก ๋ฌธ์ ์ ์์ธ์ ์ฒด๊ณ์ ์ผ๋ก ํ์
ํ๋ ๋ถ์๋ ฅ์ด ํ์ํ๊ณ , ์ธ๊ฐ๊ณผ ๊ต์ก ํ๋์ ๋ํ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.'}\n",
|
| 454 |
+
"{'instruction': '์ ์๊ต์ก์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '์ ์๊ต์ก ์ ๊ณต์์๋ ์ยท์ ์๊ธฐ ๋ฐ๋ฌ์ ๋ํ ์ฒด๊ณ์ ์ธ ์ด๋ก ์ ์ดํดํ๊ณ , ์ด๋ฅผ ํ์ฅ ์ ์ ์ฉํด ๋ณด๋ ์ค์ต์ ํตํด ์ยท์ ์์ ์ ์ฒด์ , ์ ์์ ๋ฐ๋ฌ์ ๋ฐ๋ฅธ ๊ต์ก ๋ฐฉ๋ฒ์ ๋ํด ํ ์ตํ๋ค. ๋ฐ๋ผ์ ์๋์ ๋ฐ๋ฌ, ์ฌ๋ฆฌ์ ํ๋, ๋ถ๋ชจ ๊ต์ก ๋ฑ ์๋์ ๋ํ ์ ๋ฌธ์ ์ธ ์ง์๊ณผ ์ ๋์ ์์ค์ ๋ง๋ ์์ฌ์ํต ๋ฅ๋ ฅ๊ณผ ์ธ์ด ๊ตฌ์ฌ ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ์๋ ๋ณด์ก์ ๋ํ ์ฑ
์ ๊ฐ์ด ์์ด์ผ ํ๋ค.'}\n",
|
| 455 |
+
"{'instruction': '์ด๋ฑ๊ต์ก์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '์ด๋ฑ๊ต์ก ์ ๊ณต์์๋ ๋ง 6โผ12์ธ ์๋์ ๋์์ผ๋ก, ๊ทธ๋ค์ ์ฌ๋ฆฌ์ ํน์ฑ์ ํ์
ํ๊ณ ๊ตญ์ด, ์ํ, ๋ฏธ์ ๋ฑ ๋ค์ํ ๊ต๊ณผ ์ด๋ก ์ ์ ์ฉํ์ฌ ๊ต์ก ํ์ฅ์์ ๊ต์ฌ๋ก์ ์
๋ฌด๋ฅผ ์ํ ํ ์ ์๋ ๋ฐฉ๋ฒ๋ค์ ํ์ตํ๋ค. ๋ฐ๋ผ์ ์ด๋ฑํ๊ต ๊ต๊ณผ์ ๋ํ ๊ธฐ์ด ์ง์๊ณผ ๋ค์ํ ๊ต์ก ๋ฐฉ๋ฒ์ ๊ณ ์ํ ์ ์๋ ์ฐฝ์๋ ฅ, ์ด๋ฆฐ์ด์ ๋ฐ๋ฌ์ ๋ํ ๋์ ์ดํด ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ์ด๋ฑ ๊ต์ก์ ๋ํ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.'}\n",
|
| 456 |
+
"{'instruction': '์ค๋ฑ๊ต์ก์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '์ค๋ฑ๊ต์ก ์ ๊ณต์์๋ ๋ง 13โผ18์ธ ์คยท๊ณ ๋ฑํ๊ต ํ์๋ค์ ๋์์ผ๋ก ๊ทธ๋ค์ ๊ฐ๋ฅด์น ๊ธฐ ์ํด ํน์ ๊ต๊ณผ๋ชฉ์ ์ ๊ณตํ๊ณ , ๊ต์ก ํ์ฅ์์ ๊ต์ฌ๋ก์ ์
๋ฌด๋ฅผ ์ํํ ์ ์๋ ๋ฐฉ๋ฒ ๋ค์ ํ์ตํ๋ค. ์ด์ ์คยท๊ณ ๋ฑํ๊ต ๊ต๊ณผ์ ๋ํ ์ง์, ํํ ๊ต์ก๊ณผ์ ๋ฐ ๊ต์ก์ ์ฑ
์ ๋ํ ์ดํด, ํ์ ์ง๋ ๋ฅ๋ ฅ, ๊ต์ก ํ์์ ์ข
ํฉ์ ์ผ๋ก ์ดํดํ๊ณ ๋นํํ ์ ์๋ ๋ฅ๋ ฅ์ด ํ์ํ ๊ณ , ์คยท๊ณ ๋ฑํ๊ต ๊ต์ก์ ๋ํ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.'}\n",
|
| 457 |
+
"{'instruction': 'ํน์๊ต์ก์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': 'ํน์๊ต์ก ์ ๊ณต์์๋ ์ ์ฒด์ , ์ ์ ์ , ์ฌํ์ ๋ฐ๋ฌ ์ฅ์ ๋ฅผ ์ง๋ ํ์๋ค์๊ฒ ์ ํฉํ ๊ต์ก์ ์ ๊ณตํ์ฌ ๊ทธ๋ค์ด ์ฌํ ๊ตฌ์ฑ์์ผ๋ก์ ์ํํ ์ ์๋๋ก ๊ต์กํ๋ ํน์๊ต์ก ์ ๋ฌธ ๊ฐ๋ฅผ ์์ฑํ๋ค. ๋ฐ๋ผ์ ํน์๊ต์ก์ ๋ํ ์ ๋ฌธ์ ์ธ ์ง์์ ๋ฐํ์ผ๋ก ๋ค์ํ ์ํฉ์์์ ๋์ฒ ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ์ฅ์ ํ์์ ๋ํ ์ฌ๋, ๊ต์ฌ๋ก์์ ์๋ช
์์๊ณผ ๋ด์ฌ ๋ฐ ํฌ์ ์ ์ ์ด ์์ด์ผ ํ๋ค.'}\n",
|
| 458 |
+
"{'instruction': '์ํ์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '์ํ ์ ๊ณต์์๋ ๋ค์ํ ๋ฌผ์ฒด๋ ๊ณต๊ฐ, ํ์์ ๊ณ๋ํํ์ฌ ๋ถ์ยท์ค๋ช
ํ๊ณ , ์์ ์ฒด๊ณ ์ ์ฑ์ง์ ์ฐ๊ตฌํ๋ค. ์ํ ์ ๊ณต์ ๋
ผ๋ฆฌ์ ์ธ ์ฌ๊ณ ์ ๋ถ์๋ ฅ, ์ถ๋ฆฌ๋ ฅ์ด ํ์ํ๊ณ , ๋ฌธ์ ํด ๊ฒฐ ๊ณผ์ ์ ์ค์ํ๊ฒ ์๊ฐํ๋ฉฐ ์์ ์ฑ์ง์ ๋ํ ํฅ๋ฏธ๊ฐ ์์ด์ผ ํ๋ค.'}\n",
|
| 459 |
+
"{'instruction': 'ํต๊ณ์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': 'ํต๊ณ ์ ๊ณต์์๋ ์ฐ๊ตฌ๋ชฉ์ ์ ๋ถํฉํ๋ ์๋ฃ๋ฅผ ์์ง, ์์ฝํจ์ผ๋ก์จ ์ฌํ ๊ฐ ๋ถ์ผ์ ๋ค์ํ ํน์ฑ ๋ฐ ์ ๋ณด๋ฅผ ๋ถ์ํ๊ณ ํด์ํ๋ ์ด๋ก ๊ณผ ๋ฐฉ๋ฒ์ ์ฐ๊ตฌํ๋ค. ํต๊ณ ์ ๊ณต์ ๋ค์ํ ํต๊ณ ๋ฐฉ๋ฒ๋ก ์ ์ ์ฉํ ์ ์๋ ์์ฉ๋ ฅ๊ณผ ์ ๋ณด๋ฅผ ๋ถ์ํ๊ณ ์ถ๋ฆฌํ ์ ์๋ ๋ฅ๋ ฅ์ด ํ์ํ ๊ณ , ์ํ์ ๋ํ ํฅ๋ฏธ๊ฐ ์์ด์ผ ํ๋ค.'}\n",
|
| 460 |
+
"{'instruction': '๋ฌผ๋ฆฌยท์ฒ๋ฌธ์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '๋ฌผ๋ฆฌยท์ฒ๋ฌธ ์ ๊ณต์์๋ ๋ชจ๋ ์์ฐํ์์ ์กด์ฌํ๋ ํ์ ์๋ฆฌ์ ์ฐ์ฃผ์์ ์ผ์ด๋๋ ๋ค์ํ ํ์์ ๊ด์ธกํ์ฌ ์ฐ์ฃผ์ ์ง์์ ๋ฒ์น์ ํ๊ตฌํ๋ค. ๋ฌผ๋ฆฌยท์ฒ๋ฌธ ์ ๊ณต์ ๋์ ๋ณด์ด ์ง ์๋ ์์ ์ธ๊ณ๋ฅผ ํฌํจํด ์ง๊ตฌ์ ์ฐ์ฃผ์ ๋ํ ์ฐฝ์์ ์ธ ์ฌ๊ณ ๋ฅ๋ ฅ, ์ฒ์ฒด๋ ์ง๊ตฌ์์ ์ผ์ด๋๋ ํ์์ ๋ถ์ํ๋ ๋
ผ๋ฆฌ์ ์ธ ์ฌ๊ณ ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ๋ค์ํ ์์ฐ ํ์์ ๋ํ ๊ด ์ฌ์ด ์์ด์ผ ํ๋ค.'}\n",
|
| 461 |
+
"{'instruction': '์ง์งยท๋๊ธฐยทํด์์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '์ง์งยท๋๊ธฐยทํด์ ์ ๊ณต์์๋ ์ง๊ตฌ๋ฅผ ๊ตฌ์ฑํ๋ ๋ฌผ์ง์ ์ฑ๋ถ, ๊ตฌ์กฐ, ํ์ฑ ๋ฐ ๋ณํ ๊ณผ์ ๋ฑ์ ์ฐ๊ตฌํ๊ณ ์ง๊ตฌ๋ฅผ ๋๋ฌ์ผ ๋๊ธฐ์ ํด์์ ์์ฐ ํ์์ ๋ํด ํ๊ตฌํ๋ค. ์ง์งยท๋๊ธฐยทํด ์ ์ ๊ณต์ ๊ณผํ ๊ต๊ณผ ์ ๋ฐ์ ๋ํ ๊ธฐ๋ณธ ์ง์ ๊ทธ๋ฆฌ๊ณ ์์ฐ ํ๊ฒฝ์ ๋ํ ๊ด์ฐฐ๋ ฅ๊ณผ ํ๏ฟฝ๏ฟฝ๋ ฅ ์ด ํ์ํ๊ณ , ์์ฐ, ๋๊ธฐ, ๋ฐ๋ค์์ ๋ฐ์ํ๋ ์์ฐ ํ์์ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.'}\n",
|
| 462 |
+
"{'instruction': '๋๋ฆผยท์ถ์ฐยท์์ฐ์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '๋๋ฆผยท์ถ์ฐยท์์ฐ ์ ๊ณต์์๋ ๋ยท์ถยท์์ฐ๋ฌผ์ ํจ์จ์ ์ผ๋ก ๊ด๋ฆฌํ๊ณ ๊ฐ๋ฐ, ์์ฐํ๊ธฐ ์ ํ ์ฐ๊ตฌ๋ฅผ ์ํํ๋ฉฐ, ๋ยท์ถยท์์ฐ ์ํ์ ์์ฐ, ๊ฐ๊ณต์ฒ๋ฆฌ, ์ ํต ๋ฑ์ ๋ํ ์ง์์ ์ต๋ํ ์ฌ ๊ด๋ จ๋ ๋ฌธ์ ๋ค์ ํ์ํ๊ณ ํด๊ฒฐ๋ฐฉ๋ฒ์ ๋ชจ์ํ๋ค. ๋๋ฆผยท์ถ์ฐยท์์ฐ ์ ๊ณต์ ์ํํ์ ๊ธฐ์ด์ ์๋ช
๊ณผํ ๋ฐ ํํ ์ง์์ด ํ์ํ๊ณ , ๋์ด์ด ํ๊ฒฝ๊ณผ ๋ยท์๋ฌผ์ ๋ํ ๊ด์ฌ์ด ์์ด ์ผ ํ๋ค.'}\n",
|
| 463 |
+
"{'instruction': '์ํ์์์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '์ํ์์ ์ ๊ณต์์๋ ๊ฐ์ธ, ๊ฐ์ , ์ง๋จ์ ์์ ์ญ์ทจ์ ์์ํ ๋ฌธ์ ๋ฅผ ๋ถ์ํ๊ณ , ๊ฑด๊ฐ ํ ์ถ์ ์ ์ง๋ฅผ ๋ชฉ์ ์ผ๋ก ํ๋ ์์ํ์ ์ง์ ํฅ์์ ์ํด ๊ฑด๊ฐ ๋ฐ ์์์ ๊ด๋ฆฌยท์ฆ์งํ ๋ ๋ฐฉ์์ ๋ํ์ฌ ์ฐ๊ตฌํ๋ค. ์ํ์์ ์ ๊ณต์ ์ํ, ์์์ ๋ํ ์ค์์ฑ์ ์ดํดํ๊ณ ์ ํ ๊ฐ๋ฐ์ด๋ ์ฒจ๋จ๊ธฐ์ ์ ์ฉ์ ์ํ ์ฐฝ์๋ ฅ, ์์ฉ๋ ฅ์ด ํ์ํ๋ฉฐ, ๋ค์ํ ์ํ์ ์ฑ๋ถ๊ณผ ์์์์ ๋ํด ํฅ๋ฏธ๊ฐ ์์ด์ผ ํ๋ค.'}\n",
|
| 464 |
+
"{'instruction': '๊ธฐ๊ณ์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '๊ธฐ๊ณ ์ ๊ณต์์๋ ๋ค์ํ ์ฐ์
๋ถ์ผ์ ํ ๋๊ฐ ๋๋ ๊ฐ์ข
๊ธฐ๊ณ์ ์ฅ๋น์ ์ค๊ณ, ์ ์, ์ด ์ฉ, ๊ด๋ฆฌ ๋ฑ์ ๋ํ ์ด๋ก ๊ณผ ์์ฉ์ ๋ํด ์ฐ๊ตฌํ๋ค. ๊ธฐ๊ณ ์ ๊ณต์์๊ฒ๋ ์ํ, ๋ฌผ๋ฆฌ ๋ฑ ๊ธฐ์ด ๊ณผํ ์ง์์ด ํ์ํ๊ณ , ๊ธฐ๊ณ ์๋ ์๋ฆฌ๋ฅผ ์ดํดํ๊ธฐ ์ํ ๋
ผ๋ฆฌ๋ ฅ๊ณผ ์ง์ ๋ฐ ์ด๋ก ์ ์์ฉ ํ ์ ์๋ ๋ฅ๋ ฅ์ด ํ์ํ๋ฉฐ ๊ธฐ๊ณ, ์๋์ฐจ, ์ ๊ธฐ, ์ ์ ๋ฑ์ ๋ํ ํฅ๋ฏธ๊ฐ ์์ด์ผ ํ๋ค.'}\n",
|
| 465 |
+
"{'instruction': '์ ๊ธฐยท์ ์์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '์ ๊ธฐยท์ ์ ์ ๊ณต์์๋ ์ ๊ธฐ ๋ฐ ์๊ธฐ์ ํ๋ฆ๊ณผ ๊ณ ์ฒด, ๊ธฐ์ฒด, ์ง๊ณต ๋ด์์์ ์ ์ ์ด๋ ์ ํ๊ตฌํ๊ณ ์ด๊ฒ์ ์ค์ํ์ ์์ฉํ๋ ๋ฐฉ๋ฒ์ ์ฐ๊ตฌํ๋ ํ๋ฌธ์ด๋ค. ์ ๊ธฐยท์ ์ ์ ๊ณต์์ ๊ฒ๋ ์๋ก์ด ๊ธฐ๋ฅ์ ์ ๊ธฐยท์ ์์ ํ์ผ๋ก ๊ตฌํํ ์ ์๋ ์ฐฝ์๋ ฅ, ๋ถ์์ ์ฌ๊ณ ๊ฐ ํ์ํ ๊ณ , ์ ๊ธฐ์ ์ ์์คํ
์ดํด๋ฅผ ์ํ ๋
ผ๋ฆฌ์ ์ฌ๊ณ ๊ฐ ํ์ํ๋ฉฐ, ์ํ, ๋ฌผ๋ฆฌํ์ ๋ํ ํฅ๋ฏธ๊ฐ ์์ด์ผ ํ๋ค.'}\n",
|
| 466 |
+
"{'instruction': '์ปดํจํฐยท์ํํธ์จ์ด์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '์ปดํจํฐยท์ํํธ์จ์ด ์ ๊ณต์์๋ ์ปดํจํฐ์ ํ๋์จ์ด์ ์ํํธ์จ์ด, ๋ฉํฐ๋ฏธ๋์ด ๋ฑ ์ปดํจํฐ์ ๊ด๋ จ๋ ๊ธฐ์ ๊ณผ ์ง์์ ์ต๋ํ๊ณ ์ด๋ฅผ ๋ฐํ์ผ๋ก ๋ค์ํ ๋ถ์ผ์ ์ ์ฉํ๋ ํ๋ฌธ ์ด๋ค. ์ปดํจํฐยท์ํํธ์จ์ด ์ ๊ณต์์๊ฒ๋ ์๋ก์ด ์ํํธ์จ์ด ๊ฐ๋ฐ์ ํ์ํ ์ปดํจํฐ ํ๋ก ๊ทธ๋๋ฐ ์ธ์ด์ ๋ํ ์ง์๊ณผ ๋
ผ๋ฆฌ์ ์ฌ๊ณ ๋ฅ๋ ฅ ๋ฐ ์ฐฝ์๋ ฅ์ด ํ์ํ๊ณ , ์ํ์ ๋ํ ํฅ๋ฏธ์ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.'}\n",
|
| 467 |
+
"{'instruction': '์ ๋ณดยทํต์ ยท๋ณด์์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '์ ๋ณดยทํต์ ยท๋ณด์ ์ ๊ณต์์๋ ์ปดํจํฐ๊ณตํ๊ธฐ๋ฐ ์ง์์ ๋ฐํ์ผ๋ก ICT(์ ๋ณดํต์ ๊ธฐ์ ) ๋ฐ ๋ค์ํ ์ตํฉ์ฐ์
๋ถ์ผ์ ์ ๋ณดยทํต์ ยท๋ณด์์ ๊ดํ ์ด๋ก ์ ์ต๋ํ๊ณ ์ด๋ฅผ ์ ์ฉํ๋ ํ๋ฌธ์ด ๋ค. ์ ๋ณดยทํต์ ยท๋ณด์ ์ ๊ณต์์๊ฒ๋ ์ํ, ํต๊ณ, ๋ฌผ๋ฆฌ ๋ฑ์ ๋ํ ๊ธฐ๋ณธ ์ง์๊ณผ ์ ๊ธฐ, ์ ์, ์ปด ํจํฐ์ ๋ํ ๊ธฐ์ด์ง์, ๋
ผ๋ฆฌ์ ์ฌ๊ณ ๋ ฅ, ์ฐฝ์์ ์ด๋ฉฐ ์ ์ฐํ ์ฌ๊ณ , ์ ํํ ํ๋จ๋ ฅ์ด ํ์ ํ๊ณ , ๋ค์ํ ์ฐ์
์ ์ ๋ณดยทํต์ ๋ถ์ผ์ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.'}\n",
|
| 468 |
+
"{'instruction': '์ฐ์
๊ณตํ์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '์ฐ์
๊ณตํ ์ ๊ณต์์๋ ์ ํ ์์ฐ๊ณผ ์ ๋ฌ์ ์ ๊ณผ์ ์ ํ์ํ ๊ธฐ๊ณ, ๊ธฐ์ , ์ธ์ ์์ ๋ฑ ์ ๊ดํ ์ต์ ์ ์์คํ
์ ์ฐ๊ตฌํ๋ค. ์ฐ์
๊ณตํ ์ ๊ณต์์๊ฒ๋ ๋ฐ์ดํฐ์ ๊ธฐ๋ฐํ์ฌ ์์คํ
์ ํจ์จ์ฑ์ ๋์ผ ์ ์๋ ๋ฌธ์ ํด๊ฒฐ๋ฅ๋ ฅ๊ณผ ์ฌ๊ณ ๋ ฅ์ด ํ์ํ๊ณ , ๊ณตํ๋ฟ ์๋๋ผ ๊ฒฝ์ํ ๋ฑ ์ ์ฌํ๊ณผํ์ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.'}\n",
|
| 469 |
+
"{'instruction': '๊ฑด์ถยทํ ๋ชฉ์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '๊ฑด์ถยทํ ๋ชฉ ์ ๊ณต์์๋ ๊ฑด์ถ๋ฌผ ๋ฐ ๋๋ก, ํญ๊ตฌ, ๋ ๋ฑ์ ์ฌํ๊ธฐ๋ฐ์์ค์ ์ค๊ณํ๊ณ ๊ฑด์ถ ํ๊ธฐ ์ํ ์ด๋ก ์ ์ฐ๊ตฌํ๋ค. ๊ฑด์ถยทํ ๋ชฉ ์ ๊ณต์์๊ฒ๋ ์ํ, ๊ณผํ ๋ฑ ๊ธฐ์ด๋ถ์ผ๋ฅผ ๋น๋กฏํ ์ฌ ์ปดํจํฐ ํ์ฉ ๋ฅ๋ ฅ, ๊ณต๊ฐ ์ง๊ฐ๋ ฅ ๋ฐ ๊ณต๊ฐ ์ดํด๋ ฅ์ด ํ์ํ๊ณ , ์๋ฆ๋ค์ด ๊ฑด์ถ ์ค๊ณ๋ฅผ ์ ํ ๋ฌธํ์ ยท๋ฏธ์ ๊ฐ๊ฐ์ด ์์ด์ผ ํ๋ค.'}\n",
|
| 470 |
+
"{'instruction': 'ํ๊ฒฝยท์๋์ง์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': 'ํ๊ฒฝยท์๋์ง ์ ๊ณต์์๋ ์์ฐ์ ๊ตฌ์ฑํ๋ ๋ฌผ, ๋๊ธฐ, ํ ์๊ณผ ์ํ๊ณ์์ ์ผ์ด๋๋ ํ ๊ฒฝ ๋ณํ, ์๋์ง์ ์์ฐ ๊ณผ์ ๋ฐ ๋ณํ ๊ณผ์ ๋ฑ์ ํ๊ตฌํ์ฌ ์ธ๊ฐ์ ์ถ์ ์ง ํฅ์์ ๋ชฉ์ ์ผ ๋ก ํ๋ ํ๋ฌธ์ด๋ค. ํ๊ฒฝยท์๋์ง ์ ๊ณต์์๊ฒ๋ ์คํ ์ฐ๊ตฌ๋ฅผ ์ํ ๋ถ์์ ์ฌ๊ณ ์ ์ง์ค๋ ฅ, ํํ, ๋ฌผ๋ฆฌ, ์ํ ๋ฑ ๊ธฐ์ด๊ณผํ ๋ถ์ผ์ ๋ํ ์ดํด ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ํ๊ฒฝ๋ฌธ์ ๋ฐ ์์ฐ์ ๋ ํ ๋์ ์ดํด์ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.'}\n",
|
| 471 |
+
"{'instruction': '์๋ช
๊ณผํยท์๋ช
๊ณตํ์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '์๋ช
๊ณผํยท์๋ช
๊ณตํ ์ ๊ณต์์๋ ์๋ช
์ฒด์ ๋ํ ์ง์๊ณผ ์ดํด๋ฅผ ๋ฐํ์ผ๋ก ๋ค์ํ ์ ๋ช
ํ์์ ๊ณผ์ ์ ํ๊ตฌํ๋ฉฐ, ์๋ฃ, ํ๊ฒฝ ๋ฑ์ ์ฐ๊ตฌ์ ์ฐ์
์ ๊ธฐ์ด๊ฐ ๋๋ ํ๋ฌธ์ด๋ค. ์๋ช
๊ณผํยท์๋ช
๊ณตํ ์ ๊ณต์์๊ฒ๋ ์์ฐ๊ณผํ ๋ถ์ผ์ ๋ํ ๊น์ด ์๋ ์ดํด์ ์ง์ ๊ทธ๋ฆฌ๊ณ ๊ด์ฐฐ ๋ ฅ, ๋
ผ๋ฆฌ์ ์ฌ๊ณ , ๋ถ์๋ ฅ, ํต์ฐฐ๋ ฅ์ด ํ์ํ๊ณ , ๋ค์ํ ์๋ช
ํ์๊ณผ ์๋ช
์ฒด์ ๊ด์ฌ์ด ์์ด ์ผ ํ๋ค.'}\n",
|
| 472 |
+
"{'instruction': 'ํํยทํํ๊ณตํ์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': 'ํํยทํํ๊ณตํ ์ ๊ณต์์๋ ํํ ๋ฐ์์ ์๋ฆฌ๋ฅผ ์ค์ํ์ ์์ฉํ๋ ๊ธฐ์ ๊ณผ ๋ฐฉ๋ฒ์ ์ฐ๊ตฌํ๋ ํ๋ฌธ์ผ๋ก, ๊ณ ๋ถ์, ์ ์๋์ง, ํ๊ฒฝ๊ธฐ์ ๋ฑ ํํ ๊ณต์ ์ด ํ์ํ ์ฐ์
์ ํ์ฉ๋ ๋ ํ๋ฌธ์ด๋ค. ํํยทํํ๊ณตํ ์ ๊ณต์์๊ฒ๋ ์ํ, ๋ฌผ๋ฆฌ ๋ฑ ๊ธฐ์ด๊ณผํ๋ถ์ผ์ ๋ํ ์ง์๊ณผ ๋ถ์์ ์ธ ์ฌ๊ณ ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ๊ผผ๊ผผํ๊ณ ์ฃผ์ ๊น์ ์ฑ๊ฒฉ๊ณผ ๋ฌผ์ง ๋ณํ์ ๋ํ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.'}\n",
|
| 473 |
+
"{'instruction': '์ฌ๋ฃยท์ ์์ฌ์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '์ฌ๋ฃยท์ ์์ฌ ์ ๊ณต์์๋ ๋ค์ํ ์ฌ๋ฃ์ ๊ตฌ์กฐ์ ํน์ง์ ์ดํดํ๊ณ ์ฐ์
์ ํ์ํ ์ ์์ฌ, ์์ฒด์ฌ๋ฃ, ๊ธฐ๋ฅ์ฌ๋ฃ ๋ฑ์ ์ฒจ๋จ ์ฌ๋ฃ์ ๋ํด ์ฐ๊ตฌํ๊ณ ๊ฐ๋ฐํ๋ ํ๋ฌธ์ด๋ค. ์ฌ๋ฃยท์ ์์ฌ ์ ๊ณต์์๊ฒ๋ ๋ค์ํ ์์ฌ์ ๋ํ ์ดํด์ ์ด๋ฅผ ์ค์ํ์ ์ ์ฉ์ํฌ ์ ์๋ ์์ฉ๋ ฅ, ์คํยท์ค์ต์ ์ํ ๋ถ์์ ์ฌ๊ณ ๋ ฅ์ด ํ์ํ๊ณ , ์ํ, ํํ, ๋ฌผ๋ฆฌ ๋ฑ ๊ธฐ์ด๊ณผํ์ ๊ด์ฌ์ด ์ ์ด์ผ ํ๋ค.'}\n",
|
| 474 |
+
"{'instruction': '์ํ์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '์ํ ์ ๊ณต์ ์ธ์ฒด์์ ๋ฐ์ํ๋ ์ง๋ณ์ ์๋ฐฉยท์ง๋จยท์น๋ฃ์ ๊ด๋ จ๋ ์ง์์ ์ต๋ํ๊ณ , ์ด๋ฅผ ํ์ฉํ์ฌ ํ์๋ฅผ ์ง๋ฃํ๋ฉฐ, ์ํ ์ด๋ก ์ ํ๊ตฌํจ์ผ๋ก์จ ์ง๋ณ ์๋ฐฉ ๋ฐ ์น๋ฃ ๋ฐฉ์์ ๋ํ ์ฐ๊ตฌ๋ฅผ ์ํํ๋ค. ์ํ ์ ๊ณต์๋ ํํ ๋ฐ ์๋ช
๊ณผํ์ ๊ธฐ์ด ์ง์๊ณผ ๊ณผํ์ ์ฌ๊ณ ๋ฐฉ ์, ์ํ์ ์ง๋จ๊ณผ ์ฒ์น๋ฅผ ์ํ ์ถ๋ก ๋ ฅ์ด ํ์ํ๊ณ , ํ์์ ๊ณ ํต์ ์ดํดํ๋ฉฐ ์๋ช
์ ๊ตฌ ํ๋ ์ผ์ ๋ณด๋์ ๋๋ผ๊ณ , ์ธ์ฒด์ ๊ตฌ์กฐ์ ๊ธฐ๋ฅ์ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.'}\n",
|
| 475 |
+
"{'instruction': '์ฝํ์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '์ฝํ ์ ๊ณต์ ์ง๋ณ์ ์๋ฐฉ ๋ฐ ์น๋ฃ์ ์ฌ์ฉ๋๋ ์์ฝํ์ ๊ดํ ๊ธฐ์ด ์ด๋ก ๊ณผ ์์ฝํ์ ๊ฐ๋ฐ, ์์ฐ, ์กฐ์ ์ ๊ด๋ จ๋ ์ง์์ ์ต๋ํ๊ณ ์ด์ ๋ํด ์ฐ๊ตฌํ๋ค. ์ฝํ ์ ๊ณต์๋ ์คํ ๋ฐ ์ค์ต์์์ ์นจ์ฐฉํจ, ๊น์ ์ฃผ์๋ ฅ ๊ทธ๋ฆฌ๊ณ ๋ถ์์ ์ธ ์ฌ๊ณ ๋ ฅ์ด ํ์ํ๊ณ , ํํ, ์๋ช
๊ณผํ์ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.'}\n",
|
| 476 |
+
"{'instruction': '๊ฐํธ์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '๊ฐํธ ์ ๊ณต์ ์ธ๊ฐ์ด ๊ฑด๊ฐ์ ์ ์ง ๋ฐ ์ฆ์งํ๊ณ ์ง๋ณ์ผ๋ก๋ถํฐ ํ๋ณตํ ์ ์๋๋ก ํ๋ ์ด๋ก ๊ณผ ์ค๋ฌด ๊ทธ๋ฆฌ๊ณ ๊ฐํธ ๋์์์ ๊ฐํธ ๋ฌธ์ ๋ฅผ ์ง๋จ, ์ค์ฌ, ํ๊ฐํ ์ ์๋ ๋ฅ๋ ฅ์ ๊ฐ๋ฐ ํ๊ณ ์ ์ฉํ๋ค. ๊ฐํธ ์ ๊ณต์๋ ํํ ๋ฐ ์๋ช
๊ณผํ์ ๋ํ ์ดํด์ ํ์ ๊ฐํธ๋ฅผ ์ํ ์์ฌ ์ํต ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ์ง๋ณ๊ณผ ํ์์ ๊ณ ํต์ ๊ด์ฌ๊ณผ ๋ฐฐ๋ ค๊ฐ ์์ด์ผ ํ๋ค.'}\n",
|
| 477 |
+
"{'instruction': '๋ณด๊ฑดยท์์์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '๋ณด๊ฑดยท์์ ์ ๊ณต์ ๊ณต์ค๋ณด๊ฑด, ๊ฐ์ธ ์์ ๋ฑ ๊ฑด๊ฐ์ ํฅ์์ํค๊ธฐ ์ํ ๋ฐฉ์์ ํ์ํ๊ณ , ์ ์ฒด์ ยท์ ์ ์ ์ฅ์ ๋ฅผ ๊ฐ์ง ์ฌ๋๋ค์ ๋ณด์กฐํ๋ ์น๋ฃ ์์ค๊ณผ ์ฒจ๋จ ์๋ฃ ์ฅ๋น๋ฅผ ๋ค๋ฃจ๊ณ ๋ง๋๋ ๊ธฐ์ ์ ๋ํด ์ฐ๊ตฌํ๋ค. ๋ณด๊ฑดยท์์ ์ ๊ณต์๋ ๊ธฐ์ด๊ณผํ์ ๋ํ ์ง์์ ๋ฐํ์ผ๋ก ๊ณต์ค๋ณด๊ฑด๊ณผ ๊ฐ์ธ ์์์ ๋ํ ์ดํด ๋ฅ๋ ฅ, ํ์ ์ํ์ ๋ฐ๋ผ ์ ์ ํ ์น๋ฃ ๋ฐฉ๋ฒ์ ์ํํ ๋ ์คํ ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ๋ณด๊ฑด์๋ฃ์ธ์ผ๋ก์์ ์ฌ๋ช
๊ฐ์ด ์์ด์ผ ํ๋ค.'}\n",
|
| 478 |
+
"{'instruction': '๋ฌด์ฉ์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '๋ฌด์ฉ ์ ๊ณต์์๋ ๋ฌด์ฉ ์ด๋ก ๊ณผ ๊ด๋ จ๋ ์ ๋ฌธ ์ง์์ ์ต๋ํ๊ณ . ์ค๊ธฐ ๊ต์ก์ ํตํด ๊ท ํ ์๋ ์ ์ฒด์ ์์ง์์ ์ฐ๋งํ์ฌ ๋ฌด์ฉ ์ํ์ ๋ํ ํด์๋ ฅ๊ณผ ์์ ์ ํํ๋ ฅ ๋ฑ์ ํจ์ํ ๋ค. ๋ฐ๋ผ์ ์์
๊ณผ ์ฃผ์ ์ ๋ง์ถฐ ๋ชธ์ผ๋ก ํํํ ์ ์๋ ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ๊ฐํ ์ฒด๋ ฅ๊ณผ ์ ์ฐํ ์ ์ฒด์กฐ๊ฑด์ ๊ฐ์ถ๊ธฐ ์ํ ๋๊ธฐ์ ์ธ๋ด์ฌ์ด ์์ด์ผ ํ๋ค.'}\n",
|
| 479 |
+
"{'instruction': '์ฒด์ก์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '์ฒด์ก ์ ๊ณต์์๋ ์ ์ฒดํ๋์ ๋ฐํ์ผ๋ก ํ๋ ์ด๋, ๊ฑด๊ฐ, ์คํฌ์ธ ๊ณผํ, ์ฌ๊ฐ์ ๋ํ ์ด๋ก ์ ๋ฐฐ์ฐ๊ณ ์ด์ ๊ด๋ จํ ํน์ ์ค๊ธฐ ์ค์ต์ ์ํํ์ฌ ์ ์ฒด์ ๋ฅ๋ ฅ์ ๊ณ๋ฐํ๋ค. ์ฒด์ก์ ์ ๊ณตํ๊ธฐ ์ํด์๋ ๊ฐ์ธํ ์ฒด๋ ฅ ๋ฐ ์ ์ ๋ ฅ ๊ทธ๋ฆฌ๊ณ ๊ฒฝ๊ธฐ๋ฅผ ์ ๋นํ๊ฒ ์น๋ฅด๊ธฐ ์ํ ์คํฌ์ธ ๋งจ์ญ์ด ํ์ํ๊ณ , ์ด๋ํ๊ธฐ๋ฅผ ์ข์ํ๋ ๋ง์๊ณผ ์คํฌ์ธ ์ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.'}\n",
|
| 480 |
+
"{'instruction': '์ฐ์ยท์ํยท์ฐ๊ทน์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '์ฐ์ยท์ํยท์ฐ๊ทน ์ ๊ณต์ ๋ค์ํ ๋งค์ฒด๋ฅผ ํ์ฉํ์ฌ ์๋ก์ด ์์ ์ธ์ด๋ฅผ ์ฐฝ์ถํ๊ณ ์ํ, ์ฐ๊ทน ๋ฑ์ ๋ถ์ผ์์ ์ธ๊ฐ์ ์ ์๋ฅผ ์ ๋ฌํ ์ ์๋ ํ๋์ ์ํํ๋ค. ๋ฐ๋ผ์ ๋ฐฐ์ญ์ ๋ ํ ๋ถ์๋ ฅ, ์ฐ๊ธฐ๋ ฅ, ์์ ์ ์ง์์ด ํ์ํ๊ณ , ์๊ฐ์ ๋งค์ฒด๋ฅผ ํตํด ํํํ๋ ๋ฅ๋ ฅ์ด ํ์ ํ๋ฉฐ ๋ฏธ์ , ์์
, ๋ฌด์ฉ, ์ดํ, ์ฐ๊ทน, ์ฌํํ, ์ธ๋ก ํ ๋ฑ ๋ค์ํ ๋ถ์ผ์ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.'}\n",
|
| 481 |
+
"{'instruction': '์์
์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '์์
์ ๊ณต์์๋ ์์
์ ๋ฐ์ ๊ดํ ์ด๋ก ํ์ต๊ณผ ๋ค์ํ ์์
๊ด๋ จ ๊ธฐ๊ต์ ํ๋ จ์ ํต ํด ๊ธฐ์กด ์์
์ ์ฐ์ฃผํ๊ฑฐ๋ ์๋ก์ด ์์
์ ์ฐฝ์ํ๋ฉฐ, ์ ๋ฌธ์ ์ผ๋ก ๊ฐ์ ๋ฐ ๋นํํ๋ ๋ฅ๋ ฅ ์ ๊ณ๋ฐํ๋ค. ๊ทธ๋ฌ๋ฏ๋ก ์ฒญ์๋ฅ๋ ฅ, ์ฐฝ์๋ ฅ, ์์
์ ์๋ฆฌ๋ ์
๊ธฐ๋ก ํํํ ์ ์๋ ์ฐ์ฃผ ๋ฅ ๋ ฅ์ด ํ์ํ๊ณ , ์๊ฐ๊ณผ ์์
์ ๊ฐ์์ฑ์ด ์์ด์ผ ํ๋ค.'}\n",
|
| 482 |
+
"{'instruction': '๋ฏธ์ ยท๊ณต์ยท๋์์ธ์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '๋ฏธ์ ยท๊ณต์ยท๋์์ธ ์ ๊ณต์ ๋ฏธ์ ์ ๋ฐ์ ๋ํ ์ง์๊ณผ ์ค๊ธฐ ๊ต์ก์ ํตํด ๋ฏธ์ ์ ์์๋ ฅ ๊ณผ ์ฐฝ์์ฑ ๋ฐ ์ฌ๋ฌ ๊ฐ์ง ํํ ๋ฐฉ๋ฒ์ ์ต๋ํ๊ณ , ๊ธฐ๋ฅ์ ยท์ฌ๋ฏธ์ ์กฐ๊ฑด์ ๊ณ ๋ คํ์ฌ ๊ฐ์ข
์ ํ์ ๋์์ธ์ ๊ฐ๋ฐํ๊ณ ์ฐ๊ตฌํ๋ค. ๋ฐ๋ผ์ ๋ฏธ์ ์ํ์ ๋ง๋ค๊ธฐ ์ํ ๋
์ฐฝ์ฑ๊ณผ ์ฐฝ์์ฑ, ์ ๊ตํ ๊ธฐ๋ฒ ํ์ฉ ๋ฅ๋ ฅ์ด ํ์ํ๊ณ , ์์ ๋ถ์ผ์์์ ๋ณํ์ ์ ํ์ ๋ฏผ๊ฐํด์ผ ํ๋ค.'}\n",
|
| 483 |
+
"{'instruction': '๋์งํธ์ฝํ
์ธ ์ด๋ ๋ฌด์์ธ๊ฐ์?', 'response': '๋์งํธ์ฝํ
์ธ ์ ๊ณต์์๋ ๋ด๋ฏธ๋์ด ๋งค์ฒด์ธ ๋์งํธ ๊ฒ์, ๋์งํธ ์์, ์ ๋๋ฉ์ด์
, ์ธํฐ๋ํฐ๋ธ ๋์์ธ์ฝํ
์ธ ๋ฑ์ ๊ธฐํ๊ณผ ์ฐ์ถ์ ์ํํ๊ณ , ๋ค์ํ ์๊ฐ์ ํํ๊ณผ ๋์์ธ ์ ํ์ ๋ฐ ์ฐฝ์ถํ๋ค. ๊ทธ๋ฌ๋ฏ๋ก ๋์งํธ์ฝํ
์ธ ์ ๊ณต์๊ฐ ๋๊ธฐ ์ํด์๋ ๋์งํธ ๊ธฐ์ ์ ๋ ํ ์ง์, ๋์งํธ์ฝํ
์ธ ์ ์ ๋ฅ๋ ฅ, ํธ๋ ๋๋ฅผ ํ์
ํ๋ ๋ฅ๋ ฅ, ์ฐฝ์๋ ฅ, ๊ธฐํ๋ ฅ์ด ํ์ํ๊ณ , ๋ค์ํ ๋งค์ฒด์ ๋ํ ๊ด์ฌ์ด ์์ด์ผ ํ๋ค.'}\n",
|
| 484 |
+
"[99, 97, 91, 100, 115, 116, 105, 123, 106, 118, 118, 118, 125, 115, 108, 90, 127, 82, 79, 82, 107, 112, 115, 103, 75, 91, 113, 102, 116, 111, 103, 109, 93, 129, 85, 93, 115, 110, 110, 111, 122, 88, 92, 119, 92, 98, 109, 94, 100, 104]\n"
|
| 485 |
+
]
|
| 486 |
+
}
|
| 487 |
+
],
|
| 488 |
+
"source": [
|
| 489 |
+
"# ์ ์ํ ํ์ต๋ฐ์ดํฐ์ ํ ํฌ๋์ด์ ๊ฐ 512๋ฅผ ๋๋ ๋ฐ์ดํฐ๋ ์๋ฆฌ๊ธฐ ๋๋ฌธ์\n",
|
| 490 |
+
"# ํ์ต๋ฐ์ดํฐ๊ฐ ๋ช ํ ํฐ์ผ๋ก ๊ตฌ์ฑ๋๋์ง ํ์
\n",
|
| 491 |
+
"from datasets import load_dataset\n",
|
| 492 |
+
"dataset = load_dataset(\"json\", data_files=\"dataset/kcmii_major_instruction_data_summarized.jsonl\", split=\"train\")\n",
|
| 493 |
+
"\n",
|
| 494 |
+
"def token_len(data) :\n",
|
| 495 |
+
" token_lengths = []\n",
|
| 496 |
+
" prompt = f\"{data['instruction']}\\n๋ต๋ณ: {data['response']}\"\n",
|
| 497 |
+
" token = tokenizer(prompt, truncation=False)[\"input_ids\"]\n",
|
| 498 |
+
" token_lengths.append(len(token))\n",
|
| 499 |
+
" return token_lengths\n",
|
| 500 |
+
" \n",
|
| 501 |
+
"token_lengths = []\n",
|
| 502 |
+
"for i in range(0, len(dataset)) :\n",
|
| 503 |
+
" data = dataset[i]\n",
|
| 504 |
+
" print(data)\n",
|
| 505 |
+
" prompt = f\"{data['instruction']}\\n๋ต๋ณ: {data['response']}\"\n",
|
| 506 |
+
" token = tokenizer(prompt, truncation=False)[\"input_ids\"]\n",
|
| 507 |
+
" token_lengths.append(len(token))\n",
|
| 508 |
+
"print(token_lengths)\n",
|
| 509 |
+
"\n",
|
| 510 |
+
"# 512 ๋์ด๊ฐ๋ ํ ํฐ์ ์์"
|
| 511 |
+
]
|
| 512 |
+
},
|
| 513 |
+
{
|
| 514 |
+
"cell_type": "code",
|
| 515 |
+
"execution_count": 3,
|
| 516 |
+
"id": "be874ebd-da32-418c-9da5-ec68d2d8edfd",
|
| 517 |
+
"metadata": {
|
| 518 |
+
"scrolled": true
|
| 519 |
+
},
|
| 520 |
+
"outputs": [
|
| 521 |
+
{
|
| 522 |
+
"name": "stderr",
|
| 523 |
+
"output_type": "stream",
|
| 524 |
+
"text": [
|
| 525 |
+
"No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.\n",
|
| 526 |
+
"/opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages/torch/utils/data/dataloader.py:683: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, then device pinned memory won't be used.\n",
|
| 527 |
+
" warnings.warn(warn_msg)\n"
|
| 528 |
+
]
|
| 529 |
+
},
|
| 530 |
+
{
|
| 531 |
+
"data": {
|
| 532 |
+
"text/html": [
|
| 533 |
+
"\n",
|
| 534 |
+
" <div>\n",
|
| 535 |
+
" \n",
|
| 536 |
+
" <progress value='250' max='250' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
|
| 537 |
+
" [250/250 15:42, Epoch 10/10]\n",
|
| 538 |
+
" </div>\n",
|
| 539 |
+
" <table border=\"1\" class=\"dataframe\">\n",
|
| 540 |
+
" <thead>\n",
|
| 541 |
+
" <tr style=\"text-align: left;\">\n",
|
| 542 |
+
" <th>Step</th>\n",
|
| 543 |
+
" <th>Training Loss</th>\n",
|
| 544 |
+
" </tr>\n",
|
| 545 |
+
" </thead>\n",
|
| 546 |
+
" <tbody>\n",
|
| 547 |
+
" <tr>\n",
|
| 548 |
+
" <td>10</td>\n",
|
| 549 |
+
" <td>9.802700</td>\n",
|
| 550 |
+
" </tr>\n",
|
| 551 |
+
" <tr>\n",
|
| 552 |
+
" <td>20</td>\n",
|
| 553 |
+
" <td>9.243700</td>\n",
|
| 554 |
+
" </tr>\n",
|
| 555 |
+
" <tr>\n",
|
| 556 |
+
" <td>30</td>\n",
|
| 557 |
+
" <td>8.776400</td>\n",
|
| 558 |
+
" </tr>\n",
|
| 559 |
+
" <tr>\n",
|
| 560 |
+
" <td>40</td>\n",
|
| 561 |
+
" <td>8.264700</td>\n",
|
| 562 |
+
" </tr>\n",
|
| 563 |
+
" <tr>\n",
|
| 564 |
+
" <td>50</td>\n",
|
| 565 |
+
" <td>7.813600</td>\n",
|
| 566 |
+
" </tr>\n",
|
| 567 |
+
" <tr>\n",
|
| 568 |
+
" <td>60</td>\n",
|
| 569 |
+
" <td>6.891800</td>\n",
|
| 570 |
+
" </tr>\n",
|
| 571 |
+
" <tr>\n",
|
| 572 |
+
" <td>70</td>\n",
|
| 573 |
+
" <td>6.322800</td>\n",
|
| 574 |
+
" </tr>\n",
|
| 575 |
+
" <tr>\n",
|
| 576 |
+
" <td>80</td>\n",
|
| 577 |
+
" <td>5.139000</td>\n",
|
| 578 |
+
" </tr>\n",
|
| 579 |
+
" <tr>\n",
|
| 580 |
+
" <td>90</td>\n",
|
| 581 |
+
" <td>3.943400</td>\n",
|
| 582 |
+
" </tr>\n",
|
| 583 |
+
" <tr>\n",
|
| 584 |
+
" <td>100</td>\n",
|
| 585 |
+
" <td>2.517000</td>\n",
|
| 586 |
+
" </tr>\n",
|
| 587 |
+
" <tr>\n",
|
| 588 |
+
" <td>110</td>\n",
|
| 589 |
+
" <td>1.442300</td>\n",
|
| 590 |
+
" </tr>\n",
|
| 591 |
+
" <tr>\n",
|
| 592 |
+
" <td>120</td>\n",
|
| 593 |
+
" <td>1.049300</td>\n",
|
| 594 |
+
" </tr>\n",
|
| 595 |
+
" <tr>\n",
|
| 596 |
+
" <td>130</td>\n",
|
| 597 |
+
" <td>0.892200</td>\n",
|
| 598 |
+
" </tr>\n",
|
| 599 |
+
" <tr>\n",
|
| 600 |
+
" <td>140</td>\n",
|
| 601 |
+
" <td>0.763800</td>\n",
|
| 602 |
+
" </tr>\n",
|
| 603 |
+
" <tr>\n",
|
| 604 |
+
" <td>150</td>\n",
|
| 605 |
+
" <td>0.728200</td>\n",
|
| 606 |
+
" </tr>\n",
|
| 607 |
+
" <tr>\n",
|
| 608 |
+
" <td>160</td>\n",
|
| 609 |
+
" <td>0.697900</td>\n",
|
| 610 |
+
" </tr>\n",
|
| 611 |
+
" <tr>\n",
|
| 612 |
+
" <td>170</td>\n",
|
| 613 |
+
" <td>0.658500</td>\n",
|
| 614 |
+
" </tr>\n",
|
| 615 |
+
" <tr>\n",
|
| 616 |
+
" <td>180</td>\n",
|
| 617 |
+
" <td>0.633600</td>\n",
|
| 618 |
+
" </tr>\n",
|
| 619 |
+
" <tr>\n",
|
| 620 |
+
" <td>190</td>\n",
|
| 621 |
+
" <td>0.616100</td>\n",
|
| 622 |
+
" </tr>\n",
|
| 623 |
+
" <tr>\n",
|
| 624 |
+
" <td>200</td>\n",
|
| 625 |
+
" <td>0.639900</td>\n",
|
| 626 |
+
" </tr>\n",
|
| 627 |
+
" <tr>\n",
|
| 628 |
+
" <td>210</td>\n",
|
| 629 |
+
" <td>0.610700</td>\n",
|
| 630 |
+
" </tr>\n",
|
| 631 |
+
" <tr>\n",
|
| 632 |
+
" <td>220</td>\n",
|
| 633 |
+
" <td>0.607500</td>\n",
|
| 634 |
+
" </tr>\n",
|
| 635 |
+
" <tr>\n",
|
| 636 |
+
" <td>230</td>\n",
|
| 637 |
+
" <td>0.618500</td>\n",
|
| 638 |
+
" </tr>\n",
|
| 639 |
+
" <tr>\n",
|
| 640 |
+
" <td>240</td>\n",
|
| 641 |
+
" <td>0.592900</td>\n",
|
| 642 |
+
" </tr>\n",
|
| 643 |
+
" <tr>\n",
|
| 644 |
+
" <td>250</td>\n",
|
| 645 |
+
" <td>0.587400</td>\n",
|
| 646 |
+
" </tr>\n",
|
| 647 |
+
" </tbody>\n",
|
| 648 |
+
"</table><p>"
|
| 649 |
+
],
|
| 650 |
+
"text/plain": [
|
| 651 |
+
"<IPython.core.display.HTML object>"
|
| 652 |
+
]
|
| 653 |
+
},
|
| 654 |
+
"metadata": {},
|
| 655 |
+
"output_type": "display_data"
|
| 656 |
+
},
|
| 657 |
+
{
|
| 658 |
+
"name": "stderr",
|
| 659 |
+
"output_type": "stream",
|
| 660 |
+
"text": [
|
| 661 |
+
"/opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages/torch/utils/data/dataloader.py:683: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, then device pinned memory won't be used.\n",
|
| 662 |
+
" warnings.warn(warn_msg)\n",
|
| 663 |
+
"/opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages/torch/utils/data/dataloader.py:683: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, then device pinned memory won't be used.\n",
|
| 664 |
+
" warnings.warn(warn_msg)\n",
|
| 665 |
+
"/opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages/torch/utils/data/dataloader.py:683: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, then device pinned memory won't be used.\n",
|
| 666 |
+
" warnings.warn(warn_msg)\n",
|
| 667 |
+
"/opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages/torch/utils/data/dataloader.py:683: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, then device pinned memory won't be used.\n",
|
| 668 |
+
" warnings.warn(warn_msg)\n",
|
| 669 |
+
"/opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages/torch/utils/data/dataloader.py:683: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, then device pinned memory won't be used.\n",
|
| 670 |
+
" warnings.warn(warn_msg)\n",
|
| 671 |
+
"/opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages/torch/utils/data/dataloader.py:683: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, then device pinned memory won't be used.\n",
|
| 672 |
+
" warnings.warn(warn_msg)\n",
|
| 673 |
+
"/opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages/torch/utils/data/dataloader.py:683: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, then device pinned memory won't be used.\n",
|
| 674 |
+
" warnings.warn(warn_msg)\n",
|
| 675 |
+
"/opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages/torch/utils/data/dataloader.py:683: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, then device pinned memory won't be used.\n",
|
| 676 |
+
" warnings.warn(warn_msg)\n",
|
| 677 |
+
"/opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages/torch/utils/data/dataloader.py:683: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, then device pinned memory won't be used.\n",
|
| 678 |
+
" warnings.warn(warn_msg)\n"
|
| 679 |
+
]
|
| 680 |
+
},
|
| 681 |
+
{
|
| 682 |
+
"data": {
|
| 683 |
+
"text/plain": [
|
| 684 |
+
"('./kcmii_major_lora_tuned/tokenizer_config.json',\n",
|
| 685 |
+
" './kcmii_major_lora_tuned/special_tokens_map.json',\n",
|
| 686 |
+
" './kcmii_major_lora_tuned/tokenizer.json')"
|
| 687 |
+
]
|
| 688 |
+
},
|
| 689 |
+
"execution_count": 3,
|
| 690 |
+
"metadata": {},
|
| 691 |
+
"output_type": "execute_result"
|
| 692 |
+
}
|
| 693 |
+
],
|
| 694 |
+
"source": [
|
| 695 |
+
"tokenized_dataset = dataset.map(tokenize)\n",
|
| 696 |
+
"\n",
|
| 697 |
+
"# 5. ํ์ต์ ์ํํ ๋ ์ฌ์ฉํ ํ๋ผ๋ฏธํฐ ์ง์ \n",
|
| 698 |
+
"training_args = TrainingArguments(\n",
|
| 699 |
+
" # ํ์ต ๋์ค ๋ชจ๋ธ ์ฒดํฌํฌ์ธํธ ์ ์ฅํ ํด๋ ๊ฒฝ๋ก\n",
|
| 700 |
+
" ## ์ค๊ฐ ๊ฒฐ๊ณผ ๋ฐฑ์
, ์คํ๋ณ ๊ตฌ๋ถ ๊ฐ๋ฅ\n",
|
| 701 |
+
" output_dir=\"./kcmii_major_lora\",\n",
|
| 702 |
+
" # ํ๋ฒ์ ํ์ตํ ์ํ ๊ฐ์\n",
|
| 703 |
+
" per_device_train_batch_size=2,\n",
|
| 704 |
+
" # ์ ์ฒด ๋ฐ์ดํฐ๋ฅผ ๋ช ๋ฒ ๋ฐ๋ณต ํ์ตํ ์ง\n",
|
| 705 |
+
" num_train_epochs=10,\n",
|
| 706 |
+
" # ๋ช ์คํ
๋ง๋ค ํ์ต ๋ก๊ทธ ์ถ๋ ฅํ ์ง\n",
|
| 707 |
+
" logging_steps=10,\n",
|
| 708 |
+
" # ๋ชจ๋ธ ์ ์ฅ ์ฃผ๊ธฐ ์ค์ (๋งค ์ํญ๋ง๋ค ์ ์ฅ)\n",
|
| 709 |
+
" save_strategy=\"epoch\",\n",
|
| 710 |
+
" # ์ต์ ํ ๋ฐฉ๋ฒ ์ค์ \n",
|
| 711 |
+
" optim=\"adamw_torch\"\n",
|
| 712 |
+
")\n",
|
| 713 |
+
"\n",
|
| 714 |
+
"# 6. Trainer ๊ตฌ์ฑ ๋ฐ ํ์ต ์คํ\n",
|
| 715 |
+
"## ๋ชจ๋ธ, ๋ฐ์ดํฐ์
, ํ์ต ์ธ์๋ค์ ๋ฐํ์ผ๋ก ๋ด๋ถ์ ์ผ๋ก for ๋ฐ๋ณต, ์ต์ ํ, ์์ค ๊ณ์ฐ ๋ฑ์ ์๋ ์ฒ๋ฆฌ\n",
|
| 716 |
+
"trainer = Trainer(\n",
|
| 717 |
+
" model=model,\n",
|
| 718 |
+
" args=training_args,\n",
|
| 719 |
+
" train_dataset=tokenized_dataset\n",
|
| 720 |
+
")\n",
|
| 721 |
+
"\n",
|
| 722 |
+
"trainer.train()\n",
|
| 723 |
+
"\n",
|
| 724 |
+
"# 7. ํ๋๋ ๋ชจ๋ธ ์ ์ฅ\n",
|
| 725 |
+
"model.save_pretrained(\"./kcmii_major_lora_tuned\")\n",
|
| 726 |
+
"tokenizer.save_pretrained(\"./kcmii_major_lora_tuned\")"
|
| 727 |
+
]
|
| 728 |
+
},
|
| 729 |
+
{
|
| 730 |
+
"cell_type": "code",
|
| 731 |
+
"execution_count": 16,
|
| 732 |
+
"id": "7e217351-2861-4bb7-b04b-657e91abe461",
|
| 733 |
+
"metadata": {},
|
| 734 |
+
"outputs": [
|
| 735 |
+
{
|
| 736 |
+
"name": "stderr",
|
| 737 |
+
"output_type": "stream",
|
| 738 |
+
"text": [
|
| 739 |
+
"Loading checkpoint shards: 100%|โโโโโโโโโโ| 3/3 [00:11<00:00, 3.93s/it]\n",
|
| 740 |
+
"Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n"
|
| 741 |
+
]
|
| 742 |
+
},
|
| 743 |
+
{
|
| 744 |
+
"name": "stdout",
|
| 745 |
+
"output_type": "stream",
|
| 746 |
+
"text": [
|
| 747 |
+
"\n",
|
| 748 |
+
"๐ ์์ฑ๋ ๋ฌธ์ฅ:\n",
|
| 749 |
+
"\n",
|
| 750 |
+
"์ด๋ฌธํ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 751 |
+
"๋ต๋ณ: ์ด๋ฌธํ์ ๋ฌธํ์ ์ค์ฌ์ผ๋ก ํ๋ฌธ์ ํ ์์ญ\n"
|
| 752 |
+
]
|
| 753 |
+
}
|
| 754 |
+
],
|
| 755 |
+
"source": [
|
| 756 |
+
"# 1์ฐจ ์ถ๋ก ํ
์คํธ\n",
|
| 757 |
+
"from peft import PeftModel\n",
|
| 758 |
+
"from transformers import AutoTokenizer, AutoModelForCausalLM\n",
|
| 759 |
+
"import torch\n",
|
| 760 |
+
"\n",
|
| 761 |
+
"device = torch.device(\"mps\" if torch.backends.mps.is_available() else \"cpu\")\n",
|
| 762 |
+
"\n",
|
| 763 |
+
"base_model = AutoModelForCausalLM.from_pretrained(\"EleutherAI/polyglot-ko-1.3b\", device_map=None).to(device)\n",
|
| 764 |
+
"model = PeftModel.from_pretrained(base_model, \"./kcmii_major_lora_tuned\").to(device)\n",
|
| 765 |
+
"\n",
|
| 766 |
+
"tokenizer = AutoTokenizer.from_pretrained(\"EleutherAI/polyglot-ko-1.3b\")\n",
|
| 767 |
+
"\n",
|
| 768 |
+
"prompt = \"์ด๋ฌธํ์ด๋ ๋ฌด์์ธ๊ฐ์?\\n๋ต๋ณ: ์ด๋ฌธํ์\"\n",
|
| 769 |
+
"inputs = tokenizer(prompt, return_tensors=\"pt\").to(device)\n",
|
| 770 |
+
"\n",
|
| 771 |
+
"if \"token_type_ids\" in inputs:\n",
|
| 772 |
+
" inputs.pop(\"token_type_ids\")\n",
|
| 773 |
+
"\n",
|
| 774 |
+
"with torch.no_grad():\n",
|
| 775 |
+
" outputs = model.generate(\n",
|
| 776 |
+
" **inputs,\n",
|
| 777 |
+
" max_new_tokens=300,\n",
|
| 778 |
+
" do_sample=True,\n",
|
| 779 |
+
" top_k=50,\n",
|
| 780 |
+
" top_p=0.95,\n",
|
| 781 |
+
" temperature=0.8,\n",
|
| 782 |
+
" repetition_penalty=1.1\n",
|
| 783 |
+
" )\n",
|
| 784 |
+
"\n",
|
| 785 |
+
"result = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
|
| 786 |
+
"print(\"\\n๐ ์์ฑ๋ ๋ฌธ์ฅ:\\n\")\n",
|
| 787 |
+
"print(result)"
|
| 788 |
+
]
|
| 789 |
+
},
|
| 790 |
+
{
|
| 791 |
+
"cell_type": "code",
|
| 792 |
+
"execution_count": 11,
|
| 793 |
+
"id": "6741bf0d-33cc-40fd-8936-15e29536424d",
|
| 794 |
+
"metadata": {},
|
| 795 |
+
"outputs": [
|
| 796 |
+
{
|
| 797 |
+
"name": "stdout",
|
| 798 |
+
"output_type": "stream",
|
| 799 |
+
"text": [
|
| 800 |
+
"๋ก๋ผ ํ๋ผ๋ฏธํฐ ์: 1572864\n"
|
| 801 |
+
]
|
| 802 |
+
}
|
| 803 |
+
],
|
| 804 |
+
"source": [
|
| 805 |
+
"print(\"๋ก๋ผ ํ๋ผ๋ฏธํฐ ์:\", sum(p.numel() for n, p in model.named_parameters() if \"lora\" in n))"
|
| 806 |
+
]
|
| 807 |
+
},
|
| 808 |
+
{
|
| 809 |
+
"cell_type": "code",
|
| 810 |
+
"execution_count": 12,
|
| 811 |
+
"id": "0c224268-1848-49a1-98c3-425c189063bd",
|
| 812 |
+
"metadata": {},
|
| 813 |
+
"outputs": [
|
| 814 |
+
{
|
| 815 |
+
"name": "stdout",
|
| 816 |
+
"output_type": "stream",
|
| 817 |
+
"text": [
|
| 818 |
+
"๐ข ์ถ๋ ฅ ํ ํฐ ์: 69\n",
|
| 819 |
+
"๐งฉ ํ ํฐ ID ์ํ์ค: [348, 3446, 5517, 1937, 11260, 34, 202, 1477, 1078, 29, 1437, 3446, 296, 414, 15, 3260, 15, 26667, 15, 16499, 15, 29953, 433, 285, 3636, 285, 18765, 301, 502, 284, 453, 414, 15, 3260, 15, 26667, 15, 16499, 15, 29953, 433, 3636, 285, 2831, 274, 1026, 26667, 15, 7991, 15, 7991, 14735, 15, 7991, 5694, 433, 339, 7093, 3668, 17, 1437, 3446, 296, 3636, 285, 394, 18765, 2048, 2]\n",
|
| 820 |
+
"๐ ๋์ฝ๋ฉ ์ ์ฒด: ์ด๋ฌธํ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 821 |
+
"๋ต๋ณ: ํ๋๋ฌธํ์ ์, ์์ค, ํฌ๊ณก, ์ํ, ํ๋ก ๋ฑ์ ๋ฌธํ์ ๊ฐ๋๋ฅผ ๋งํ๋ฉฐ ์, ์์ค, ํฌ๊ณก, ์ํ, ํ๋ก ๋ฑ ๋ฌธํ์ ํํ์ ๋ฐ๋ผ ํฌ๊ณก, ์๋๋ฆฌ์ค, ์๋๋ฆฌ์ค์ฐฝ์, ์๋๋ฆฌ์ค์๊ฐ ๋ฑ์ผ๋ก ๋๋ฉ๋๋ค. ํ๋๋ฌธํ์ ๋ฌธํ์ ํ ๊ฐ๋๋ก์<|endoftext|>\n"
|
| 822 |
+
]
|
| 823 |
+
}
|
| 824 |
+
],
|
| 825 |
+
"source": [
|
| 826 |
+
"print(\"๐ข ์ถ๋ ฅ ํ ํฐ ์:\", len(outputs[0]))\n",
|
| 827 |
+
"print(\"๐งฉ ํ ํฐ ID ์ํ์ค:\", outputs[0].tolist())\n",
|
| 828 |
+
"print(\"๐ ๋์ฝ๋ฉ ์ ์ฒด:\", tokenizer.decode(outputs[0], skip_special_tokens=False))"
|
| 829 |
+
]
|
| 830 |
+
},
|
| 831 |
+
{
|
| 832 |
+
"cell_type": "code",
|
| 833 |
+
"execution_count": 8,
|
| 834 |
+
"id": "83bdead2-8efe-4764-9b9e-d3d29c884ac6",
|
| 835 |
+
"metadata": {},
|
| 836 |
+
"outputs": [
|
| 837 |
+
{
|
| 838 |
+
"name": "stderr",
|
| 839 |
+
"output_type": "stream",
|
| 840 |
+
"text": [
|
| 841 |
+
"/opt/anaconda3/envs/report_error_detector/lib/python3.10/site-packages/transformers/generation/configuration_utils.py:631: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.7` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.\n",
|
| 842 |
+
" warnings.warn(\n",
|
| 843 |
+
"Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n",
|
| 844 |
+
"Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n",
|
| 845 |
+
"Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n"
|
| 846 |
+
]
|
| 847 |
+
},
|
| 848 |
+
{
|
| 849 |
+
"name": "stdout",
|
| 850 |
+
"output_type": "stream",
|
| 851 |
+
"text": [
|
| 852 |
+
"\n",
|
| 853 |
+
"๐ ํ๋กฌํํธ: ์ด๋ฌธํ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 854 |
+
"๋ต๋ณ:\n",
|
| 855 |
+
"๐ ์์ฑ๋ ๋ฌธ์ฅ:\n",
|
| 856 |
+
"์ด๋ฌธํ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 857 |
+
"๋ต๋ณ: ๋ฌธํ์ ์ธ๊ฐ์ ์ฌ์๊ณผ ๊ฐ์ ์ ํํํ๋ ์์ ์ \n",
|
| 858 |
+
"\n",
|
| 859 |
+
"๐ ํ๋กฌํํธ: ์ด๋ฌธํ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 860 |
+
"\n",
|
| 861 |
+
"๋ต๋ณ:\n",
|
| 862 |
+
"๐ ์์ฑ๋ ๋ฌธ์ฅ:\n",
|
| 863 |
+
"์ด๋ฌธํ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 864 |
+
"\n",
|
| 865 |
+
"๋ต๋ณ:\n"
|
| 866 |
+
]
|
| 867 |
+
},
|
| 868 |
+
{
|
| 869 |
+
"name": "stderr",
|
| 870 |
+
"output_type": "stream",
|
| 871 |
+
"text": [
|
| 872 |
+
"Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n"
|
| 873 |
+
]
|
| 874 |
+
},
|
| 875 |
+
{
|
| 876 |
+
"name": "stdout",
|
| 877 |
+
"output_type": "stream",
|
| 878 |
+
"text": [
|
| 879 |
+
"\n",
|
| 880 |
+
"๐ ํ๋กฌํํธ: ์ด๋ฌธํ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 881 |
+
"๋ต๋ณ:\n",
|
| 882 |
+
"\n",
|
| 883 |
+
"๐ ์์ฑ๋ ๋ฌธ์ฅ:\n",
|
| 884 |
+
"์ด๋ฌธํ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 885 |
+
"๋ต๋ณ:\n",
|
| 886 |
+
"๋ฌธํ์ ๋ฌธํ์ ํ ๋ถ์ผ๋ก, ์ธ์ด๋ฅผ ๋งค์ฒด๋ก ํ์ฌ ์ธ๊ฐ๊ณผ ์ธ๊ณ์ ๋ํ ํํ์ ํ๊ตฌํ๋\n",
|
| 887 |
+
"\n",
|
| 888 |
+
"๐ ํ๋กฌํํธ: Instruction: ์ด๋ฌธํ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 889 |
+
"Response:\n",
|
| 890 |
+
"๐ ์์ฑ๋ ๋ฌธ์ฅ:\n",
|
| 891 |
+
"Instruction: ์ด๋ฌธํ์ด๋ ๋ฌด์์ธ๊ฐ์?\n",
|
| 892 |
+
"Response:\n"
|
| 893 |
+
]
|
| 894 |
+
}
|
| 895 |
+
],
|
| 896 |
+
"source": [
|
| 897 |
+
"for test_prompt in [\n",
|
| 898 |
+
" \"์ด๋ฌธํ์ด๋ ๋ฌด์์ธ๊ฐ์?\\n๋ต๋ณ:\",\n",
|
| 899 |
+
" \"์ด๋ฌธํ์ด๋ ๋ฌด์์ธ๊ฐ์?\\n\\n๋ต๋ณ:\",\n",
|
| 900 |
+
" \"์ด๋ฌธํ์ด๋ ๋ฌด์์ธ๊ฐ์?\\n๋ต๋ณ:\\n\",\n",
|
| 901 |
+
" \"Instruction: ์ด๋ฌธํ์ด๋ ๋ฌด์์ธ๊ฐ์?\\nResponse:\"\n",
|
| 902 |
+
"]:\n",
|
| 903 |
+
" inputs = tokenizer(test_prompt, return_tensors=\"pt\").to(device)\n",
|
| 904 |
+
" if \"token_type_ids\" in inputs:\n",
|
| 905 |
+
" inputs.pop(\"token_type_ids\")\n",
|
| 906 |
+
" with torch.no_grad():\n",
|
| 907 |
+
" outputs = model.generate(\n",
|
| 908 |
+
" **inputs,\n",
|
| 909 |
+
" max_new_tokens=100,\n",
|
| 910 |
+
" do_sample=False,\n",
|
| 911 |
+
" temperature=0.7,\n",
|
| 912 |
+
" top_k=0,\n",
|
| 913 |
+
" top_p=1.0,\n",
|
| 914 |
+
" repetition_penalty=1.2,\n",
|
| 915 |
+
" eos_token_id=tokenizer.eos_token_id\n",
|
| 916 |
+
" )\n",
|
| 917 |
+
" result = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
|
| 918 |
+
" print(f\"\\n๐ ํ๋กฌํํธ: {test_prompt}\\n๐ ์์ฑ๋ ๋ฌธ์ฅ:\\n{result}\")"
|
| 919 |
+
]
|
| 920 |
+
},
|
| 921 |
+
{
|
| 922 |
+
"cell_type": "code",
|
| 923 |
+
"execution_count": 12,
|
| 924 |
+
"id": "2176d8d5-bdfb-43b3-bcbe-6cc8548514f3",
|
| 925 |
+
"metadata": {},
|
| 926 |
+
"outputs": [
|
| 927 |
+
{
|
| 928 |
+
"name": "stderr",
|
| 929 |
+
"output_type": "stream",
|
| 930 |
+
"text": [
|
| 931 |
+
"Loading checkpoint shards: 100%|โโโโโโโโโโ| 3/3 [00:09<00:00, 3.24s/it]\n"
|
| 932 |
+
]
|
| 933 |
+
},
|
| 934 |
+
{
|
| 935 |
+
"name": "stdout",
|
| 936 |
+
"output_type": "stream",
|
| 937 |
+
"text": [
|
| 938 |
+
"๐ ์์ 10๊ฐ ์์ธก ๊ฒฐ๊ณผ:\n",
|
| 939 |
+
"1. ' ๋ฌธํ' - ํ๋ฅ : 10.66%\n",
|
| 940 |
+
"2. '<|endoftext|>' - ํ๋ฅ : 10.05%\n",
|
| 941 |
+
"3. ' ์ธ๊ฐ' - ํ๋ฅ : 5.13%\n",
|
| 942 |
+
"4. ' ์ธ์ด' - ํ๋ฅ : 3.81%\n",
|
| 943 |
+
"5. ' ๋ฌด์' - ํ๋ฅ : 2.44%\n",
|
| 944 |
+
"6. ' ํฌ' - ํ๋ฅ : 2.14%\n",
|
| 945 |
+
"7. ' ์์
' - ํ๋ฅ : 1.92%\n",
|
| 946 |
+
"8. ' ์ธ๋ฌธํ' - ํ๋ฅ : 1.84%\n",
|
| 947 |
+
"9. ' ์ฌํ' - ํ๋ฅ : 1.61%\n",
|
| 948 |
+
"10. ' ๋ฌธํ' - ํ๋ฅ : 1.45%\n"
|
| 949 |
+
]
|
| 950 |
+
}
|
| 951 |
+
],
|
| 952 |
+
"source": [
|
| 953 |
+
"# Next-token probability distribution predicted by the fine-tuned (LoRA) model for the input prompt\n",
|
| 954 |
+
"import torch\n",
|
| 955 |
+
"from transformers import AutoModelForCausalLM, AutoTokenizer\n",
|
| 956 |
+
"from peft import PeftModel\n",
|
| 957 |
+
"\n",
|
| 958 |
+
"# ๋ชจ๋ธ ๋ถ๋ฌ์ค๊ธฐ\n",
|
| 959 |
+
"model_name = \"EleutherAI/polyglot-ko-1.3b\"\n",
|
| 960 |
+
"base_model = AutoModelForCausalLM.from_pretrained(model_name).to(\"mps\")\n",
|
| 961 |
+
"model = PeftModel.from_pretrained(base_model, \"./kcmii_major_lora_tuned\").to(\"mps\")\n",
|
| 962 |
+
"tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
|
| 963 |
+
"\n",
|
| 964 |
+
"# ํ๋กฌํํธ\n",
|
| 965 |
+
"prompt = \"์ด๋ฌธํ์ด๋ ๋ฌด์์ธ๊ฐ์?\\n๋ต๋ณ: ์ด๋ฌธํ์\"\n",
|
| 966 |
+
"inputs = tokenizer(prompt, return_tensors=\"pt\").to(\"mps\")\n",
|
| 967 |
+
"\n",
|
| 968 |
+
"# token_type_ids ์ ๊ฑฐ\n",
|
| 969 |
+
"if \"token_type_ids\" in inputs:\n",
|
| 970 |
+
" inputs.pop(\"token_type_ids\")\n",
|
| 971 |
+
"\n",
|
| 972 |
+
"# ๋ชจ๋ธ ์ถ๋ก \n",
|
| 973 |
+
"with torch.no_grad():\n",
|
| 974 |
+
" outputs = model(**inputs)\n",
|
| 975 |
+
"\n",
|
| 976 |
+
"# ๋ง์ง๋ง ํ ํฐ ๋ค์์ ์์ฑ๋ ๋จ์ด์ logit (์์ธก ๋ถํฌ)\n",
|
| 977 |
+
"next_token_logits = outputs.logits[0, -1]\n",
|
| 978 |
+
"\n",
|
| 979 |
+
"# softmax๋ก ํ๋ฅ ํ\n",
|
| 980 |
+
"probs = torch.nn.functional.softmax(next_token_logits, dim=-1)\n",
|
| 981 |
+
"\n",
|
| 982 |
+
"# ์์ 10๊ฐ ๋จ์ด ์ถ๋ ฅ\n",
|
| 983 |
+
"topk = torch.topk(probs, k=10)\n",
|
| 984 |
+
"top_indices = topk.indices\n",
|
| 985 |
+
"top_probs = topk.values\n",
|
| 986 |
+
"\n",
|
| 987 |
+
"print(\"๐ ์์ 10๊ฐ ์์ธก ๊ฒฐ๊ณผ:\")\n",
|
| 988 |
+
"for i in range(10):\n",
|
| 989 |
+
" token = top_indices[i].item()\n",
|
| 990 |
+
" print(f\"{i+1}. '{tokenizer.decode([token])}' - ํ๋ฅ : {top_probs[i].item()*100:.2f}%\")"
|
| 991 |
+
]
|
| 992 |
+
},
|
| 993 |
+
{
|
| 994 |
+
"cell_type": "code",
|
| 995 |
+
"execution_count": 15,
|
| 996 |
+
"id": "652acde5-5591-46a1-992e-ffd93f972dde",
|
| 997 |
+
"metadata": {},
|
| 998 |
+
"outputs": [
|
| 999 |
+
{
|
| 1000 |
+
"name": "stderr",
|
| 1001 |
+
"output_type": "stream",
|
| 1002 |
+
"text": [
|
| 1003 |
+
"Loading checkpoint shards: 100%|โโโโโโโโโโ| 3/3 [00:11<00:00, 3.84s/it]\n"
|
| 1004 |
+
]
|
| 1005 |
+
},
|
| 1006 |
+
{
|
| 1007 |
+
"name": "stdout",
|
| 1008 |
+
"output_type": "stream",
|
| 1009 |
+
"text": [
|
| 1010 |
+
"๐ ์์ 10๊ฐ ์์ธก ๊ฒฐ๊ณผ:\n",
|
| 1011 |
+
"1. ' ๋ฌธํ' - ํ๋ฅ : 13.43%\n",
|
| 1012 |
+
"2. ' ์ธ์ด' - ํ๋ฅ : 11.99%\n",
|
| 1013 |
+
"3. ' ์ธ๊ฐ' - ํ๋ฅ : 5.63%\n",
|
| 1014 |
+
"4. ' ๋ฌธ์' - ํ๋ฅ : 5.20%\n",
|
| 1015 |
+
"5. ' ์ฐ๋ฆฌ' - ํ๋ฅ : 3.36%\n",
|
| 1016 |
+
"6. ' ๋ง' - ํ๋ฅ : 2.78%\n",
|
| 1017 |
+
"7. ' ํฌ' - ํ๋ฅ : 2.14%\n",
|
| 1018 |
+
"8. ' ์ธ๋ฌธํ' - ํ๋ฅ : 1.80%\n",
|
| 1019 |
+
"9. ' ์ฌ๋' - ํ๋ฅ : 1.55%\n",
|
| 1020 |
+
"10. ' ์์ด' - ํ๋ฅ : 1.50%\n"
|
| 1021 |
+
]
|
| 1022 |
+
}
|
| 1023 |
+
],
|
| 1024 |
+
"source": [
|
| 1025 |
+
"# ๋ฒ ์ด์ค ๋ชจ๋ธ์ ์
๋ ฅ์ ๋ค์ ๋จ์ด๊ฐ ์ฌ ์์ธก ํ๋ฅ ๋ถํฌ\n",
|
| 1026 |
+
"import torch\n",
|
| 1027 |
+
"from transformers import AutoModelForCausalLM, AutoTokenizer\n",
|
| 1028 |
+
"from peft import PeftModel\n",
|
| 1029 |
+
"\n",
|
| 1030 |
+
"# ๋ชจ๋ธ ๋ถ๋ฌ์ค๊ธฐ\n",
|
| 1031 |
+
"model_name = \"EleutherAI/polyglot-ko-1.3b\"\n",
|
| 1032 |
+
"model = AutoModelForCausalLM.from_pretrained(model_name).to(\"mps\")\n",
|
| 1033 |
+
"tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
|
| 1034 |
+
"\n",
|
| 1035 |
+
"# ํ๋กฌํํธ\n",
|
| 1036 |
+
"prompt = \"์ด๋ฌธํ์ด๋ ๋ฌด์์ธ๊ฐ์?\\n๋ต๋ณ: ์ด๋ฌธํ์\"\n",
|
| 1037 |
+
"inputs = tokenizer(prompt, return_tensors=\"pt\").to(\"mps\")\n",
|
| 1038 |
+
"\n",
|
| 1039 |
+
"# token_type_ids ์ ๊ฑฐ\n",
|
| 1040 |
+
"if \"token_type_ids\" in inputs:\n",
|
| 1041 |
+
" inputs.pop(\"token_type_ids\")\n",
|
| 1042 |
+
"\n",
|
| 1043 |
+
"# ๋ชจ๋ธ ์ถ๋ก \n",
|
| 1044 |
+
"with torch.no_grad():\n",
|
| 1045 |
+
" outputs = model(**inputs)\n",
|
| 1046 |
+
"\n",
|
| 1047 |
+
"# ๋ง์ง๋ง ํ ํฐ ๋ค์์ ์์ฑ๋ ๋จ์ด์ logit (์์ธก ๋ถํฌ)\n",
|
| 1048 |
+
"next_token_logits = outputs.logits[0, -1]\n",
|
| 1049 |
+
"\n",
|
| 1050 |
+
"# softmax๋ก ํ๋ฅ ํ\n",
|
| 1051 |
+
"probs = torch.nn.functional.softmax(next_token_logits, dim=-1)\n",
|
| 1052 |
+
"\n",
|
| 1053 |
+
"# ์์ 10๊ฐ ๋จ์ด ์ถ๋ ฅ\n",
|
| 1054 |
+
"topk = torch.topk(probs, k=10)\n",
|
| 1055 |
+
"top_indices = topk.indices\n",
|
| 1056 |
+
"top_probs = topk.values\n",
|
| 1057 |
+
"\n",
|
| 1058 |
+
"print(\"๐ ์์ 10๊ฐ ์์ธก ๊ฒฐ๊ณผ:\")\n",
|
| 1059 |
+
"for i in range(10):\n",
|
| 1060 |
+
" token = top_indices[i].item()\n",
|
| 1061 |
+
" print(f\"{i+1}. '{tokenizer.decode([token])}' - ํ๋ฅ : {top_probs[i].item()*100:.2f}%\")"
|
| 1062 |
+
]
|
| 1063 |
+
},
|
| 1064 |
+
{
|
| 1065 |
+
"cell_type": "markdown",
|
| 1066 |
+
"id": "a8af7579-789f-4d22-9bea-c136532ed8de",
|
| 1067 |
+
"metadata": {},
|
| 1068 |
+
"source": [
|
| 1069 |
+
"## ๋ฒ ์ด์ค ๋ชจ๋ธ ์ํฌํธ์ ์ถ๋ก ํ
์คํธ ์๋ฃ \n",
|
| 1070 |
+
"## ๊ทธ๋ฌ๋ LoRA ๊ฒฝ๋ ํ์ต์ผ๋ก ์ถ๋ก ์ฑ๋ฅ์ด ์ข์ง ์์ Full-fine-tuning์ผ๋ก ์งํ\n",
|
| 1071 |
+
"## kcmii_lm_fullfinetuning ์คํฌ๋ฆฝํธ ์ฐธ๊ณ "
|
| 1072 |
+
]
|
| 1073 |
+
},
|
| 1074 |
+
{
|
| 1075 |
+
"cell_type": "code",
|
| 1076 |
+
"execution_count": null,
|
| 1077 |
+
"id": "b62c92f5-4a8e-4c42-904e-18ca513cae1d",
|
| 1078 |
+
"metadata": {},
|
| 1079 |
+
"outputs": [],
|
| 1080 |
+
"source": []
|
| 1081 |
+
}
|
| 1082 |
+
],
|
| 1083 |
+
"metadata": {
|
| 1084 |
+
"kernelspec": {
|
| 1085 |
+
"display_name": "Python [conda env:base] *",
|
| 1086 |
+
"language": "python",
|
| 1087 |
+
"name": "conda-base-py"
|
| 1088 |
+
},
|
| 1089 |
+
"language_info": {
|
| 1090 |
+
"codemirror_mode": {
|
| 1091 |
+
"name": "ipython",
|
| 1092 |
+
"version": 3
|
| 1093 |
+
},
|
| 1094 |
+
"file_extension": ".py",
|
| 1095 |
+
"mimetype": "text/x-python",
|
| 1096 |
+
"name": "python",
|
| 1097 |
+
"nbconvert_exporter": "python",
|
| 1098 |
+
"pygments_lexer": "ipython3",
|
| 1099 |
+
"version": "3.12.7"
|
| 1100 |
+
}
|
| 1101 |
+
},
|
| 1102 |
+
"nbformat": 4,
|
| 1103 |
+
"nbformat_minor": 5
|
| 1104 |
+
}
|