Update README.md
Browse files
README.md
CHANGED
|
@@ -88,8 +88,57 @@ Fine-tuning์ ํ์ฉํ ๋ฒ ์ด์ค ๋ชจ๋ธ ๋ฐ ํ์ดํผ ํ๋ผ๋ฏธํฐ๋ ๋ค์
|
|
| 88 |
## 4. Example
|
| 89 |
이 모델은 Context를 인코딩하는 모델로, Question 모델과 함께 사용해야 합니다.
|
| 90 |
동일한 질병에 관한 질문과 텍스트가 높은 유사도를 보인다는 사실을 확인할 수 있습니다.
|
|
|
|
| 91 |
|
| 92 |
```python
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
```
|
| 94 |
|
| 95 |
|
|
|
|
| 88 |
## 4. Example
|
| 89 |
이 모델은 Context를 인코딩하는 모델로, Question 모델과 함께 사용해야 합니다.
|
| 90 |
동일한 질병에 관한 질문과 텍스트가 높은 유사도를 보인다는 사실을 확인할 수 있습니다.
|
| 91 |
+
학습 데이터의 특성 상, 위의 예시보다 정제된 의료 텍스트에 대해 더 잘 작동합니다.
|
| 92 |
|
| 93 |
```python
|
| 94 |
+
import numpy as np
|
| 95 |
+
from transformers import AutoModel, AutoTokenizer
|
| 96 |
+
|
| 97 |
+
# Question Model
|
| 98 |
+
q_model_path = 'snumin44/medical-biencoder-ko-bert-question'
|
| 99 |
+
q_model = AutoModel.from_pretrained(q_model_path)
|
| 100 |
+
q_tokenizer = AutoTokenizer.from_pretrained(q_model_path)
|
| 101 |
+
|
| 102 |
+
# Context Model
|
| 103 |
+
c_model_path = 'snumin44/medical-biencoder-ko-bert-context'
|
| 104 |
+
c_model = AutoModel.from_pretrained(c_model_path)
|
| 105 |
+
c_tokenizer = AutoTokenizer.from_pretrained(c_model_path)
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
query = 'high blood pressure ์ฒ๋ฐฉ ์ฌ๋ก'
|
| 109 |
+
|
| 110 |
+
targets = [
|
| 111 |
+
"""๊ณ ํ์ ์ง๋จ.
|
| 112 |
+
ํ์ ์๋ด ๋ฐ ์ํ์ต๊ด ๊ต์ ๊ถ๊ณ . ์ ์ผ์, ๊ท์น์ ์ธ ์ด๋, ๊ธ์ฐ, ๊ธ์ฃผ ์ง์.
|
| 113 |
+
ํ์ ์ฌ๋ฐฉ๋ฌธ. ํ์: 150/95mmHg. ์ฝ๋ฌผ์น๋ฃ ์์. Amlodipine 5mg 1์ผ 1ํ ์ฒ๋ฐฉ.""",
|
| 114 |
+
|
| 115 |
+
"""์๊ธ์ค ๋์ฐฉ ํ ์ ๋ด์๊ฒฝ ์งํ.
|
| 116 |
+
์๊ฒฌ: Gastric ulcer์์ Forrest IIb ๊ด์ฐฐ๋จ. ์ถํ์ ์๋์ ์ผ์ถ์ฑ ์ถํ ํํ.
|
| 117 |
+
์ฒ์น: ์ํผ๋คํ๋ฆฐ ์ฃผ์ฌ๋ก ์ถํ ๊ฐ์ ํ์ธ. Hemoclip 2๊ฐ๋ก ์ถํ ๋ถ์ ํด๋ฆฌํํ์ฌ ์งํ ์๋ฃ.""",
|
| 118 |
+
|
| 119 |
+
"""ํ์ค ๋์ ์ง๋ฐฉ ์์น ๋ฐ ์ง๋ฐฉ๊ฐ ์๊ฒฌ.
|
| 120 |
+
๋ค๋ฐ์ฑ gallstones ํ์ธ. ์ฆ์ ์์ ๊ฒฝ์ฐ ๊ฒฝ๊ณผ ๊ด์ฐฐ ๊ถ์ฅ.
|
| 121 |
+
์ฐ์ธก renal cyst, ์์ฑ ๊ฐ๋ฅ์ฑ ๋์ผ๋ฉฐ ์ถ๊ฐ์ ์ธ ์ฒ์น ๋ถํ์ ํจ."""
|
| 122 |
+
]
|
| 123 |
+
|
| 124 |
+
query_feature = q_tokenizer(query, return_tensors='pt')
|
| 125 |
+
query_outputs = q_model(**query_feature, return_dict=True)
|
| 126 |
+
query_embeddings = query_outputs.pooler_output.detach().numpy().squeeze()
|
| 127 |
+
|
| 128 |
+
def cos_sim(A, B):
|
| 129 |
+
return np.dot(A, B) / (np.linalg.norm(A) * np.linalg.norm(B))
|
| 130 |
+
|
| 131 |
+
for idx, target in enumerate(targets):
|
| 132 |
+
target_feature = c_tokenizer(target, return_tensors='pt')
|
| 133 |
+
target_outputs = c_model(**target_feature, return_dict=True)
|
| 134 |
+
target_embeddings = target_outputs.pooler_output.detach().numpy().squeeze()
|
| 135 |
+
similarity = cos_sim(query_embeddings, target_embeddings)
|
| 136 |
+
print(f"Similarity between query and target {idx}: {similarity:.4f}")
|
| 137 |
+
```
|
| 138 |
+
```
|
| 139 |
+
Similarity between query and target 0: 0.2674
|
| 140 |
+
Similarity between query and target 1: 0.0416
|
| 141 |
+
Similarity between query and target 2: 0.0476
|
| 142 |
```
|
| 143 |
|
| 144 |
|