Update README.md
Browse files
README.md
CHANGED
|
@@ -3,6 +3,28 @@ language:
|
|
| 3 |
- ko
|
| 4 |
library_name: transformers
|
| 5 |
pipeline_tag: token-classification
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
---
language:
- ko
library_name: transformers
pipeline_tag: token-classification
---
```
|
| 9 |
+
import torch
|
| 10 |
+
org_text = "ํ์์ค๋ฆฝ๊ณผESG๊ฒฝ์์๋ํ์ฌํ์ ์๊ตฌํ๋".replace(" ", "") # ๊ณต๋ฐฑ์ ๊ฑฐ
|
| 11 |
+
label = ["UNK", "PAD", "O", "B", "I", "E", "S"]
|
| 12 |
+
# char ๋จ์๋ก ํ ํฐํ
|
| 13 |
+
token_list = [tokenizer.cls_token_id]
|
| 14 |
+
for char in org_text:
|
| 15 |
+
token_list.append(tokenizer.encode(char)[1])
|
| 16 |
+
token_list.append(tokenizer.eos_token_id)
|
| 17 |
+
tkd = torch.tensor(token_list).unsqueeze(0)
|
| 18 |
+
|
| 19 |
+
output = roberta(tkd).logits
|
| 20 |
+
|
| 21 |
+
_, pred_idx = torch.max(output, dim=2)
|
| 22 |
+
tags = [label[idx] for idx in pred_idx.squeeze()][1:-1]
|
| 23 |
+
pred_sent = ""
|
| 24 |
+
for char_idx, spc_idx in enumerate(pred_idx.squeeze()[1:-1]):
|
| 25 |
+
# "E" tag ๋จ์๋ก ๋์ด์ฐ๊ธฐ
|
| 26 |
+
if label[spc_idx] == "E": pred_sent += org_text[char_idx] + " "
|
| 27 |
+
else: pred_sent += org_text[char_idx]
|
| 28 |
+
|
| 29 |
+
print(pred_sent)
|
| 30 |
+
```
|