fiveflow committed on
Commit
26e1084
·
1 Parent(s): 05340ec

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +25 -3
README.md CHANGED
@@ -3,6 +3,28 @@ language:
3
  - ko
4
  library_name: transformers
5
  pipeline_tag: token-classification
6
- widget:
7
- - text: "ํƒ„์†Œ์ค‘๋ฆฝ๊ณผESG๊ฒฝ์˜์—๋Œ€ํ•œ์‚ฌํšŒ์ ์š”๊ตฌํ™•๋Œ€"
8
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  - ko
4
  library_name: transformers
5
  pipeline_tag: token-classification
6
+ ---
7
+
8
+ ```
9
+ import torch
10
+ org_text = "ํƒ„์†Œ์ค‘๋ฆฝ๊ณผESG๊ฒฝ์˜์—๋Œ€ํ•œ์‚ฌํšŒ์ ์š”๊ตฌํ™•๋Œ€".replace(" ", "") # ๊ณต๋ฐฑ์ œ๊ฑฐ
11
+ label = ["UNK", "PAD", "O", "B", "I", "E", "S"]
12
+ # char 단위로 토큰화
13
+ token_list = [tokenizer.cls_token_id]
14
+ for char in org_text:
15
+ token_list.append(tokenizer.encode(char)[1])
16
+ token_list.append(tokenizer.eos_token_id)
17
+ tkd = torch.tensor(token_list).unsqueeze(0)
18
+
19
+ output = roberta(tkd).logits
20
+
21
+ _, pred_idx = torch.max(output, dim=2)
22
+ tags = [label[idx] for idx in pred_idx.squeeze()][1:-1]
23
+ pred_sent = ""
24
+ for char_idx, spc_idx in enumerate(pred_idx.squeeze()[1:-1]):
25
+ # "E" tag 단위로 띄어쓰기
26
+ if label[spc_idx] == "E": pred_sent += org_text[char_idx] + " "
27
+ else: pred_sent += org_text[char_idx]
28
+
29
+ print(pred_sent)
30
+ ```