Create README.md
Browse files
README.md
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## Youth_Chatbot_KoGPT2-base
|
| 2 |
+
|
| 3 |
+
**Demo Web**: [Ainize Endpoint](https://main-youth-chatbot-ko-gpt2-base-east-h-shin.endpoint.ainize.ai/)
|
| 4 |
+
<br>
|
| 5 |
+
**Demo Web Code**: [Github](https://github.com/EastHShin/Youth_Chatbot_KoGPT2-base)
|
| 6 |
+
<br>
|
| 7 |
+
**Youth-Chatbot API**: [Ainize API](https://ainize.ai/EastHShin/Youth_Chatbot_KoGPT2-base_API?branch=main)
|
| 8 |
+
<br>
|
| 9 |
+
<br>
|
| 10 |
+
|
| 11 |
+
## Overview
|
| 12 |
+
**Language model**: KoGPT2
|
| 13 |
+
<br>
|
| 14 |
+
**Language**: Korean
|
| 15 |
+
<br>
|
| 16 |
+
**Training data**: [Aihub](https://aihub.or.kr/aidata/7978)
|
| 17 |
+
|
| 18 |
+
## Usage
|
| 19 |
+
```python
|
| 20 |
+
import torch
from transformers import PreTrainedTokenizerFast, GPT2LMHeadModel
|
| 21 |
+
U_TKN = '<usr>'
|
| 22 |
+
S_TKN = '<sys>'
|
| 23 |
+
MASK = '<unused0>'
|
| 24 |
+
SENT = '<unused1>'
|
| 25 |
+
tokenizer = PreTrainedTokenizerFast.from_pretrained("EasthShin/Youth_Chatbot_Kogpt2-base",
|
| 26 |
+
bos_token='</s>', eos_token='</s>', unk_token='<unk>',
|
| 27 |
+
pad_token='<pad>', mask_token=MASK)
|
| 28 |
+
|
| 29 |
+
model = GPT2LMHeadModel.from_pretrained('EasthShin/Youth_Chatbot_Kogpt2-base')
|
| 30 |
+
input_ids = tokenizer.encode(U_TKN + {your text} + SENT + S_TKN)
|
| 31 |
+
gen_ids = model.generate(torch.tensor([input_ids]),
|
| 32 |
+
max_length=128,
|
| 33 |
+
repetition_penalty= 2.0,
|
| 34 |
+
pad_token_id=tokenizer.pad_token_id,
|
| 35 |
+
eos_token_id=tokenizer.eos_token_id,
|
| 36 |
+
bos_token_id=tokenizer.bos_token_id,
|
| 37 |
+
use_cache=True)
|
| 38 |
+
|
| 39 |
+
generated = tokenizer.decode(gen_ids[0, :].tolist())
|
| 40 |
+
|
| 41 |
+
print(generated)
|
| 42 |
+
```
|