File size: 2,011 Bytes
544ea81 98b05d3 544ea81 1f80db6 544ea81 75ab5bb 544ea81 98b05d3 544ea81 3b564e5 98b05d3 544ea81 98b05d3 544ea81 98b05d3 544ea81 98b05d3 544ea81 98b05d3 544ea81 98b05d3 544ea81 98b05d3 544ea81 98b05d3 544ea81 98b05d3 544ea81 5569ad3 544ea81 98b05d3 21f1cb6 98b05d3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
---
library_name: transformers
license: apache-2.0
language:
- en
- ko
base_model:
- numind/NuExtract-1.5
tags:
- llama-factory
---
# Automatic Schema Induction (text-to-schema) Model
This model performs schema induction, a sub-task of the text-to-JSON task: given a text, it generates a JSON template for that text.
# Usage
```python
import json
import torch
from transformers import AutoModel, AutoTokenizer
model_name = "chnaaam/luSI-v1.0"
if torch.cuda.is_available():
device = "cuda"
elif torch.backends.mps.is_available():
device = "mps"
else:
device = "cpu"
model = AutoModel.from_pretrained(model_name, torch_dtype=torch.bfloat16, trust_remote_code=True).to(device).eval()
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
text = """์์ด์ (IU, ๋ณธ๋ช
: ์ด์ง์, ๆ็ฅๆฉ[1], 1993๋
5์ 16์ผ~)๋ ๋ํ๋ฏผ๊ตญ์ ์ฑ์ด์ก๋ผ์ดํฐ, ์๊ณก๊ฐ, ๋ฐฐ์ฐ์ด๋ค. 2007๋
๋ก์ ์ํฐํ
์ธ๋จผํธ(ํ ์นด์นด์ค ์ํฐํ
์ธ๋จผํธ) ์ฐ์ต์์ผ๋ก ์ ์ ๊ณ์ฝ์ ๋งบ๊ณ 15์ธ์ ๋์ด์ 2008๋
์ฒซ EP์ธ ๋ก์คํธ ์ค ํ์ด๋(Lost and Found)๋ฅผ ํตํด ๊ฐ์๋ก ๋ฐ๋ทํ๋ค."""
messages = [
{"role": "user", "content": text}
]
text = tokenizer.apply_chat_template(
messages,
tokenize=False,
add_generation_prompt=True
)
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
generated_ids = model.generate(
**model_inputs,
max_new_tokens=1024,
temperature=0.0
)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
json_template = json.loads(response)
print(json_template)
```
## Output
```json
{
  "Person": {
    "Name": "",
    "Stage name": "",
    "Real name": "",
    "Birth date": "",
    "Nationality": "",
    "Occupations": [],
    "Debut": {
      "Age": "",
      "Year": "",
      "Company": "",
      "Contract type": "",
      "EP": "",
      "EP title": ""
    }
  }
}
``` |