| from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline |
|
|
| |
# Identifier of the checkpoint that backs the NER pipeline in this script.
MODEL_NAME = "Porameht/wangchanberta-thainer-corpus-v2-2"

# Load the matching tokenizer and token-classification model by name.
# NOTE(review): from_pretrained presumably resolves the name against the
# Hugging Face Hub / local cache, so the first run needs network access --
# confirm for the target environment.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForTokenClassification.from_pretrained(MODEL_NAME)
|
|
| |
# Print every (id, label) pair from the model config so the entity tags
# reported for the test sentences can be interpreted.
print("Labels this model can detect:")
# The loop variable is `label_id`, not `id`: at module scope `id` would
# shadow the builtin id() for the rest of the script.
for label_id, label in model.config.id2label.items():
    print(f" {label_id:2}: {label}")
# Blank line separating the label table from the per-sentence output.
print()
|
|
| |
| |
# Build the NER pipeline around the already-loaded model and tokenizer.
# The reporting code in this script reads `entity_group`, `word` and `score`
# from each result, which is the output shape of an aggregated pipeline;
# aggregation_strategy="simple" is what requests that grouping.
ner = pipeline(
    "ner",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
)
|
|
| |
# Thai input sentences fed to the NER pipeline, one at a time.
test_sentences = [
    "ไปมัตสึโมโตวันที่ 29 พฤษภาคม",
    # Same sentence as above with characters dropped from two words --
    # presumably a typo-robustness probe; confirm intent with the author.
    "ไปมัตสึโมตวันที่ 29 พฤษาคม",
    "วันที่ 30 พ.ค. ออกเดินทางจากฮาคุบะไปคามิโคจิ",
    "เช็คอินโรงแรมที่โตเกียวตอน 15:00 น.",
    "วันที่ 29 ทำอะไรบ้าง",
    # Bare number with no surrounding context.
    "29",
]

# Entity groups that get a check mark in the printed report.
# NOTE(review): assumed to match the label names the model emits -- verify
# against the "Labels this model can detect" listing.
USEFUL_LABELS = {"LOCATION", "DATE", "TIME", "FACILITY"}
|
|
# Run the NER pipeline on each test sentence and print what it found.
for sentence in test_sentences:
    print(f"Input : {sentence}")
    results = ner(sentence)

    if not results:
        # The pipeline returned an empty result for this sentence.
        print(" (no entities found)")
    else:
        for ent in results:
            label = ent["entity_group"]
            word = ent["word"]
            score = ent["score"]
            # Flag the entity types listed in USEFUL_LABELS with a check mark.
            marker = " ✓" if label in USEFUL_LABELS else ""
            print(f" [{label}] '{word}' (score: {score:.3f}){marker}")
    # Blank line between sentences.
    print()
|
|
|
|
| |