Tomohiro
/

MedTXTNER

Token Classification

Model card · Files and versions

Tomohiro committed on Apr 22, 2025

Commit

fd9e32a

·

verified ·

1 Parent(s): b8e7134

Update README.md

Files changed (1) hide show

README.md +2 -13

README.md CHANGED Viewed

@@ -32,19 +32,13 @@ tags:
 import torch
 from transformers import AutoTokenizer, AutoModelForTokenClassification
-# 1) チェックポイントディレクトリを指定
-checkpoint_dir = "Tomohiro/MedTXTNER"
-# 2) モデルとトークナイザーをロード
-model     = AutoModelForTokenClassification.from_pretrained(checkpoint_dir)
 tokenizer = AutoTokenizer.from_pretrained(checkpoint_dir, use_fast=True)
-# 3) デバイス設定
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
 model.eval()
-# 4) 推論用
 def predict_text(text: str):
     enc = tokenizer(
         text,
@@ -59,14 +53,10 @@ def predict_text(text: str):
         outputs = model(**enc)
         logits  = outputs.logits
-    # 各トークンごとの予測ラベルIDを取得
     pred_ids = torch.argmax(logits, dim=-1)[0].cpu().tolist()
-    # トークン列と IOB ラベル列に変換
     tokens = tokenizer.convert_ids_to_tokens(enc["input_ids"][0])
     id2label = model.config.id2label
-    # special tokens を除いて結果を整形
     result = []
     for tok, pid in zip(tokens, pred_ids):
         if tok in tokenizer.all_special_tokens:
@@ -74,7 +64,6 @@ def predict_text(text: str):
         result.append((tok, id2label[pid]))
     return result
-# 5) 実際に試す
 sample = "症例】５３歳女性。発熱と嘔気を認め、プレドニゾロンを中断しました。"
 for tok, lab in predict_text(sample):
     print(f"{tok}\t{lab}")

 import torch
 from transformers import AutoTokenizer, AutoModelForTokenClassification
+model_dir = "Tomohiro/MedTXTNER"
+model     = AutoModelForTokenClassification.from_pretrained(model_dir)
 tokenizer = AutoTokenizer.from_pretrained(checkpoint_dir, use_fast=True)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
 model.eval()
 def predict_text(text: str):
     enc = tokenizer(
         text,
         outputs = model(**enc)
         logits  = outputs.logits
     pred_ids = torch.argmax(logits, dim=-1)[0].cpu().tolist()
     tokens = tokenizer.convert_ids_to_tokens(enc["input_ids"][0])
     id2label = model.config.id2label
     result = []
     for tok, pid in zip(tokens, pred_ids):
         if tok in tokenizer.all_special_tokens:
         result.append((tok, id2label[pid]))
     return result
 sample = "症例】５３歳女性。発熱と嘔気を認め、プレドニゾロンを中断しました。"
 for tok, lab in predict_text(sample):
     print(f"{tok}\t{lab}")