stulcrad
/

CNEC2_0_nested_robeczech-base

Token Classification

Model card Files Files and versions

stulcrad committed on Feb 11, 2025

Commit

c361a7e

·

verified ·

1 Parent(s): c14fc15

Create README.md

Files changed (1) hide show

README.md +40 -0

README.md ADDED Viewed

	@@ -0,0 +1,40 @@

+## How to use
+```python
+label_names = [
+                'O',
+                'B-P', 'I-P', 'B-T', 'I-T', 'B-A', 'I-A', 'B-C', 'I-C',
+                'B-ah', 'I-ah', 'B-at', 'I-at', 'B-az', 'I-az',
+                'B-g_', 'I-g_', 'B-gc', 'I-gc', 'B-gh', 'I-gh',
+                'B-gl', 'I-gl', 'B-gq', 'I-gq', 'B-gr', 'I-gr',
+                'B-gs', 'I-gs', 'B-gt', 'I-gt', 'B-gu', 'I-gu',
+                'B-i_', 'I-i_', 'B-ia', 'I-ia', 'B-ic', 'I-ic',
+                'B-if', 'I-if', 'B-io', 'I-io', 'B-me', 'I-me',
+                'B-mi', 'I-mi', 'B-mn', 'I-mn', 'B-ms', 'I-ms',
+                'B-n_', 'I-n_', 'B-na', 'I-na', 'B-nb', 'I-nb',
+                'B-nc', 'I-nc', 'B-ni', 'I-ni', 'B-no', 'I-no',
+                'B-ns', 'I-ns', 'B-o_', 'I-o_', 'B-oa', 'I-oa',
+                'B-oe', 'I-oe', 'B-om', 'I-om', 'B-op', 'I-op',
+                'B-or', 'I-or', 'B-p_', 'I-p_', 'B-pc', 'I-pc',
+                'B-pd', 'I-pd', 'B-pf', 'I-pf', 'B-pm', 'I-pm',
+                'B-pp', 'I-pp', 'B-ps', 'I-ps', 'B-td', 'I-td',
+                'B-tf', 'I-tf', 'B-th', 'I-th', 'B-tm', 'I-tm', 'B-ty', 'I-ty']
+model = AutoModelForTokenClassification.from_pretrained(stulcrad/CNEC2_0_nested_robeczech-base)
+device = get_device()
+model.to(device)
+tokenizer = AutoTokenizer.from_pretrained(stulcrad/CNEC2_0_nested_robeczech-base, add_prefix_space=True)
+text = "Bydlim v Usti nad Labem"
+inputs = tokenizer(text, return_tensors="pt").to(device)
+with torch.no_grad():
+    outputs = model(**inputs).logits
+flat_outputs = outputs.squeeze()
+pred = flat_outputs.heaviside(torch.tensor([0.0], device=device)).int().tolist()
+tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"].squeeze())
+for token, p, flat_o in zip(tokens, pred, flat_outputs):
+    outs = [label_names[i] for i in range(len(label_names)) if p[i] == 1 and label_names[i] != 'O']
+    if outs:
+        print(f"{token:<10} {outs}")
+```