Update README.md
README.md CHANGED
@@ -15,10 +15,10 @@ Fine-tuned on a Bulgarian subset of [wikiann](https://huggingface.co/datasets/wikiann).
 ## Usage
 Import the libraries:
 ```python
-from typing import List
+from typing import List, Dict
 
 import torch
-from transformers import AutoModelForTokenClassification, AutoTokenizer
+from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline
 ```
 
 Firstly, you'll have to define these methods, since we are using a subword Tokenizer:
@@ -32,7 +32,7 @@ def predict(
         1: "B-PER", 2: "I-PER",
         3: "B-ORG", 4: "I-ORG",
         5: "B-LOC", 6: "I-LOC"
-    }):
+    }) -> List[Dict[str, str]]:
     tokens_data = tokenizer(text)
     tokens = tokenizer.convert_ids_to_tokens(tokens_data["input_ids"])
     words = subwords_to_words(tokens)
@@ -75,7 +75,7 @@ def subwords_to_words(tokens: List[str]) -> List[str]:
     return out_tokens
 
 
-def merge_words_and_predictions(words, entities):
+def merge_words_and_predictions(words: List[str], entities: List[str]) -> List[Dict[str, str]]:
     result = []
     curr_word = []
 
@@ -85,7 +85,7 @@ def merge_words_and_predictions(words, entities):
             curr_word = " ".join(curr_word)
             result.append({
                 "word": curr_word,
-                "
+                "entity_group": entities[i][2:]
             })
             curr_word = [word]
         else:
@@ -99,7 +99,7 @@ def merge_words_and_predictions(words, entities):
         curr_word = " ".join(curr_word)
         result.append({
             "word": curr_word,
-            "
+            "entity_group": entities[i][2:]
        })
 
         curr_word = []
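The `entity_group` value added in this commit comes from slicing off the two-character IOB prefix: `entities[i][2:]` turns `"B-PER"` into `"PER"`. Below is a minimal, self-contained sketch of that grouping idea; the helper function and the sample words/tags are illustrative, not the README's exact code:

```python
from typing import Dict, List

def strip_bio_prefix(tag: str) -> str:
    # "B-PER" -> "PER", "I-LOC" -> "LOC": drop the two-character IOB prefix
    return tag[2:]

# Illustrative tokens and predicted IOB tags (not real model output)
words = ["Иван", "Иванов", "живее", "в", "Пловдив"]
tags = ["B-PER", "I-PER", "O", "O", "B-LOC"]

merged: List[Dict[str, str]] = []
current: List[str] = []
group = ""
for word, tag in zip(words, tags):
    if tag.startswith("B-"):
        # Flush any open entity, then start a new one
        if current:
            merged.append({"word": " ".join(current), "entity_group": group})
        current, group = [word], strip_bio_prefix(tag)
    elif tag.startswith("I-") and current:
        current.append(word)  # continuation of the current entity
    else:
        # An "O" tag closes any open entity
        if current:
            merged.append({"word": " ".join(current), "entity_group": group})
        current = []
if current:
    merged.append({"word": " ".join(current), "entity_group": group})

print(merged)
# [{'word': 'Иван Иванов', 'entity_group': 'PER'},
#  {'word': 'Пловдив', 'entity_group': 'LOC'}]
```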
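The commit also adds `pipeline` to the imports. For comparison, transformers' built-in token-classification pipeline does the same subword merging and B-/I- prefix stripping out of the box; a sketch, assuming a hypothetical checkpoint id (substitute the actual model repo):

```python
from transformers import pipeline

# Hypothetical checkpoint id -- replace with the actual model repo on the Hub
MODEL_ID = "<user>/bert-base-bg-ner"

# aggregation_strategy="simple" merges subword pieces into whole words and
# strips IOB prefixes, mirroring predict() + merge_words_and_predictions()
ner = pipeline("token-classification", model=MODEL_ID, aggregation_strategy="simple")

print(ner("Барак Обама посети София."))
# e.g. [{'entity_group': 'PER', 'word': 'Барак Обама', ...},
#       {'entity_group': 'LOC', 'word': 'София', ...}]
```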