Update README.md
Browse files
README.md
CHANGED
|
@@ -37,7 +37,7 @@ It achieves the following results on the evaluation set:
|
|
| 37 |
|
| 38 |
## Model description
|
| 39 |
|
| 40 |
-
### 使用方法(
|
| 41 |
|
| 42 |
```python
|
| 43 |
ner_pipe = pipeline("token-classification", model='roberthsu2003/models_for_ner',aggregation_strategy="simple")
|
|
@@ -55,6 +55,87 @@ ner_result
|
|
| 55 |
{'PER': ['徐國堂'], 'LOC': ['台北']}
|
| 56 |
```
|
| 57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
## Intended uses & limitations
|
| 59 |
|
| 60 |
More information needed
|
|
|
|
| 37 |
|
| 38 |
## Model description
|
| 39 |
|
| 40 |
+
### 使用方法(pipeline的方法)
|
| 41 |
|
| 42 |
```python
|
| 43 |
ner_pipe = pipeline("token-classification", model='roberthsu2003/models_for_ner',aggregation_strategy="simple")
|
|
|
|
| 55 |
{'PER': ['徐國堂'], 'LOC': ['台北']}
|
| 56 |
```
|
| 57 |
|
| 58 |
+
### 使用方法(model,tokenizer)
|
| 59 |
+
|
| 60 |
+
```python
|
| 61 |
+
from transformers import AutoModelForTokenClassification, AutoTokenizer
import numpy as np

# Pull the fine-tuned checkpoint and its matching tokenizer from the Hub.
tokenizer = AutoTokenizer.from_pretrained('roberthsu2003/models_for_ner')
model = AutoModelForTokenClassification.from_pretrained('roberthsu2003/models_for_ner')

# Index -> tag mapping used to decode the model's logits.
# NOTE(review): assumed to match the label order used at training time — confirm.
label_list = ['O', 'B-PER', 'I-PER', 'B-ORG', 'I-ORG', 'B-LOC', 'I-LOC']
|
| 70 |
+
|
| 71 |
+
def predict_ner(text):
    """Return one BIO tag per word of *text* using the loaded model."""
    # Tokenize; the returned encoding keeps the token->word alignment
    # that we need below.
    encoding = tokenizer(text, return_tensors='pt', truncation=True, padding=True)

    # Forward pass, then pick the highest-scoring label id per token.
    logits = model(**encoding).logits
    label_ids = np.argmax(logits.detach().numpy(), axis=-1)[0]

    # word_ids() is a method on the encoding result, not on the tokenizer:
    # it maps each token position to its source word, or None for special
    # tokens such as [CLS]/[SEP], which we drop.
    word_ids = encoding.word_ids(batch_index=0)
    return [label_list[label_id]
            for word_id, label_id in zip(word_ids, label_ids)
            if word_id is not None]
|
| 91 |
+
|
| 92 |
+
#To get the entities, you'll need to group consecutive non-O tags:
|
| 93 |
+
|
| 94 |
+
def get_entities(tags):
    """Group consecutive BIO tags into entity spans.

    Args:
        tags: Sequence of BIO tag strings such as 'O', 'B-PER', 'I-LOC'.

    Returns:
        List of (start_index, end_index, entity_type) tuples, where
        end_index is exclusive and entity_type is e.g. 'PER' or 'LOC'.
    """
    entities = []
    start_index = -1
    current_entity_type = None
    for i, tag in enumerate(tags):
        # A 'B-' tag, like 'O', terminates any entity in progress.
        # (Previously only 'O' closed an entity, so two adjacent entities
        # such as ['B-PER', 'B-LOC'] were merged into a single span.)
        if tag == 'O' or tag.startswith('B-'):
            if start_index != -1:
                entities.append((start_index, i, current_entity_type))
                start_index = -1
                current_entity_type = None
        if tag != 'O':
            entity_type = tag[2:]  # strip the 'B-'/'I-' prefix
            if start_index == -1:
                start_index = i
                current_entity_type = entity_type
            elif entity_type != current_entity_type:
                # An 'I-' tag of a different type also starts a new entity.
                entities.append((start_index, i, current_entity_type))
                start_index = i
                current_entity_type = entity_type
    if start_index != -1:
        # Flush an entity that runs to the end of the sequence.
        entities.append((start_index, len(tags), current_entity_type))
    return entities
|
| 112 |
+
|
| 113 |
+
# --- Example usage ---
text = "徐國堂在台北上班"
ner_tags = predict_ner(text)

print(f"Text: {text}")
# expected output:
# Text: 徐國堂在台北上班

print(f"NER Tags: {ner_tags}")
# expected output:
# NER Tags: ['B-PER', 'I-PER', 'I-PER', 'O', 'B-LOC', 'I-LOC', 'O', 'O']

# Recover the surface form of each entity from the token list.
word_tokens = tokenizer.tokenize(text)
print(f"Entities:")
for start, end, entity_type in get_entities(ner_tags):
    entity_text = "".join(word_tokens[start:end])
    print(f"- {entity_text}: {entity_type}")
# expected output:
# Entities:
# - 徐國堂: PER
# - 台北: LOC
|
| 137 |
+
```
|
| 138 |
+
|
| 139 |
## Intended uses & limitations
|
| 140 |
|
| 141 |
More information needed
|