roberthsu2003 committed on
Commit
1a08da7
·
verified ·
1 Parent(s): 4b485c9

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +82 -1
README.md CHANGED
@@ -37,7 +37,7 @@ It achieves the following results on the evaluation set:
37
 
38
  ## Model description
39
 
40
- ### 使用方法(how to use)
41
 
42
  ```python
43
  ner_pipe = pipeline("token-classification", model='roberthsu2003/models_for_ner',aggregation_strategy="simple")
@@ -55,6 +55,87 @@ ner_result
55
  {'PER': ['徐國堂'], 'LOC': ['台北']}
56
  ```
57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  ## Intended uses & limitations
59
 
60
  More information needed
 
37
 
38
  ## Model description
39
 
40
+ ### 使用方法(pipeline的方法)
41
 
42
  ```python
43
  ner_pipe = pipeline("token-classification", model='roberthsu2003/models_for_ner',aggregation_strategy="simple")
 
55
  {'PER': ['徐國堂'], 'LOC': ['台北']}
56
  ```
57
 
58
+ ### 使用方法(model,tokenizer)
59
+
60
+ ```python
61
+ from transformers import AutoModelForTokenClassification, AutoTokenizer
62
+ import numpy as np
63
+
64
+ # Load the pre-trained model and tokenizer
65
+ model = AutoModelForTokenClassification.from_pretrained('roberthsu2003/models_for_ner')
66
+ tokenizer = AutoTokenizer.from_pretrained('roberthsu2003/models_for_ner')
67
+
68
+ # The label mapping (you might need to adjust this based on your training)
69
+ label_list = ['O', 'B-PER', 'I-PER', 'B-ORG', 'I-ORG', 'B-LOC', 'I-LOC']
70
+
71
+ def predict_ner(text):
72
+ """Predicts NER tags for a given text using the loaded model."""
73
+ # Encode the text
74
+ inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
75
+
76
+ # Get model predictions
77
+ outputs = model(**inputs)
78
+ predictions = np.argmax(outputs.logits.detach().numpy(), axis=-1)
79
+
80
+ # Get the word IDs from the encoded inputs
81
+ # This is the key change - word_ids() is a method on the encoding result, not the tokenizer itself
82
+ word_ids = inputs.word_ids(batch_index=0)
83
+
84
+ pred_tags = []
85
+ for word_id, pred in zip(word_ids, predictions[0]):
86
+ if word_id is None:
87
+ continue # Skip special tokens
88
+ pred_tags.append(label_list[pred])
89
+
90
+ return pred_tags
91
+
92
+ #To get the entities, you'll need to group consecutive non-O tags:
93
+
94
+ def get_entities(tags):
95
+ """Groups consecutive NER tags to extract entities."""
96
+ entities = []
97
+ start_index = -1
98
+ current_entity_type = None
99
+ for i, tag in enumerate(tags):
100
+ if tag != 'O':
101
+ if start_index == -1:
102
+ start_index = i
103
+ current_entity_type = tag[2:] # Extract entity type (e.g., PER, LOC, ORG)
104
+ else: #tag == 'O'
105
+ if start_index != -1:
106
+ entities.append((start_index, i, current_entity_type))
107
+ start_index = -1
108
+ current_entity_type = None
109
+ if start_index != -1:
110
+ entities.append((start_index, len(tags), current_entity_type))
111
+ return entities
112
+
113
+ # Example usage:
114
+ text = "徐國堂在台北上班"
115
+ ner_tags = predict_ner(text)
116
+ print(f"Text: {text}")
117
+ #==output==
118
+ #Text: 徐國堂在台北上班
119
+
120
+
121
+ print(f"NER Tags: {ner_tags}")
122
+ #==output==
123
+ #NER Tags: ['B-PER', 'I-PER', 'I-PER', 'O', 'B-LOC', 'I-LOC', 'O', 'O']
124
+
125
+
126
+ entities = get_entities(ner_tags)
127
+ word_tokens = tokenizer.tokenize(text) # Tokenize to get individual words
128
+ print(f"Entities:")
129
+ for start, end, entity_type in entities:
130
+ entity_text = "".join(word_tokens[start:end])
131
+ print(f"- {entity_text}: {entity_type}")
132
+
133
+ #==output==
134
+ #Entities:
135
+ #- 徐國堂: PER
136
+ #- 台北: LOC
137
+ ```
138
+
139
  ## Intended uses & limitations
140
 
141
  More information needed