chuuhtetnaing committed on
Commit
c3cdb31
·
verified ·
1 Parent(s): 72fa097

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +80 -0
README.md CHANGED
@@ -52,6 +52,22 @@ Fine-tuned [myanmar-pos-model](https://huggingface.co/chuuhtetnaing/myanmar-pos-
52
  | 29 | 0.0274 | 0.0837 | 0.8855 | 0.9272 | 0.9058 | 0.9804 |
53
  | 30 | 0.0271 | 0.0832 | 0.8875 | 0.9267 | 0.9067 | 0.9806 |
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  ## Training Details
56
 
57
  | Parameter | Value |
@@ -71,6 +87,70 @@ result = ner("ကိုမောင်သည်ရန်ကုန်မြို
71
  print(result)
72
  ```
73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  ## NER Labels
75
 
76
  | Tag | Description |
 
52
  | 29 | 0.0274 | 0.0837 | 0.8855 | 0.9272 | 0.9058 | 0.9804 |
53
  | 30 | 0.0271 | 0.0832 | 0.8875 | 0.9267 | 0.9067 | 0.9806 |
54
 
55
+ ## Test Set Evaluation
56
+
57
+ Evaluated on the test split of [myanmar-ner-dataset](https://huggingface.co/datasets/chuuhtetnaing/myanmar-ner-dataset) using seqeval metrics:
58
+
59
+ | Entity | Precision | Recall | F1-Score | Support |
60
+ |--------|-----------|--------|----------|---------|
61
+ | DATE | 0.80 | 0.86 | 0.83 | 251 |
62
+ | LOC | 0.93 | 0.96 | 0.95 | 2712 |
63
+ | NUM | 0.89 | 0.92 | 0.90 | 789 |
64
+ | ORG | 0.44 | 0.62 | 0.52 | 94 |
65
+ | PER | 0.84 | 0.88 | 0.86 | 533 |
66
+ | TIME | 0.62 | 0.70 | 0.66 | 57 |
67
+ | **micro avg** | **0.89** | **0.93** | **0.91** | 4436 |
68
+ | **macro avg** | 0.75 | 0.82 | 0.78 | 4436 |
69
+ | **weighted avg** | **0.89** | **0.93** | **0.91** | 4436 |
70
+
71
  ## Training Details
72
 
73
  | Parameter | Value |
 
87
  print(result)
88
  ```
89
 
90
+ ## Evaluation Code
91
+
92
+ ```python
93
+ !pip install seqeval
94
+
95
+ from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer
96
+ from datasets import load_dataset
97
+ from tqdm import tqdm
98
+ from seqeval.metrics import classification_report
99
+
100
+ # Load model and tokenizer
101
+ model = AutoModelForTokenClassification.from_pretrained("chuuhtetnaing/myanmar-ner-model")
102
+ tokenizer = AutoTokenizer.from_pretrained("chuuhtetnaing/myanmar-ner-model")
103
+
104
+ def tokenize_and_align_labels(examples):
105
+ tokenized_inputs = tokenizer(examples["tokens"], truncation=True, is_split_into_words=True)
106
+ labels = []
107
+ for i, label in enumerate(examples["ner_tags"]):
108
+ word_ids = tokenized_inputs.word_ids(batch_index=i)
109
+ previous_word_idx = None
110
+ label_ids = []
111
+ for word_idx in word_ids:
112
+ if word_idx is None:
113
+ label_ids.append(-100)
114
+ elif word_idx != previous_word_idx:
115
+ label_ids.append(label[word_idx])
116
+ else:
117
+ label_ids.append(-100)
118
+ previous_word_idx = word_idx
119
+ labels.append(label_ids)
120
+ tokenized_inputs["labels"] = labels
121
+ return tokenized_inputs
122
+
123
+ # Load and tokenize dataset
124
+ ner = pipeline("token-classification", model="chuuhtetnaing/myanmar-ner-model", aggregation_strategy=None)
125
+ ds = load_dataset("chuuhtetnaing/myanmar-ner-dataset")
126
+ tokenized_ds = ds.map(tokenize_and_align_labels, batched=True)
127
+ test_ds = tokenized_ds["test"]
128
+
129
+ # Get label mapping
130
+ label_list = model.config.id2label
131
+
132
+ y_true = []
133
+ y_pred = []
134
+
135
+ for example in tqdm(test_ds):
136
+ tokens = tokenizer.convert_ids_to_tokens(example["input_ids"])
137
+ true_labels = [label_list[l] if l != -100 else "O" for l in example["labels"]]
138
+
139
+ text = tokenizer.decode(example["input_ids"], skip_special_tokens=True)
140
+ preds = ner(text)
141
+
142
+ pred_labels = ["O"] * len(true_labels)
143
+ for pred in preds:
144
+ idx = pred["index"]
145
+ if idx < len(pred_labels):
146
+ pred_labels[idx] = pred["entity"]
147
+
148
+ y_true.append([label_list[l] for l in example["labels"] if l != -100])
149
+ y_pred.append([p for p, l in zip(pred_labels, example["labels"]) if l != -100])
150
+
151
+ print(classification_report(y_true, y_pred))
152
+ ```
153
+
154
  ## NER Labels
155
 
156
  | Tag | Description |