kiendt committed on
Commit
5872b80
·
verified ·
1 Parent(s): 7b0bd75

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +34 -0
README.md CHANGED
@@ -50,6 +50,40 @@ The following hyperparameters were used during training:
50
  - lr_scheduler_type: linear
51
  - num_epochs: 3
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  ### Training results
54
 
55
 
 
50
  - lr_scheduler_type: linear
51
  - num_epochs: 3
52
 
53
+
54
+ ### Usage
55
+ ```python
56
+ from transformers import AutoModelForTokenClassification, AutoTokenizer
57
+ import torch
58
+
59
+ model = AutoModelForTokenClassification.from_pretrained('kiendt/phobert-ner-address')
60
+ tokenizer = AutoTokenizer.from_pretrained('kiendt/phobert-ner-address')
61
+
62
+ label_list = ['B_PRO', 'B_CITY', 'NUMBER_TYPE', 'B_DIST', 'TO_TYPE', 'B_STREET', 'I_PRO', 'I_DIST', 'PRO_TYPE', 'OTHER', 'I_STREET', 'B_WARD', 'STREET_TYPE', 'I_CITY', 'CITY_TYPE', 'O', 'NUMBER', 'WARD_TYPE', 'I_WARD', 'DIST_TYPE', 'TO']
63
+
64
+ id2label = {i: label for i, label in enumerate(label_list)}
65
+
66
+ def predict_entities(text):
66
+     inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128)
67
+     with torch.no_grad():
68
+         outputs = model(**inputs)
69
+         logits = outputs.logits
70
+
71
+     predictions = torch.argmax(logits, dim=-1)
72
+     predicted_labels = [id2label[label.item()] for label in predictions[0]]
73
+
74
+     tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'][0])
75
+
76
+     print("\nTokens and Predicted Labels:")
77
+     print(f"{'Token':<15} {'Predicted Label'}")
78
+     print("-" * 40)
79
+     for token, label in zip(tokens, predicted_labels):
80
+         print(f"{token:<15} {label}")
82
+
83
+ predict_entities("Km 1 đường Nguyễn Văn Linh, PHƯỜNG PHÚC ĐỒNG, QUẬN LONG BIÊN, THÀNH PHỐ HÀ NỘI")
84
+ ```
85
+
86
+
87
  ### Training results
88
 
89