ychenNLP
/

arabic-ner-ace

@@ -3,18 +3,18 @@ license: mit
 ---
 # Arabic NER
-'''
-from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer
-ner_model = AutoModelForTokenClassification.from_pretrained("ychen3411/arabic-ner-ace-gigabert")
-ner_tokenizer = AutoTokenizer.from_pretrained("ychen3411/arabic-ner-ace-gigabert")
-ner_pip = pipeline("ner", model=ner_model, tokenizer=ner_tokenizer, grouped_entities=True)
-output = ner_pip('Protests break out across the US after Supreme Court overturns.')
-print(output)
 [{'entity_group': 'GPE', 'score': 0.9979881, 'word': 'us', 'start': 30, 'end': 32}, {'entity_group': 'ORG', 'score': 0.99898684, 'word': 'supreme court', 'start': 39, 'end': 52}]
-output = ner_pip('قال وزير العدل التركي بكير بوزداغ إن أنقرة تريد 12 مشتبهاً بهم من فنلندا و 21 من السويد')
-print(output)
 [{'entity_group': 'PER', 'score': 0.9996214, 'word': 'وزير', 'start': 4, 'end': 8}, {'entity_group': 'ORG', 'score': 0.9952383, 'word': 'العدل', 'start': 9, 'end': 14}, {'entity_group': 'GPE', 'score': 0.9996675, 'word': 'التركي', 'start': 15, 'end': 21}, {'entity_group': 'PER', 'score': 0.9978992, 'word': 'بكير بوزداغ', 'start': 22, 'end': 33}, {'entity_group': 'GPE', 'score': 0.9997154, 'word': 'انقرة', 'start': 37, 'end': 42}, {'entity_group': 'PER', 'score': 0.9946885, 'word': 'مشتبها بهم', 'start': 51, 'end': 62}, {'entity_group': 'GPE', 'score': 0.99967396, 'word': 'فنلندا', 'start': 66, 'end': 72}, {'entity_group': 'PER', 'score': 0.99694425, 'word': '21', 'start': 75, 'end': 77}, {'entity_group': 'GPE', 'score': 0.99963355, 'word': 'السويد', 'start': 81, 'end': 87}]
-'''

 ---
 # Arabic NER
+```python
+>>> from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer
+>>> ner_model = AutoModelForTokenClassification.from_pretrained("ychen3411/arabic-ner-ace-gigabert")
+>>> ner_tokenizer = AutoTokenizer.from_pretrained("ychen3411/arabic-ner-ace-gigabert")
+>>> ner_pip = pipeline("ner", model=ner_model, tokenizer=ner_tokenizer, grouped_entities=True)
+>>> output = ner_pip('Protests break out across the US after Supreme Court overturns.')
+>>> print(output)
 [{'entity_group': 'GPE', 'score': 0.9979881, 'word': 'us', 'start': 30, 'end': 32}, {'entity_group': 'ORG', 'score': 0.99898684, 'word': 'supreme court', 'start': 39, 'end': 52}]
+>>> output = ner_pip('قال وزير العدل التركي بكير بوزداغ إن أنقرة تريد 12 مشتبهاً بهم من فنلندا و 21 من السويد')
+>>> print(output)
 [{'entity_group': 'PER', 'score': 0.9996214, 'word': 'وزير', 'start': 4, 'end': 8}, {'entity_group': 'ORG', 'score': 0.9952383, 'word': 'العدل', 'start': 9, 'end': 14}, {'entity_group': 'GPE', 'score': 0.9996675, 'word': 'التركي', 'start': 15, 'end': 21}, {'entity_group': 'PER', 'score': 0.9978992, 'word': 'بكير بوزداغ', 'start': 22, 'end': 33}, {'entity_group': 'GPE', 'score': 0.9997154, 'word': 'انقرة', 'start': 37, 'end': 42}, {'entity_group': 'PER', 'score': 0.9946885, 'word': 'مشتبها بهم', 'start': 51, 'end': 62}, {'entity_group': 'GPE', 'score': 0.99967396, 'word': 'فنلندا', 'start': 66, 'end': 72}, {'entity_group': 'PER', 'score': 0.99694425, 'word': '21', 'start': 75, 'end': 77}, {'entity_group': 'GPE', 'score': 0.99963355, 'word': 'السويد', 'start': 81, 'end': 87}]
+```