Behpouyan
/

Behpouyan-NER

Token Classification

Named Entity Recognition

Model card Files Files and versions

Behpouyan committed on Dec 5, 2024

Commit

3d01653

·

verified ·

1 Parent(s): a1c6451

Update README.md

Files changed (1) hide show

README.md +47 -4

README.md CHANGED Viewed

@@ -51,15 +51,58 @@ Here’s how you can use the model:
 ```python
 from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
-model_name = "Behpouyan/Behpouyan-NER"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForTokenClassification.from_pretrained(model_name)
 nlp = pipeline("ner", model=model, tokenizer=tokenizer)
 example = '''
 "در سال ۱۴۰۱، شرکت علی‌بابا اعلام کرد که با همکاری بانک ملت، یک پروژه بزرگ برای توسعه زیرساخت‌های تجارت الکترونیک در ایران آغاز خواهد کرد.
 این پروژه در تهران و اصفهان اجرا می‌شود و پیش‌بینی می‌شود تا پایان سال ۱۴۰۲ تکمیل شود."
 '''
 ner_results = nlp(example)
-print(ner_results)

 ```python
 from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
+tokenizer = AutoTokenizer.from_pretrained("Behpouyan/Behpouyan-NER")
+model = AutoModelForTokenClassification.from_pretrained("Behpouyan/Behpouyan-NER")
 nlp = pipeline("ner", model=model, tokenizer=tokenizer)
+# Input example
 example = '''
 "در سال ۱۴۰۱، شرکت علی‌بابا اعلام کرد که با همکاری بانک ملت، یک پروژه بزرگ برای توسعه زیرساخت‌های تجارت الکترونیک در ایران آغاز خواهد کرد.
 این پروژه در تهران و اصفهان اجرا می‌شود و پیش‌بینی می‌شود تا پایان سال ۱۴۰۲ تکمیل شود."
 '''
+# Get NER results
 ner_results = nlp(example)
+# Function to merge subword entities
def merge_entities(entities):
    """Merge token-level NER predictions into whole-entity spans.

    Args:
        entities: Iterable of dicts with the keys ``entity`` (a tag such as
            ``"B-ORG"`` or ``"I-ORG"``), ``word``, ``score``, ``start`` and
            ``end``.

    Returns:
        A list of dicts with the same keys, one per merged entity:

        * ``word``   - the pieces joined back together. Pieces that touch
          (or carry a ``##`` continuation marker) are glued; pieces
          separated by a gap are joined with a single space.
        * ``entity`` - the tag with its ``B-``/``I-`` prefix removed.
        * ``score``  - the lowest score among the merged pieces.
        * ``start`` / ``end`` - offsets of the first and last piece.

    A new entity starts on every ``B-`` tag, on the first token, on an
    ``I-`` tag whose type differs from the entity being built, and on any
    tag without a ``B-``/``I-`` prefix (kept verbatim as the type).
    """
    merged_results = []
    current_entity = None

    for entity in entities:
        tag = entity['entity']
        # Only strip a real BIO prefix; leave other tags intact instead of
        # chopping off their first two characters.
        label = tag[2:] if tag.startswith(("B-", "I-")) else tag
        text, glued = _split_piece(entity['word'])

        continues = (
            current_entity is not None
            and tag.startswith("I-")
            and label == current_entity['entity']
        )
        if not continues:
            # Flush whatever was being built and open a new entity.
            if current_entity is not None:
                merged_results.append(current_entity)
            current_entity = {
                "word": text,
                "entity": label,
                "score": entity['score'],
                "start": entity['start'],
                "end": entity['end'],
            }
            continue

        if glued is None:
            # No subword marker: fall back to the character offsets.
            # Touching/overlapping spans are pieces of one word; a gap
            # means something (usually a space) separated them.
            start, prev_end = entity['start'], current_entity['end']
            glued = (
                start is not None
                and prev_end is not None
                and start <= prev_end
            )

        if glued or not current_entity['word']:
            current_entity['word'] += text
        else:
            current_entity['word'] += " " + text
        # An entity is only as reliable as its weakest piece.
        current_entity['score'] = min(current_entity['score'], entity['score'])
        current_entity['end'] = entity['end']

    # Add the last entity, if any.
    if current_entity is not None:
        merged_results.append(current_entity)
    return merged_results


def _split_piece(word):
    """Strip subword markers from one token string.

    Returns ``(text, glued)`` where ``glued`` is ``True`` for a WordPiece
    continuation (``##`` prefix), ``False`` for a SentencePiece word start
    (``\u2581`` prefix), and ``None`` when the string carries no marker.
    """
    piece = word.strip()
    if piece.startswith("##"):
        return piece[2:], True
    if piece.startswith("\u2581"):
        return piece.lstrip("\u2581"), False
    return piece, None
+# Merge the entities
+merged_results = merge_entities(ner_results)
+# Display the merged results
+print("Named Entity Recognition Results:")
+for entity in merged_results:
+    print(f"- Entity: {entity['word']}")
+    print(f"  Type: {entity['entity']}")
+    print(f"  Score: {entity['score']:.2f}")
+    print(f"  Start: {entity['start']}, End: {entity['end']}")
+    print("-" * 40)