from transformers import pipeline

# ────────────────────────────────────────────────────────────────
# 2. Token Classification / NER
# ────────────────────────────────────────────────────────────────
# Default model: dbmdz/bert-large-cased-finetuned-conll03-english
#   → Used for: standard English NER (PER, ORG, LOC, MISC)
#              trained on CoNLL-2003 → reliable on news/articles
#              but weaker on social media / Indian names/cities
#
# Alternative model: dslim/bert-base-NER
#   → Used for: a lighter, faster option (110M vs 340M params) that is
#              still strong on standard entities; a popular choice

# Build the NER pipeline with the lighter alternative checkpoint.
# To use the library's default NER model instead, drop the `model` argument:
#   pipe = pipeline("ner", aggregation_strategy="simple")
pipe = pipeline(
    task="ner",
    model="dslim/bert-base-NER",
    # "simple" asks the pipeline to group token-level predictions into entities.
    aggregation_strategy="simple",
)

# Tag one sample sentence and print the raw pipeline output.
# NOTE(review): the sample is all lowercase; if this checkpoint is
# case-sensitive, few or no entities may be detected — confirm.
print(pipe("kapil is from pathankot"))