ekatosha committed on
Commit
dad38f6
·
1 Parent(s): f9a91f6

feature: add ner app

Browse files
Files changed (5) hide show
  1. config.py +9 -0
  2. const.py +6 -0
  3. main.py +20 -0
  4. requirements.txt +4 -0
  5. utils.py +23 -0
config.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic_settings import BaseSettings
2
+
3
class Settings(BaseSettings):
    """Static configuration for the NER demo, declared as a settings model."""

    # Pipeline task name.
    TASK: str = "ner"
    # Identifier of the model to load.
    MODEL_NAME: str = "dslim/bert-base-NER"
    # Leading part of the results heading.
    TITLE: str = "Named Entity Recog with"


# Module-level instance imported by the rest of the app.
settings = Settings()
const.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
# Highlight colour (hex) for each entity label.
COLORS = {
    "LOC": "#F67DE3",  # light pink
    "ORG": "#7DF6D9",  # light teal
    "PER": "#F6E37D",  # light yellow
    "MISC": "#7D9BF6",  # light blue
}
main.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from spacy import displacy
3
+ from config import settings
4
+ from const import COLORS
5
+ from utils import init_model, custom_predict
6
+
7
+
8
def main():
    """Render the page: read text, run NER on request, show highlighted entities."""
    st.title("Entity Checker")
    raw_text = st.text_area("Enter Text Here", "Type Here")

    # Nothing to do until the user presses the button.
    if not st.button("Analyze"):
        return

    # An empty or whitespace-only box has nothing to tag; skip the model call
    # and tell the user instead of rendering an empty result.
    if not raw_text.strip():
        st.warning("Please enter some text to analyze.")
        return

    pipe = init_model(settings.TASK, settings.MODEL_NAME)
    result = custom_predict(raw_text, pipe)

    st.subheader(f"{settings.TITLE} {settings.MODEL_NAME}")
    # Take the label list from COLORS so labels and colours cannot drift apart.
    options = {"ents": list(COLORS), "colors": COLORS}
    ent_html = displacy.render(result, style="ent", manual=True, options=options)
    # The rendered markup is HTML, so raw HTML output has to be enabled.
    st.markdown(ent_html, unsafe_allow_html=True)


if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ streamlit>=1.40.2
2
+ spacy>=3.8.2
3
+ pydantic_settings>=2.6.1
4
+ transformers>=4.46.3
utils.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from functools import lru_cache
2
+
3
+ from transformers import pipeline, Pipeline
4
+
5
+
6
@lru_cache
def init_model(task: str, model: str = None, aggregation_strategy: str = None) -> Pipeline:
    """Build a transformers pipeline, memoised per argument combination.

    Args:
        task: Pipeline task name passed to ``pipeline`` (e.g. ``"ner"``).
        model: Model identifier; ``None`` leaves the choice to ``pipeline``.
        aggregation_strategy: Optional strategy forwarded to the pipeline.
            It is only passed on when it is not ``None``.

    Returns:
        The pipeline object created by ``transformers.pipeline``.
    """
    # Forward only options that were actually set: ``task`` is arbitrary here,
    # and pipelines other than token classification do not take an
    # ``aggregation_strategy`` keyword, so an unconditional ``None`` can break them.
    extra = {}
    if aggregation_strategy is not None:
        extra["aggregation_strategy"] = aggregation_strategy
    return pipeline(task, model=model, **extra)
12
+
13
+
14
def custom_predict(text: str, pipe: "Pipeline") -> dict:
    """Run the pipeline on *text* and reshape its output for manual entity rendering.

    Args:
        text: Raw input text.
        pipe: Callable pipeline; it is invoked as
            ``pipe(text, aggregation_strategy="simple")`` and is expected to
            return an iterable of dicts with ``start``, ``end`` and
            ``entity_group`` keys.

    Returns:
        A dict with the original ``text``, an ``ents`` list of
        ``{"start", "end", "label"}`` dicts and ``"title": None``.
    """
    result = pipe(text, aggregation_strategy="simple")
    ents = [
        {"start": ent["start"], "end": ent["end"], "label": ent["entity_group"]}
        for ent in result
        # A span without character offsets cannot be highlighted; drop it
        # rather than hand the renderer a None boundary.
        if ent.get("start") is not None and ent.get("end") is not None
    ]
    return {"text": text, "ents": ents, "title": None}