Spaces:
Sleeping
Sleeping
Upload streamlit_app.py
Browse files- src/streamlit_app.py +61 -2
src/streamlit_app.py
CHANGED
|
@@ -20,6 +20,18 @@ LABEL_COLORS = {
|
|
| 20 |
'LABEL-8': '#ffc6ff', # I-SET
|
| 21 |
}
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
@st.cache_resource(show_spinner=True)
|
| 24 |
def load_model():
|
| 25 |
tokenizer = AutoTokenizer.from_pretrained('asdc/Bio-RoBERTime')
|
|
@@ -75,12 +87,59 @@ def colorize_entities(ner_result: List[Tuple[str, str]]) -> str:
|
|
| 75 |
html += f'{token} '
|
| 76 |
return html
|
| 77 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
st.title('LLM-powered Named Entity Recognition (NER)')
|
| 79 |
|
|
|
|
|
|
|
|
|
|
| 80 |
user_text = st.text_area('Enter text for NER:', height=150)
|
| 81 |
|
| 82 |
if user_text:
|
| 83 |
ner_result = ner_with_robertime(user_text)
|
| 84 |
-
|
| 85 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
st.caption('Model: [asdc/Bio-RoBERTime](https://huggingface.co/asdc/Bio-RoBERTime)')
|
|
|
|
| 20 |
'LABEL-8': '#ffc6ff', # I-SET
|
| 21 |
}
|
| 22 |
|
| 23 |
+
# Human-readable BIO tag for each raw model label. The position of a tag in
# the tuple below is the numeric suffix of its 'LABEL-<n>' key.
LABEL_MEANINGS = {
    f'LABEL-{index}': tag
    for index, tag in enumerate((
        'NONE',
        'B-DATE',
        'I-DATE',
        'B-TIME',
        'I-TIME',
        'B-DURATION',
        'I-DURATION',
        'B-SET',
        'I-SET',
    ))
}
|
| 34 |
+
|
| 35 |
@st.cache_resource(show_spinner=True)
|
| 36 |
def load_model():
|
| 37 |
tokenizer = AutoTokenizer.from_pretrained('asdc/Bio-RoBERTime')
|
|
|
|
| 87 |
html += f'{token} '
|
| 88 |
return html
|
| 89 |
|
| 90 |
+
def extract_entities(ner_result: List[Tuple[str, str]], label_meanings=None) -> List[Tuple[str, str]]:
    """Group labelled tokens into ``(entity text, label)`` pairs.

    Tokens labelled ``'LABEL-0'`` are outside any entity and close the entity
    being built. A labelled token extends the entity being built when it
    either carries the same label as the previous token, or its meaning is an
    ``I-<TYPE>`` continuation of that entity's ``<TYPE>``. This keeps a
    sequence such as ``B-DATE, I-DATE, I-DATE`` together as one DATE entity
    instead of splitting it at the B-/I- boundary. Any other labelled token
    starts a new entity.

    Args:
        ner_result: ``(token, label)`` pairs in text order.
        label_meanings: Optional mapping from a label to its BIO tag
            (for example ``'LABEL-1' -> 'B-DATE'``). Defaults to the
            module-level ``LABEL_MEANINGS``. Labels missing from the mapping
            are only merged with identical neighbouring labels.

    Returns:
        One ``(text, label)`` pair per entity, in order of appearance. The
        text is the entity's tokens joined by single spaces; the label is the
        label of the entity's first token.
    """
    if label_meanings is None:
        label_meanings = LABEL_MEANINGS

    def split_tag(label):
        # Return (prefix, entity_type); prefix is '' when the meaning is not
        # of the form 'B-<TYPE>' / 'I-<TYPE>'.
        meaning = label_meanings.get(label, label)
        prefix, sep, entity_type = meaning.partition('-')
        if sep and prefix in ('B', 'I'):
            return prefix, entity_type
        return '', meaning

    entities = []
    tokens = []            # tokens of the entity currently being built
    first_label = None     # label of that entity's first token
    current_type = None    # entity type (e.g. 'DATE') of that entity
    previous_label = None  # label of the previous token, None after a gap

    for token, label in ner_result:
        if label == 'LABEL-0':
            # An outside token closes whatever entity was open.
            if tokens:
                entities.append((' '.join(tokens), first_label))
            tokens = []
            previous_label = None
            continue

        prefix, entity_type = split_tag(label)
        continues_entity = bool(tokens) and (
            label == previous_label
            or (prefix == 'I' and entity_type == current_type)
        )
        if continues_entity:
            tokens.append(token)
        else:
            if tokens:
                entities.append((' '.join(tokens), first_label))
            tokens = [token]
            first_label = label
            current_type = entity_type
        previous_label = label

    # Flush an entity that runs up to the end of the input.
    if tokens:
        entities.append((' '.join(tokens), first_label))
    return entities
|
| 112 |
+
|
| 113 |
+
def legend_html() -> str:
    """Build the HTML legend: one coloured chip per entity label.

    Iterates ``LABEL_COLORS`` in its own order, skips ``'LABEL-0'``, and shows
    each remaining label's meaning from ``LABEL_MEANINGS`` followed by the raw
    label in parentheses. The chips are wrapped in a single flex ``<div>``.
    """
    chips = [
        f'<span style="background-color:{color};padding:2px 8px;border-radius:4px;">{LABEL_MEANINGS[label]} ({label})</span>'
        for label, color in LABEL_COLORS.items()
        if label != 'LABEL-0'
    ]
    return '<div style="display:flex;flex-wrap:wrap;gap:8px;">' + ''.join(chips) + '</div>'
|
| 122 |
+
|
| 123 |
# --- Streamlit page: legend, text input, highlighted text, entity list ---
# Imported here so this section is self-contained; used to escape text that
# originates from the user before it is rendered as raw HTML.
from html import escape as _escape_html

st.title('LLM-powered Named Entity Recognition (NER)')

st.markdown('**Legend:**')
st.markdown(legend_html(), unsafe_allow_html=True)

user_text = st.text_area('Enter text for NER:', height=150)

if user_text:
    ner_result = ner_with_robertime(user_text)
    # extract_entities returns a non-empty list exactly when at least one
    # token has a label other than 'LABEL-0', so it doubles as the
    # "anything detected?" check.
    entities = extract_entities(ner_result)
    if entities:
        st.markdown('#### Entities Highlighted:')
        st.markdown(colorize_entities(ner_result), unsafe_allow_html=True)
        st.markdown('#### Detected Entities:')
        for ent, label in entities:
            # The entity text comes from the user's input and is rendered with
            # unsafe_allow_html=True, so escape it to keep '<', '>' and '&'
            # from being interpreted as markup.
            st.markdown(f'- <span style="background-color:{LABEL_COLORS[label]};padding:2px 8px;border-radius:4px;">{_escape_html(ent)}</span> <span style="color:#888;">({LABEL_MEANINGS[label]})</span>', unsafe_allow_html=True)
    else:
        st.info('No entities detected.')

# NOTE(review): indentation is lost in this view; the caption is assumed to
# be top-level (always shown), matching its position after the `if` block.
st.caption('Model: [asdc/Bio-RoBERTime](https://huggingface.co/asdc/Bio-RoBERTime)')
|