Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -13,7 +13,6 @@
|
|
| 13 |
# print(f"Entity: {entity['word']}, Label: {entity['entity']}, Score: {entity['score']:.4f}")
|
| 14 |
# st.json(entity)
|
| 15 |
|
| 16 |
-
|
| 17 |
import streamlit as st
|
| 18 |
from transformers import pipeline
|
| 19 |
|
|
@@ -23,51 +22,53 @@ ner_pipeline = pipeline("ner", model="Beehzod/smart-finetuned-ner")
|
|
| 23 |
# Helper function to combine subword tokens
|
| 24 |
def merge_entities(entities):
|
| 25 |
merged_entities = []
|
| 26 |
-
current_entity =
|
| 27 |
-
|
| 28 |
for token in entities:
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
| 35 |
merged_entities.append(current_entity)
|
| 36 |
-
|
| 37 |
# Start a new entity
|
| 38 |
current_entity = {
|
| 39 |
-
"word":
|
| 40 |
"entity": token['entity'],
|
| 41 |
"score": token['score'],
|
| 42 |
"start": token['start'],
|
| 43 |
"end": token['end'],
|
| 44 |
-
"count": 1 # for
|
| 45 |
}
|
| 46 |
else:
|
| 47 |
-
# Continue
|
| 48 |
-
current_entity["word"] +=
|
| 49 |
current_entity["end"] = token['end']
|
| 50 |
current_entity["score"] += token['score']
|
| 51 |
current_entity["count"] += 1
|
| 52 |
|
| 53 |
-
# Add the last entity
|
| 54 |
-
if current_entity
|
| 55 |
current_entity['score'] /= current_entity['count']
|
| 56 |
del current_entity['count']
|
| 57 |
merged_entities.append(current_entity)
|
| 58 |
|
| 59 |
return merged_entities
|
| 60 |
|
| 61 |
-
#
|
| 62 |
-
text = st.text_area('Enter text:
|
| 63 |
|
| 64 |
-
# Run NER model
|
| 65 |
if text:
|
| 66 |
results = ner_pipeline(text)
|
| 67 |
-
# Merge entities for clean output
|
| 68 |
merged_results = merge_entities(results)
|
| 69 |
-
|
| 70 |
-
# Display
|
| 71 |
for entity in merged_results:
|
| 72 |
-
st.write(f"Entity: {entity['word']}, Label: {entity['entity']}, Score: {entity['score']:.4f}")
|
| 73 |
st.json(entity)
|
|
|
|
| 13 |
# print(f"Entity: {entity['word']}, Label: {entity['entity']}, Score: {entity['score']:.4f}")
|
| 14 |
# st.json(entity)
|
| 15 |
|
|
|
|
| 16 |
import streamlit as st
|
| 17 |
from transformers import pipeline
|
| 18 |
|
|
|
|
| 22 |
# Helper function to combine subword tokens
|
| 23 |
def merge_entities(entities):
    """Merge token-level NER predictions into one record per entity.

    The input is expected to be a sequence of dicts, each with the keys
    ``word``, ``entity``, ``score``, ``start`` and ``end`` (the keys this
    function reads). Consecutive tokens are folded into a single entity as
    long as they share the same entity type, where the type is the label
    with its ``B-``/``I-`` prefix removed.

    A new entity is started when:
      * there is no entity in progress (including a leading ``I-`` token),
      * a whole-word token carries a ``B-`` label, or
      * the entity type differs from the entity in progress.

    Word pieces prefixed with ``##`` are glued to the previous text without
    a separator; whole-word continuation tokens are separated by a single
    space when the character offsets show a gap (or are unavailable).

    Args:
        entities: Iterable of token prediction dicts.

    Returns:
        A list of new dicts with keys ``word``, ``entity`` (label of the
        first token of the entity), ``score`` (mean of the token scores),
        ``start`` (first token) and ``end`` (last token). The input dicts
        are not modified.
    """

    def _entity_type(label):
        # "B-PER" and "I-PER" denote the same type; only the prefix differs.
        return label[2:] if label.startswith(("B-", "I-")) else label

    def _finalize(entity):
        # Convert the running score sum into a mean and drop the counter.
        entity["score"] /= entity.pop("count")
        return entity

    merged_entities = []
    current_entity = None

    for token in entities:
        word = token["word"]
        label = token["entity"]
        is_subword = word.startswith("##")
        token_text = word[2:] if is_subword else word

        starts_new_entity = (
            current_entity is None
            # A "##" piece continues the previous word even if tagged "B-".
            or (label.startswith("B-") and not is_subword)
            or _entity_type(label) != _entity_type(current_entity["entity"])
        )

        if starts_new_entity:
            if current_entity is not None:
                merged_entities.append(_finalize(current_entity))
            current_entity = {
                "word": token_text,
                "entity": label,
                "score": token["score"],
                "start": token["start"],
                "end": token["end"],
                "count": 1,  # Helper count for score averaging
            }
            continue

        # Continue the entity in progress. Separate whole words with a space
        # unless the offsets show the token directly abuts the previous one.
        previous_end = current_entity["end"]
        start = token["start"]
        if not is_subword and (
            previous_end is None or start is None or start > previous_end
        ):
            current_entity["word"] += " "
        current_entity["word"] += token_text
        current_entity["end"] = token["end"]
        current_entity["score"] += token["score"]
        current_entity["count"] += 1

    # Flush the trailing entity, if any.
    if current_entity is not None:
        merged_entities.append(_finalize(current_entity))

    return merged_entities
|
| 62 |
|
| 63 |
+
# Streamlit front end: read free-form text from the user.
text = st.text_area('Enter text:')

# Skip inference entirely until something has been entered.
if text:
    # Raw per-token predictions from the NER pipeline.
    results = ner_pipeline(text)
    # Collapse the per-token predictions into one record per entity.
    merged_results = merge_entities(results)

    # Render each merged entity as its own JSON element.
    for entity in merged_results:
        st.json(entity)
|