Spaces:

hs-knowledge
/

ner_app

Sleeping

App Files Files Community

finiteautomata committed on Jun 14, 2023

Commit

a188b38

1 Parent(s): 3b3110b

Reuse NER stuff

Browse files

Files changed (1) hide show

app.py +62 -2

app.py CHANGED Viewed

@@ -7,7 +7,66 @@ from annotated_text import annotated_text
 # Load data
 ds = load_dataset("hs-knowledge/hateval_enriched")
 # Show highlighted ner entities in a tweet
 def display_text(example):
@@ -57,8 +116,9 @@ elements = random.choices(range(len(ds["train"])), k=50)
 ds["train"] = ds["train"].select(elements)
 for ex in ds["train"]:
-    st.write("=" * 80)
-    display_text(ex)
     with st.expander("Show entities"):
         for ent in ex["entities"]:
             entity_name = ent["text"]

 # Load data
 ds = load_dataset("hs-knowledge/hateval_enriched")
 # Show highlighted ner entities in a tweet
+def display_ner(example):
+    ner_output = example["ner_output"]
+    chunks = []
+    current_chunk = ""
+    current_type = None
+    # Check if there are two labels repeated
+    previous_label = None
+    for label in ner_output["labels"]:
+        if (
+            label
+            and previous_label
+            and previous_label == label
+            and label != "O"
+            and not label.startswith("I-")
+            and not label.startswith("B-")
+        ):
+            pass
+        previous_label = label
+    for token, label in zip(ner_output["tokens"], ner_output["labels"]):
+        if label is None:
+            # Perhaps it is too long
+            continue
+        if label == "O":
+            if current_type is not None:
+                # Add previous entity
+                chunks.append((current_chunk.strip(), current_type))
+                current_chunk = token + " "
+                current_type = None
+            else:
+                current_chunk += token + " "
+                current_type = None
+        elif label.startswith("B-"):
+            if current_chunk:
+                chunks.append((current_chunk.strip(), current_type))
+            current_chunk = token + " "
+            current_type = label[2:]
+        elif label.startswith("I-"):
+            current_chunk += token + " "
+            current_type = label[2:]
+        else:
+            # It doesn't start with B- or I- => add single token
+            if label != current_type:
+                chunks.append((current_chunk.strip(), current_type))
+                current_chunk = token + " "
+                current_type = label
+            else:
+                current_chunk += token + " "
+                current_type = label
+    if current_chunk:
+        chunks.append((current_chunk.strip(), current_type))
+    # Display text
+    chunks = [(c, t) if t is not None else c for c, t in chunks]
+    annotated_text(*chunks)
 def display_text(example):
 ds["train"] = ds["train"].select(elements)
 for ex in ds["train"]:
+    # display_text(ex)
+    st.markdown("---")
+    display_ner(ex)
     with st.expander("Show entities"):
         for ent in ex["entities"]:
             entity_name = ent["text"]