Spaces:

c-ho
/

test_ner_2

Sleeping

App Files Files Community

c-ho committed 28 days ago

Commit

5c82ebc

verified ·

1 Parent(s): f520020

Update app.py

Browse files

Files changed (1) hide show

app.py +120 -16

app.py CHANGED Viewed

@@ -119,20 +119,34 @@ def analyze_text(text, model_name):
     results = ner(text)
     results = merge_subwords(results)
-    entities = []
     table_rows = []
     for ent in results:
         label = ent["entity_group"]
-        entities.append({
-            "start": ent["start"],
-            "end": ent["end"],
-            "label": label,
-        })
         table_rows.append([
             ent["word"],
@@ -140,23 +154,113 @@ def analyze_text(text, model_name):
             round(ent["score"], 3)
         ])
-    highlighted_output = {
-        "text": text,
-        "entities": entities
-    }
-    return highlighted_output, table_rows
 # ---------------------------------------------------
 # Entity colors
 # ---------------------------------------------------
 COLOR_MAP = {
-    "LanguageRelatedTerm": "#ffcc00",
-    "OtherLinguisticTerm": "#99ccff",
-    "PhonologicalPhenomenon": "#ff9999",
-    "MorphosyntacticPhenomenon": "#99ff99",
-    "TOPNODE_DUMMY": "#dddddd",
 }
 # ---------------------------------------------------

     results = ner(text)
+    # merge subwords first
     results = merge_subwords(results)
+    highlighted_text = []
+    last_idx = 0
     table_rows = []
     for ent in results:
+        start = ent["start"]
+        end = ent["end"]
         label = ent["entity_group"]
+        # Add normal text before entity
+        if start > last_idx:
+            highlighted_text.append(
+                (text[last_idx:start], None)
+            )
+        # Add highlighted entity
+        highlighted_text.append(
+            (text[start:end], label)
+        )
+        last_idx = end
         table_rows.append([
             ent["word"],
             round(ent["score"], 3)
         ])
+    # Add remaining text
+    if last_idx < len(text):
+        highlighted_text.append(
+            (text[last_idx:], None)
+        )
+    return highlighted_text, table_rows
 # ---------------------------------------------------
 # Entity colors
 # ---------------------------------------------------
 COLOR_MAP = {
+    # -----------------------------------
+    # Academic / theoretical
+    # -----------------------------------
+    "AcademicDiscipline": "#264653",              # deep teal
+    "AmbiguouslyDefinedConcept": "#6D597A",       # muted purple
+    "UnclassifiedLinguisticConcept": "#9A8C98",   # soft gray-purple
+    # -----------------------------------
+    # Language / general linguistic
+    # -----------------------------------
+    "LanguageRelatedTerm": "#E9C46A",             # warm sand yellow
+    "OtherLinguisticTerm": "#A8DADC",             # pale cyan
+    "LanguageResourceInformation": "#457B9D",     # medium blue
+    # -----------------------------------
+    # Phonology / graphemics
+    # -----------------------------------
+    "PhonologicalPhenomenon": "#E76F51",          # coral red
+    "GraphemicPhenomenon": "#F4A261",             # orange
+    # -----------------------------------
+    # Morphology / syntax
+    # -----------------------------------
+    "MorphologicalPhenomenon": "#2A9D8F",         # turquoise green
+    "MorphosyntacticPhenomenon": "#52B788",       # medium green
+    "SyntacticPhenomenon": "#40916C",             # darker green
+    # -----------------------------------
+    # Lexicon / semantics / discourse
+    # -----------------------------------
+    "LexicalPhenomenon": "#577590",               # slate blue
+    "SemanticPhenomenon": "#4361EE",              # vivid blue
+    "DiscoursePhenomenon": "#B5179E",             # magenta-purple
+    # -----------------------------------
+    # Special / misc
+    # -----------------------------------
+    "NEW_TAG": "#FF006E",                         # neon pink
+    "TOPNODE_DUMMY": "#BDBDBD",                   # neutral gray
+    # -----------------------------------
+    # Optional BIO aliases
+    # (safe fallback if model outputs raw BIO labels)
+    # -----------------------------------
+    "B-AcademicDiscipline": "#264653",
+    "I-AcademicDiscipline": "#264653",
+    "B-AmbiguouslyDefinedConcept": "#6D597A",
+    "I-AmbiguouslyDefinedConcept": "#6D597A",
+    "B-DiscoursePhenomenon": "#B5179E",
+    "I-DiscoursePhenomenon": "#B5179E",
+    "B-GraphemicPhenomenon": "#F4A261",
+    "I-GraphemicPhenomenon": "#F4A261",
+    "B-LanguageRelatedTerm": "#E9C46A",
+    "I-LanguageRelatedTerm": "#E9C46A",
+    "B-LanguageResourceInformation": "#457B9D",
+    "I-LanguageResourceInformation": "#457B9D",
+    "B-LexicalPhenomenon": "#577590",
+    "I-LexicalPhenomenon": "#577590",
+    "B-MorphologicalPhenomenon": "#2A9D8F",
+    "I-MorphologicalPhenomenon": "#2A9D8F",
+    "B-MorphosyntacticPhenomenon": "#52B788",
+    "I-MorphosyntacticPhenomenon": "#52B788",
+    "B-OtherLinguisticTerm": "#A8DADC",
+    "I-OtherLinguisticTerm": "#A8DADC",
+    "B-PhonologicalPhenomenon": "#E76F51",
+    "I-PhonologicalPhenomenon": "#E76F51",
+    "B-SemanticPhenomenon": "#4361EE",
+    "I-SemanticPhenomenon": "#4361EE",
+    "B-SyntacticPhenomenon": "#40916C",
+    "I-SyntacticPhenomenon": "#40916C",
+    "B-TOPNODE_DUMMY": "#BDBDBD",
+    "I-TOPNODE_DUMMY": "#BDBDBD",
+    "B-UnclassifiedLinguisticConcept": "#9A8C98",
+    "I-UnclassifiedLinguisticConcept": "#9A8C98",
+    "B-NEW_TAG": "#FF006E",
+    "I-NEW_TAG": "#FF006E",
+    # Outside tag
+    "O": "#FFFFFF"
 }
 # ---------------------------------------------------