Spaces:
Sleeping
Sleeping
Nathan Schneider
committed on
Commit
·
285f69e
1
Parent(s):
7473b3d
JSON outputs
Browse files
app.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
import html
|
|
|
|
| 2 |
import gradio as gr
|
| 3 |
import spaces
|
| 4 |
from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification, TokenClassificationPipeline
|
|
@@ -239,13 +240,12 @@ def classify_tokens(text: str):
|
|
| 239 |
framework="pt"
|
| 240 |
)
|
| 241 |
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
print(results_none)
|
| 245 |
|
| 246 |
-
#
|
| 247 |
-
|
| 248 |
-
|
| 249 |
|
| 250 |
# color helper that tolerates B-/I- prefixes
|
| 251 |
def pick_color(label: str, lbl2color: dict) -> str:
|
|
@@ -273,7 +273,7 @@ def classify_tokens(text: str):
|
|
| 273 |
# ---------- Output 1: SIMPLE (grouped spans) ----------
|
| 274 |
output1, last_idx = "", 0
|
| 275 |
lbl2color = {}
|
| 276 |
-
for e in
|
| 277 |
s, t = e["start"], e["end"]
|
| 278 |
lab = e["entity_group"] # grouped results use entity_group
|
| 279 |
short_lab = display_label(lab)
|
|
@@ -292,7 +292,7 @@ def classify_tokens(text: str):
|
|
| 292 |
|
| 293 |
|
| 294 |
output2, last_idx2 = "", 0
|
| 295 |
-
for e in
|
| 296 |
s, t = e["start"], e["end"]
|
| 297 |
lab = e["entity"] # NONE returns `entity`
|
| 298 |
probs = e["probabilities"]
|
|
@@ -317,7 +317,7 @@ def classify_tokens(text: str):
|
|
| 317 |
table_html += "<tr><th style='border:1px solid #ccc;padding:6px;'>Token</th>"
|
| 318 |
table_html += "<th style='border:1px solid #ccc;padding:6px;'>SNACS Label</th>"
|
| 319 |
table_html += "<th style='border:1px solid #ccc;padding:6px;'>Confidence</th></tr>"
|
| 320 |
-
for e in
|
| 321 |
token = html.escape(e["word"])
|
| 322 |
lab = e["entity_group"]
|
| 323 |
short_lab = display_label(lab)
|
|
@@ -334,7 +334,7 @@ def classify_tokens(text: str):
|
|
| 334 |
|
| 335 |
styled_html1 = f"<div style='font-family:sans-serif;line-height:1.6;'>{output1}</div>"
|
| 336 |
styled_html2 = f"<div style='font-family:sans-serif;line-height:1.6;'>{output2}</div>"
|
| 337 |
-
return
|
| 338 |
# except Exception as e:
|
| 339 |
# # Force the real error into the Space logs
|
| 340 |
# import traceback, sys
|
|
@@ -343,7 +343,7 @@ def classify_tokens(text: str):
|
|
| 343 |
# return f"<pre>{html.escape(repr(e))}</pre>", "", ""
|
| 344 |
|
| 345 |
|
| 346 |
-
with gr.Blocks(title="SNACS Tagging"
|
| 347 |
with gr.Row():
|
| 348 |
description = gr.HTML(DESCR_TOP)
|
| 349 |
|
|
@@ -368,9 +368,13 @@ with gr.Blocks(title="SNACS Tagging", theme="light") as demo:
|
|
| 368 |
output1 = gr.HTML(label="SNACS Tagged Sentence")
|
| 369 |
output2 = gr.HTML(label="SNACS Table with Colored Labels")
|
| 370 |
output3 = gr.HTML(label="SNACS Tagged Sentence with No Label Aggregation")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 371 |
|
| 372 |
-
examples.outputs = [simple_output,output1,output2,output3]
|
| 373 |
-
tag_btn.click(fn=classify_tokens, inputs=input_text, outputs=
|
| 374 |
|
| 375 |
|
| 376 |
demo.launch()
|
|
|
|
| 1 |
import html
|
| 2 |
+
import json
|
| 3 |
import gradio as gr
|
| 4 |
import spaces
|
| 5 |
from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification, TokenClassificationPipeline
|
|
|
|
| 240 |
framework="pt"
|
| 241 |
)
|
| 242 |
|
| 243 |
+
# tagged spans
|
| 244 |
+
results_spans = pipe(text, aggregation_strategy="simple").sort(key=lambda x: x["start"])
|
|
|
|
| 245 |
|
| 246 |
+
# per-token + probabilities
|
| 247 |
+
results_tokens = pipe(text, aggregation_strategy="none", ignore_labels=[]).sort(key=lambda x: x["start"])
|
| 248 |
+
print(results_tokens)
|
| 249 |
|
| 250 |
# color helper that tolerates B-/I- prefixes
|
| 251 |
def pick_color(label: str, lbl2color: dict) -> str:
|
|
|
|
| 273 |
# ---------- Output 1: SIMPLE (grouped spans) ----------
|
| 274 |
output1, last_idx = "", 0
|
| 275 |
lbl2color = {}
|
| 276 |
+
for e in results_spans:
|
| 277 |
s, t = e["start"], e["end"]
|
| 278 |
lab = e["entity_group"] # grouped results use entity_group
|
| 279 |
short_lab = display_label(lab)
|
|
|
|
| 292 |
|
| 293 |
|
| 294 |
output2, last_idx2 = "", 0
|
| 295 |
+
for e in results_tokens:
|
| 296 |
s, t = e["start"], e["end"]
|
| 297 |
lab = e["entity"] # NONE returns `entity`
|
| 298 |
probs = e["probabilities"]
|
|
|
|
| 317 |
table_html += "<tr><th style='border:1px solid #ccc;padding:6px;'>Token</th>"
|
| 318 |
table_html += "<th style='border:1px solid #ccc;padding:6px;'>SNACS Label</th>"
|
| 319 |
table_html += "<th style='border:1px solid #ccc;padding:6px;'>Confidence</th></tr>"
|
| 320 |
+
for e in results_spans:
|
| 321 |
token = html.escape(e["word"])
|
| 322 |
lab = e["entity_group"]
|
| 323 |
short_lab = display_label(lab)
|
|
|
|
| 334 |
|
| 335 |
styled_html1 = f"<div style='font-family:sans-serif;line-height:1.6;'>{output1}</div>"
|
| 336 |
styled_html2 = f"<div style='font-family:sans-serif;line-height:1.6;'>{output2}</div>"
|
| 337 |
+
return results_spans, json.dumps(results_spans), json.dumps(results_tokens), styled_html1, table_html, styled_html2
|
| 338 |
# except Exception as e:
|
| 339 |
# # Force the real error into the Space logs
|
| 340 |
# import traceback, sys
|
|
|
|
| 343 |
# return f"<pre>{html.escape(repr(e))}</pre>", "", ""
|
| 344 |
|
| 345 |
|
| 346 |
+
with gr.Blocks(title="SNACS Tagging") as demo:
|
| 347 |
with gr.Row():
|
| 348 |
description = gr.HTML(DESCR_TOP)
|
| 349 |
|
|
|
|
| 368 |
output1 = gr.HTML(label="SNACS Tagged Sentence")
|
| 369 |
output2 = gr.HTML(label="SNACS Table with Colored Labels")
|
| 370 |
output3 = gr.HTML(label="SNACS Tagged Sentence with No Label Aggregation")
|
| 371 |
+
with gr.Tab("JSON Spans"):
|
| 372 |
+
json_spans = gr.Code(language="json")
|
| 373 |
+
with gr.Tab("JSON Tokens"):
|
| 374 |
+
json_tokens = gr.Code(language="json")
|
| 375 |
|
| 376 |
+
examples.outputs = [simple_output,json_spans,json_tokens,output1,output2,output3]
|
| 377 |
+
tag_btn.click(fn=classify_tokens, inputs=input_text, outputs=examples.outputs)
|
| 378 |
|
| 379 |
|
| 380 |
demo.launch()
|