Nathan Schneider committed on
Commit
285f69e
·
1 Parent(s): 7473b3d

JSON outputs

Browse files
Files changed (1) hide show
  1. app.py +17 -13
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import html
 
2
  import gradio as gr
3
  import spaces
4
  from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification, TokenClassificationPipeline
@@ -239,13 +240,12 @@ def classify_tokens(text: str):
239
  framework="pt"
240
  )
241
 
242
- results_simple = pipe(text, aggregation_strategy="simple") # output #1
243
- results_none = pipe(text, aggregation_strategy="none", ignore_labels=[]) # output #2 (per-token + probabilities)
244
- print(results_none)
245
 
246
- # sort
247
- sorted_results1 = sorted(results_simple, key=lambda x: x["start"])
248
- sorted_results2 = sorted(results_none, key=lambda x: x["start"])
249
 
250
  # color helper that tolerates B-/I- prefixes
251
  def pick_color(label: str, lbl2color: dict) -> str:
@@ -273,7 +273,7 @@ def classify_tokens(text: str):
273
  # ---------- Output 1: SIMPLE (grouped spans) ----------
274
  output1, last_idx = "", 0
275
  lbl2color = {}
276
- for e in sorted_results1:
277
  s, t = e["start"], e["end"]
278
  lab = e["entity_group"] # grouped results use entity_group
279
  short_lab = display_label(lab)
@@ -292,7 +292,7 @@ def classify_tokens(text: str):
292
 
293
 
294
  output2, last_idx2 = "", 0
295
- for e in sorted_results2:
296
  s, t = e["start"], e["end"]
297
  lab = e["entity"] # NONE returns `entity`
298
  probs = e["probabilities"]
@@ -317,7 +317,7 @@ def classify_tokens(text: str):
317
  table_html += "<tr><th style='border:1px solid #ccc;padding:6px;'>Token</th>"
318
  table_html += "<th style='border:1px solid #ccc;padding:6px;'>SNACS Label</th>"
319
  table_html += "<th style='border:1px solid #ccc;padding:6px;'>Confidence</th></tr>"
320
- for e in sorted_results1:
321
  token = html.escape(e["word"])
322
  lab = e["entity_group"]
323
  short_lab = display_label(lab)
@@ -334,7 +334,7 @@ def classify_tokens(text: str):
334
 
335
  styled_html1 = f"<div style='font-family:sans-serif;line-height:1.6;'>{output1}</div>"
336
  styled_html2 = f"<div style='font-family:sans-serif;line-height:1.6;'>{output2}</div>"
337
- return sorted_results1, styled_html1, table_html, styled_html2
338
  # except Exception as e:
339
  # # Force the real error into the Space logs
340
  # import traceback, sys
@@ -343,7 +343,7 @@ def classify_tokens(text: str):
343
  # return f"<pre>{html.escape(repr(e))}</pre>", "", ""
344
 
345
 
346
- with gr.Blocks(title="SNACS Tagging", theme="light") as demo:
347
  with gr.Row():
348
  description = gr.HTML(DESCR_TOP)
349
 
@@ -368,9 +368,13 @@ with gr.Blocks(title="SNACS Tagging", theme="light") as demo:
368
  output1 = gr.HTML(label="SNACS Tagged Sentence")
369
  output2 = gr.HTML(label="SNACS Table with Colored Labels")
370
  output3 = gr.HTML(label="SNACS Tagged Sentence with No Label Aggregation")
 
 
 
 
371
 
372
- examples.outputs = [simple_output,output1,output2,output3]
373
- tag_btn.click(fn=classify_tokens, inputs=input_text, outputs=[simple_output,output1,output2,output3])
374
 
375
 
376
  demo.launch()
 
1
  import html
2
+ import json
3
  import gradio as gr
4
  import spaces
5
  from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification, TokenClassificationPipeline
 
240
  framework="pt"
241
  )
242
 
243
+ # tagged spans
244
+ results_spans = sorted(pipe(text, aggregation_strategy="simple"), key=lambda x: x["start"])
 
245
 
246
+ # per-token + probabilities
247
+ results_tokens = sorted(pipe(text, aggregation_strategy="none", ignore_labels=[]), key=lambda x: x["start"])
248
+ print(results_tokens)
249
 
250
  # color helper that tolerates B-/I- prefixes
251
  def pick_color(label: str, lbl2color: dict) -> str:
 
273
  # ---------- Output 1: SIMPLE (grouped spans) ----------
274
  output1, last_idx = "", 0
275
  lbl2color = {}
276
+ for e in results_spans:
277
  s, t = e["start"], e["end"]
278
  lab = e["entity_group"] # grouped results use entity_group
279
  short_lab = display_label(lab)
 
292
 
293
 
294
  output2, last_idx2 = "", 0
295
+ for e in results_tokens:
296
  s, t = e["start"], e["end"]
297
  lab = e["entity"] # NONE returns `entity`
298
  probs = e["probabilities"]
 
317
  table_html += "<tr><th style='border:1px solid #ccc;padding:6px;'>Token</th>"
318
  table_html += "<th style='border:1px solid #ccc;padding:6px;'>SNACS Label</th>"
319
  table_html += "<th style='border:1px solid #ccc;padding:6px;'>Confidence</th></tr>"
320
+ for e in results_spans:
321
  token = html.escape(e["word"])
322
  lab = e["entity_group"]
323
  short_lab = display_label(lab)
 
334
 
335
  styled_html1 = f"<div style='font-family:sans-serif;line-height:1.6;'>{output1}</div>"
336
  styled_html2 = f"<div style='font-family:sans-serif;line-height:1.6;'>{output2}</div>"
337
+ return results_spans, json.dumps(results_spans), json.dumps(results_tokens), styled_html1, table_html, styled_html2
338
  # except Exception as e:
339
  # # Force the real error into the Space logs
340
  # import traceback, sys
 
343
  # return f"<pre>{html.escape(repr(e))}</pre>", "", ""
344
 
345
 
346
+ with gr.Blocks(title="SNACS Tagging") as demo:
347
  with gr.Row():
348
  description = gr.HTML(DESCR_TOP)
349
 
 
368
  output1 = gr.HTML(label="SNACS Tagged Sentence")
369
  output2 = gr.HTML(label="SNACS Table with Colored Labels")
370
  output3 = gr.HTML(label="SNACS Tagged Sentence with No Label Aggregation")
371
+ with gr.Tab("JSON Spans"):
372
+ json_spans = gr.Code(language="json")
373
+ with gr.Tab("JSON Tokens"):
374
+ json_tokens = gr.Code(language="json")
375
 
376
+ examples.outputs = [simple_output,json_spans,json_tokens,output1,output2,output3]
377
+ tag_btn.click(fn=classify_tokens, inputs=input_text, outputs=examples.outputs)
378
 
379
 
380
  demo.launch()