Spaces:
Runtime error
Runtime error
carrie committed on
Commit ·
c8b4824
1
Parent(s): 36895c4
update model processing code
Browse files- app.py +52 -4
- requirements.txt +2 -1
app.py
CHANGED
|
@@ -1,14 +1,62 @@
|
|
| 1 |
import os
|
| 2 |
import gradio as gr
|
| 3 |
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
| 4 |
-
|
|
|
|
|
|
|
| 5 |
|
| 6 |
model = AutoModelForSeq2SeqLM.from_pretrained("fangyuan/lfqa_role_classification")
|
| 7 |
tokenizer = AutoTokenizer.from_pretrained("fangyuan/lfqa_role_classification")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
-
def predict(input):
|
| 11 |
-
pass
|
| 12 |
|
| 13 |
|
| 14 |
gr.Interface(
|
|
@@ -18,7 +66,7 @@ gr.Interface(
|
|
| 18 |
gr.inputs.Textbox(lines=1, label="Answer:"),
|
| 19 |
],
|
| 20 |
outputs=[
|
| 21 |
-
gr.outputs.Textbox(label="Predicted functional
|
| 22 |
],
|
| 23 |
theme="peach",
|
| 24 |
title="Discourse structure of long-form answer",
|
|
|
|
| 1 |
import os
|
| 2 |
import gradio as gr
|
| 3 |
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
| 4 |
+
import stanza
|
| 5 |
+
import re
|
| 6 |
+
stanza.download('en', processors='tokenize')
|
| 7 |
|
| 8 |
model = AutoModelForSeq2SeqLM.from_pretrained("fangyuan/lfqa_role_classification")
|
| 9 |
tokenizer = AutoTokenizer.from_pretrained("fangyuan/lfqa_role_classification")
|
| 10 |
+
en_nlp = stanza.Pipeline('en', processors='tokenize')
|
| 11 |
+
|
| 12 |
+
def get_ans_sentence_with_stanza(answer_paragraph, pipeline,
                                 is_offset=False):
    """Split an answer paragraph into sentences using a stanza pipeline.

    Args:
        answer_paragraph: Raw answer text to segment.
        pipeline: Callable (e.g. a ``stanza.Pipeline`` with the ``tokenize``
            processor) that takes the text and returns a document exposing
            ``.sentences``; each sentence exposes ``.tokens`` whose items
            carry ``start_char`` and ``end_char``.
        is_offset: When true, return ``(start_char, end_char)`` tuples
            instead of the sentence strings.

    Returns:
        A list of whitespace-stripped sentence strings, or, when
        ``is_offset`` is true, a list of ``(start_char, end_char)`` tuples
        taken from the first and last token of each sentence.
    """
    document = pipeline(answer_paragraph)
    spans = [
        (sent.tokens[0].start_char, sent.tokens[-1].end_char)
        for sent in document.sentences
    ]
    if is_offset:
        return spans
    # stanza's ``end_char`` is an exclusive offset, so slice with it as-is.
    # Adding 1 would pull in the first character of the following sentence
    # whenever two sentences are not separated by whitespace, and would make
    # the text disagree with the offsets returned for ``is_offset=True``.
    return [answer_paragraph[start:end].strip() for start, end in spans]
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def create_input_to_t5(question, answer):
    """Build the single-line model input for a question/answer pair.

    The answer is split into sentences with the module-level stanza
    pipeline; the result is the question followed by every sentence, each
    preceded by its ``[i]`` index marker, all joined with single spaces.
    """
    pieces = [question]
    sentences = get_ans_sentence_with_stanza(answer, en_nlp)
    for position, sentence in enumerate(sentences):
        # Marker first, then the sentence it labels.
        pieces.extend(('[{}]'.format(position), sentence))
    return ' '.join(pieces)
|
| 34 |
+
|
| 35 |
+
def process_t5_output(input_txt, output_txt):
    """Align the roles predicted by the model with the input sentences.

    Both strings are expected to use ``[i] `` markers (index in brackets
    followed by one space), as produced when the model input is built.

    Args:
        input_txt: The text fed to the model: question, then
            ``[0] sentence [1] sentence ...``.
        output_txt: The decoded model output: ``[0] role [1] role ...``.

    Returns:
        One line per input sentence marker, in input order, joined with
        newlines. Each line is the stripped role predicted for that marker,
        or a single space when the output has no role for it.
    """
    # A capturing split yields [prefix, idx0, text0, idx1, text1, ...], so
    # every index is paired with the text that follows it. Finding indices
    # and splitting texts with two different patterns could drift apart
    # whenever a bracketed number was not followed by a space.
    marker = r'(\[\d+\]) '

    sentence_ids = re.split(marker, input_txt)[1::2]

    output_parts = re.split(marker, output_txt)
    idx_to_role = {
        idx: role.strip()
        for idx, role in zip(output_parts[1::2], output_parts[2::2])
    }

    return '\n'.join(idx_to_role.get(idx, ' ') for idx in sentence_ids)
|
| 50 |
+
|
| 51 |
+
|
| 52 |
|
| 53 |
+
def predict(question, answer):
    """Predict a role for every sentence of ``answer``.

    Formats the question/answer pair into the model input, tokenizes it
    into PyTorch tensors, generates with ``max_length=512``, decodes the
    first generated sequence without special tokens, and maps the decoded
    roles back onto the input sentences.

    Returns:
        The newline-joined roles produced by ``process_t5_output``.
    """
    model_input = create_input_to_t5(question, answer)
    encoded = tokenizer(model_input, return_tensors='pt')
    generated = model.generate(encoded.input_ids, max_length=512)
    decoded = tokenizer.batch_decode(generated, skip_special_tokens=True)
    # Only one input was encoded, so only the first decoded sequence is used.
    return process_t5_output(model_input, decoded[0])
|
| 59 |
|
|
|
|
|
|
|
| 60 |
|
| 61 |
|
| 62 |
gr.Interface(
|
|
|
|
| 66 |
gr.inputs.Textbox(lines=1, label="Answer:"),
|
| 67 |
],
|
| 68 |
outputs=[
|
| 69 |
+
gr.outputs.Textbox(label="Predicted sentence-level functional roles"),
|
| 70 |
],
|
| 71 |
theme="peach",
|
| 72 |
title="Discourse structure of long-form answer",
|
requirements.txt
CHANGED
|
@@ -1,3 +1,4 @@
|
|
| 1 |
gradio
|
| 2 |
transformers
|
| 3 |
-
torch
|
|
|
|
|
|
| 1 |
gradio
|
| 2 |
transformers
|
| 3 |
+
torch
|
| 4 |
+
stanza
|