| import gradio as gr | |
| import torch | |
| from transformers import pipeline | |
| from transformers import PegasusForConditionalGeneration, PegasusTokenizer | |
# Extractive QA pipeline: finds the answer span inside a given context passage.
classifier = pipeline(
    "question-answering",
    model="deepset/roberta-base-squad2",
    tokenizer="deepset/roberta-base-squad2",
)

# Pegasus paraphraser: rewrites the answer-bearing sentence in fresh wording.
model_name = 'tuner007/pegasus_paraphrase'
if torch.cuda.is_available():
    torch_device = 'cuda'
else:
    torch_device = 'cpu'
tokenizer3 = PegasusTokenizer.from_pretrained(model_name)
model3 = PegasusForConditionalGeneration.from_pretrained(model_name).to(torch_device)
def qa_paraphrase(text_input, question):
    """Answer ``question`` from ``text_input``, then paraphrase the answer's sentence.

    Runs the extractive QA pipeline to pull the answer span out of the
    context, locates the sentence that contains that answer, and paraphrases
    the sentence with Pegasus to mimic a long-form answer.

    Args:
        text_input: Context passage to search for the answer.
        question: Natural-language question about the passage.

    Returns:
        A string with the short extracted answer and the paraphrased
        ("long form") answer on separate lines.
    """
    prediction = classifier(
        context=text_input,
        question=question,
        truncation=True,
        max_length=512,
        padding=True,
    )
    answer = prediction['answer']

    # Find the sentence that contains the answer. The original code left
    # `sentence` unbound (NameError at line 30) whenever the answer text did
    # not appear inside any single "."-split segment — e.g. when the answer
    # span crosses a sentence boundary — so fall back to the raw answer span.
    sentence = answer
    for segment in text_input.split("."):
        if answer in segment:
            sentence = segment.strip() + "."
            break

    # Paraphrase the selected sentence with Pegasus.
    batch = tokenizer3(
        [sentence],
        truncation=True,
        padding='longest',
        max_length=60,
        return_tensors="pt",
    ).to(torch_device)
    translated = model3.generate(
        **batch,
        max_length=60,
        num_beams=10,
        num_return_sequences=1,
        temperature=1.5,
    )
    paraphrase = tokenizer3.batch_decode(translated, skip_special_tokens=True)[0]
    return f"Answer: {answer}\nLong Form Answer: {paraphrase}"
# Build and launch the web UI. `gr.inputs.Textbox` / `gr.outputs.Textbox`
# were deprecated in Gradio 3.x and removed in 4.x (AttributeError on current
# releases); the top-level `gr.Textbox` component works on both.
iface = gr.Interface(
    fn=qa_paraphrase,
    inputs=[
        gr.Textbox(label="Text Input"),
        gr.Textbox(label="Question"),
    ],
    outputs=gr.Textbox(label="Output"),
    title="Long Form Question Answering",
    description="mimics long form question answering by extracting the sentence containing the answer and paraphrasing it",
)
iface.launch()