import itertools
import random

import gradio as gr
import nltk
from nltk.tokenize import sent_tokenize
from transformers import AutoTokenizer
from fastT5 import export_and_get_onnx_model

nltk.download('punkt')

# Load the paraphrasing model; fastT5 exports it to ONNX for faster CPU inference,
# so the model (and the input tensors below) stay on the CPU.
T5_tokenizer = AutoTokenizer.from_pretrained("jaimin/T5-Large")
T5_model = export_and_get_onnx_model('jaimin/T5-Large')


def get_paraphrases(text, n_predictions=3, max_length=512, device="cpu"):
    para = []
    # Paraphrase each sentence of the input independently.
    for sent in sent_tokenize(text):
        prompt = "paraphrase: " + sent
        encoding = T5_tokenizer.encode_plus(
            prompt, padding=True, return_tensors="pt", truncation=True
        )
        input_ids = encoding["input_ids"].to(device)
        attention_masks = encoding["attention_mask"].to(device)

        # Diverse beam search: 15 beams split into 3 groups, returning
        # n_predictions candidate paraphrases per sentence.
        model_output = T5_model.generate(
            input_ids=input_ids,
            attention_mask=attention_masks,
            max_length=max_length,
            early_stopping=True,
            num_beams=15,
            num_beam_groups=3,
            num_return_sequences=n_predictions,
            diversity_penalty=0.70,
            no_repeat_ngram_size=2,
        )

        # Keep candidates that differ from the source sentence and from each other.
        outputs = []
        for output in model_output:
            generated_sent = T5_tokenizer.decode(
                output, skip_special_tokens=True, clean_up_tokenization_spaces=True
            )
            if generated_sent.lower() != sent.lower() and generated_sent not in outputs:
                outputs.append(generated_sent)
        para.append(outputs)

    # Combine the per-sentence candidates into full-text paraphrases
    # (Cartesian product), shuffle them, and format as a bulleted list.
    combinations = list(itertools.product(*para))
    random.shuffle(combinations)
    joined = [" ".join(combo) for combo in combinations]
    final_output = ["* " + line + "." for line in joined]
    return "\n".join(final_output)


iface = gr.Interface(fn=get_paraphrases, inputs=[gr.inputs.Textbox(lines=5)], outputs="text")
iface.launch()