# Spaces: Runtime error
| import os | |
| import gradio as gr | |
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline | |
| # versions = { | |
| # 'v1': { | |
| # 'name': "milyiyo/paraphraser-german-mt5-small", | |
| # 'tokenizer': None, | |
| # 'model': None | |
| # }, | |
| # 'v2': { | |
| # 'name':"milyiyo/paraphraser-german-mt5-small-v2", | |
| # 'tokenizer': None, | |
| # 'model': None | |
| # }, | |
| # } | |
| # versions['v1']['tokenizer'] = AutoTokenizer.from_pretrained(versions['v1']['name']) | |
| # versions['v1']['model'] = AutoModelForSeq2SeqLM.from_pretrained(versions['v1']['name']) | |
| # versions['v2']['tokenizer'] = AutoTokenizer.from_pretrained(versions['v2']['name']) | |
| # versions['v2']['model'] = AutoModelForSeq2SeqLM.from_pretrained(versions['v2']['name']) | |
# Hugging Face Hub checkpoint used for both the tokenizer and the model.
# Kept in a single constant so the two can never point at different versions.
MODEL_NAME = "milyiyo/paraphraser-german-mt5-small-v2"

# Loaded once at import time; the generate_* functions read these globals.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
def generate_v1(inputs, count):
    """Generate paraphrases using beam search with a repetition penalty.

    Args:
        inputs: Tokenizer output; only ``inputs["input_ids"]`` is read.
        count: Number of paraphrases requested.

    Returns:
        A list of decoded sentences with special tokens stripped. At most
        ``num_beams`` (10) sentences are returned.
    """
    num_beams = 10
    model_outputs = model.generate(
        inputs["input_ids"],
        early_stopping=True,
        length_penalty=1.0,
        max_length=1024,
        no_repeat_ngram_size=2,
        num_beams=num_beams,
        repetition_penalty=3.5,
        # Beam search cannot return more sequences than it keeps beams;
        # asking for more raises inside generate(), so clamp the request.
        num_return_sequences=min(count, num_beams),
    )
    return [
        tokenizer.decode(output, skip_special_tokens=True)
        for output in model_outputs
    ]
def generate_v2(inputs, count):
    """Generate paraphrases using plain beam search.

    Args:
        inputs: Tokenizer output; only ``inputs["input_ids"]`` is read.
        count: Number of paraphrases requested.

    Returns:
        A list of decoded sentences with special tokens stripped. At most
        ``num_beams`` (5) sentences are returned.
    """
    num_beams = 5
    model_outputs = model.generate(
        inputs["input_ids"],
        early_stopping=True,
        length_penalty=2.0,
        max_length=1024,
        no_repeat_ngram_size=2,
        num_beams=num_beams,
        # NOTE(review): temperature is a sampling parameter and do_sample is
        # not set here, so this is presumably inert unless the checkpoint's
        # default generation config enables sampling - confirm before removing.
        temperature=1.5,
        # Beam search cannot return more sequences than it keeps beams;
        # asking for more raises inside generate(), so clamp the request.
        num_return_sequences=min(count, num_beams),
    )
    return [
        tokenizer.decode(output, skip_special_tokens=True)
        for output in model_outputs
    ]
def generate_v3(inputs, count):
    """Generate paraphrases using diverse (grouped) beam search.

    Beams are split into ``num_beam_groups`` groups and a
    ``diversity_penalty`` is applied between groups.

    Args:
        inputs: Tokenizer output; only ``inputs["input_ids"]`` is read.
        count: Number of paraphrases requested.

    Returns:
        A list of decoded sentences with special tokens stripped. At most
        ``num_beams`` (5) sentences are returned.
    """
    num_beams = 5
    model_outputs = model.generate(
        inputs["input_ids"],
        num_beams=num_beams,
        max_length=1024,
        # NOTE(review): temperature is a sampling parameter and do_sample is
        # not set here, so this is presumably inert unless the checkpoint's
        # default generation config enables sampling - confirm before removing.
        temperature=1.5,
        num_beam_groups=5,
        diversity_penalty=2.0,
        no_repeat_ngram_size=2,
        early_stopping=True,
        length_penalty=2.0,
        # Beam search cannot return more sequences than it keeps beams;
        # asking for more raises inside generate(), so clamp the request.
        num_return_sequences=min(count, num_beams),
    )
    return [
        tokenizer.decode(output, skip_special_tokens=True)
        for output in model_outputs
    ]
def generate_v4(encoding, count):
    """Generate paraphrases by sampling with top-k and top-p filtering.

    Unlike the beam-search variants, this one sets ``do_sample=True``, so
    repeated calls with the same input can return different sentences.

    Args:
        encoding: Tokenizer output; ``encoding["input_ids"]`` and
            ``encoding["attention_mask"]`` are read.
        count: Number of sequences to return.

    Returns:
        A list of decoded sentences with special tokens stripped and
        tokenization spaces cleaned up.
    """
    outputs = model.generate(
        input_ids=encoding["input_ids"],
        attention_mask=encoding["attention_mask"],
        max_length=512,
        do_sample=True,
        top_k=120,
        top_p=0.95,
        # Kept from the original call; only meaningful if the checkpoint's
        # default generation config uses more than one beam.
        early_stopping=True,
        num_return_sequences=count,
    )
    return [
        tokenizer.decode(
            output,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True,
        )
        for output in outputs
    ]
def paraphrase(sentence: str, count: str):
    """Paraphrase ``sentence`` with every generation strategy.

    Args:
        sentence: Text to paraphrase.
        count: Requested number of paraphrases per strategy; converted with
            ``int()``, so numeric strings and floats are accepted.

    Returns:
        ``{'result': []}`` when ``sentence`` is missing or blank, or when
        ``count`` is missing, non-numeric, or not positive. Otherwise
        ``{'result': {...}}`` mapping each strategy name
        (``generate_v1`` .. ``generate_v4``) to its list of sentences.
    """
    # A cleared UI field can arrive as None and free-form callers may pass
    # junk; treat anything that is not a usable number as "no request".
    try:
        p_count = int(count)
    except (TypeError, ValueError, OverflowError):
        return {'result': []}

    if p_count <= 0 or not sentence or not sentence.strip():
        return {'result': []}

    # Prompt format is passed to the tokenizer exactly as written here.
    text = f"paraphrase: {sentence} </s>"
    encoding = tokenizer(text, return_tensors="pt")

    return {
        'result': {
            'generate_v1': generate_v1(encoding, p_count),
            'generate_v2': generate_v2(encoding, p_count),
            'generate_v3': generate_v3(encoding, p_count),
            'generate_v4': generate_v4(encoding, p_count),
        }
    }
def paraphrase_dummy(sentence: str, count: str):
    """Return an empty result payload without running the model.

    Both arguments are accepted and ignored, so this can stand in for
    ``paraphrase`` where no model call is wanted.
    """
    return {'result': []}
# Build the demo UI: a sentence box and a count field in, JSON out.
# Components come from the top-level ``gradio`` namespace with ``value=`` for
# the initial value; the older ``gr.inputs`` / ``gr.outputs`` modules and the
# ``default=`` keyword are not available in current Gradio releases.
iface = gr.Interface(
    fn=paraphrase,
    inputs=[
        gr.Textbox(lines=2, placeholder=None, label='Sentence'),
        gr.Number(value=3, label='Paraphrases count'),
    ],
    outputs=[gr.JSON(label=None)],
)
iface.launch()