# Spanish story helper: loads summarization, simplification, question-generation,
# and question-answering models and serves them through a Gradio interface.
from transformers import (
    AutoModelForQuestionAnswering,
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    BertTokenizerFast,
    EncoderDecoderModel,
    pipeline,
)
import gradio as gr
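
# All model checkpoints below are hosted on the Hugging Face Hub.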
# Text simplification (Spanish mT5).
tokenizer_s = AutoTokenizer.from_pretrained("oskrmiguel/mt5-simplification-spanish")
model_s = AutoModelForSeq2SeqLM.from_pretrained("oskrmiguel/mt5-simplification-spanish")

# Summarization (Spanish shared BERT2BERT encoder-decoder).
tokenizer = BertTokenizerFast.from_pretrained("mrm8488/bert2bert_shared-spanish-finetuned-summarization")
model = EncoderDecoderModel.from_pretrained("mrm8488/bert2bert_shared-spanish-finetuned-summarization")

# Question generation (Spanish BERT2BERT).
model_q = AutoModelForSeq2SeqLM.from_pretrained("mrm8488/bert2bert-spanish-question-generation")
tokenizer_q = AutoTokenizer.from_pretrained("mrm8488/bert2bert-spanish-question-generation")

# Question answering (Spanish DistilBERT fine-tuned on SQuAD2-es). The model and
# tokenizer are loaded once and reused by the pipeline; the slow tokenizer is
# kept, matching the original use_fast=False setting.
tokenizer_a = AutoTokenizer.from_pretrained(
    "mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es", use_fast=False
)
model_a = AutoModelForQuestionAnswering.from_pretrained(
    "mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es"
)
nlp_a = pipeline("question-answering", model=model_a, tokenizer=tokenizer_a)

def generate_summary(text):
    """Summarize one paragraph with the BERT2BERT summarization model."""
    inputs = tokenizer([text], padding="max_length", truncation=True, max_length=64, return_tensors="pt")
    output = model.generate(inputs.input_ids, attention_mask=inputs.attention_mask)
    return tokenizer.decode(output[0], skip_special_tokens=True)
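
# Usage sketch (hypothetical input):
#   generate_summary("Habia una vez un zorro que vivia en el bosque y ...")
#   -> a single decoded summary string for the paragraph.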

def generate_simple_text(data):
    """Simplify a text sentence by sentence.

    Returns one single-item list per sentence, so callers read result[0][0]
    for the first simplified sentence.
    """
    outputs = []
    for text in data.split("."):
        if not text.strip():  # skip the empty tail after the final period
            continue
        inputs = tokenizer_s(text, max_length=1024, padding=True, truncation=True, return_tensors="pt")
        output = model_s.generate(inputs["input_ids"], max_length=100)
        outputs.append(["\n".join(tokenizer_s.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in output)])
    return outputs

def generate_questions(data):
    """Generate one question per sentence of the input text."""
    outputs = []
    for text in data.split("."):
        if not text.strip():  # skip the empty tail after the final period
            continue
        inputs_q = tokenizer_q(text, return_tensors="pt")
        outputs_q = model_q.generate(inputs_q["input_ids"], max_length=100)
        outputs.append(tokenizer_q.decode(outputs_q[0], skip_special_tokens=True))
    return outputs
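
# Note: only the first generated question per summary is surfaced in the UI
# (questions[0] in generate_question below).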

def generate_answer(question_text, context_text):
    """Answer a single question against the given context with the QA pipeline."""
    return nlp_a({"question": question_text, "context": context_text})["answer"]

def generate_paragraph(data):
    """Split the input into non-empty paragraphs (one per line)."""
    return [p for p in data.split("\n") if p != ""]
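
# Example: generate_paragraph("Uno.\nDos.\n") -> ["Uno.", "Dos."]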

# UI widgets (the gr.inputs/gr.outputs namespaces were removed in Gradio 3+).
contexto = gr.Textbox(lines=10, placeholder="Ingresa un cuento de niños")  # "Enter a children's story"
resultado = gr.HTML(label="Resultado")
# Options: Resumir = summarize, Facil Lectura = easy reading,
# Generar Preguntas = generate questions, Ver Respuestas = show answers.
opciones = gr.CheckboxGroup(["Resumir", "Facil Lectura", "Generar Preguntas", "Ver Respuestas"])
parrafo_longitud = gr.Slider(50, 500)
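
# The slider is a length threshold (in characters): paragraphs shorter than it
# are kept verbatim, longer ones are summarized first (see generate_question).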

def generate_question(contexto, opciones, parrafo_longitud):
    """Build the HTML report: summary, simplified text, questions, and answers."""
    parrafos = generate_paragraph(contexto)
    resultado = ""
    resumen = []
    preguntas = []
    simples = []
    respuestas = []
    for i, text in enumerate(parrafos):
        if len(text) < parrafo_longitud:
            # Short paragraphs are kept verbatim instead of being summarized.
            resumen.append(text)
            if "Facil Lectura" in opciones:
                simples.append(text)
        else:
            # Long paragraphs are always summarized; question generation and
            # simplification both work from the summary.
            sumarize = generate_summary(text)
            resumen.append(sumarize)
            if "Generar Preguntas" in opciones:
                questions = generate_questions(sumarize)
                if questions:
                    preguntas.append(str(i + 1) + "-> " + questions[0])
                    # Answers need a generated question, so "Ver Respuestas"
                    # only takes effect together with "Generar Preguntas".
                    if "Ver Respuestas" in opciones:
                        respuestas.append(str(i + 1) + "-> " + generate_answer(questions[0], sumarize))
            if "Facil Lectura" in opciones:
                simple = generate_simple_text(sumarize)
                if simple:
                    simples.append(simple[0][0])
    resultado += "<p><b>Resumen:</b> " + "<br/>".join(resumen) + "</p>"
    # Only render the optional sections that actually produced output.
    if simples:
        resultado += "<p><b>Texto Simple:</b> " + "<br/>".join(simples) + "</p>"
    if preguntas:
        resultado += "<p><b>Preguntas:</b> " + "<br/>".join(preguntas) + "</p>"
    if respuestas:
        resultado += "<p><b>Respuestas:</b> " + "<br/>".join(respuestas) + "</p>"
    return resultado

iface = gr.Interface(
    fn=generate_question,
    inputs=[contexto, opciones, parrafo_longitud],
    outputs=resultado,
)
iface.launch(debug=True)
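
# debug=True blocks and prints tracebacks to the console; passing share=True
# to launch() would additionally create a temporary public URL.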