Spaces:
Runtime error
Runtime error
| # -*- coding: utf-8 -*- | |
| """ABSTRACTGEN_ES FINAL.ipynb | |
| Automatically generated by Colaboratory. | |
| Original file is located at | |
| https://colab.research.google.com/drive/1XdfeMcdDbRuRmOGGiOmkiCP9Yih5JXyF | |
| # installs | |
| """ | |
| import os | |
# --- Start-up bootstrap ------------------------------------------------------
# Dependencies are installed by shelling out when the script starts. The exit
# status of each command is ignored, so a failed step only surfaces later as
# an ImportError or a missing file.
#
# NOTE: the former `pip install os.system` step was removed. `os.system` is a
# function of the standard-library `os` module, not a dependency of this
# script; asking pip for it can only fail or pull in an unrelated package.
os.system('pip install gpt_2_simple')
os.system('pip install gradio')
os.system('pip install huggingface_hub')
os.system('pip install easynmt')
os.system('pip install sentence-transformers')

# git-lfs is set up before cloning the model repository.
os.system('curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash')
os.system('apt-get install git-lfs')
os.system('git lfs install')
os.system('git clone https://huggingface.co/franz96521/AbstractGeneratorES ')

# No `cd` into the clone: a `cd` run through os.system() only affects the
# child shell, and the paths used later are built from os.getcwd().
print(os.getcwd())
print(os.listdir())
| # Commented out IPython magic to ensure Python compatibility. | |
| # %cd '/content/AbstractGeneratorES' | |
| """# Init""" | |
| import gpt_2_simple as gpt2 | |
| import os | |
| import tensorflow as tf | |
| import pandas as pd | |
| import re | |
model_name = "124M"

# Download the base GPT-2 model only when ./models/<model_name> is not
# already present, so a run that finds the directory skips the download.
if not os.path.isdir(os.path.join("models", model_name)):
    print(f"Downloading {model_name} model...")
    gpt2.download_gpt2(model_name=model_name)
# Locations inside the cloned AbstractGeneratorES repository, resolved
# against the current working directory.
path = f"{os.getcwd()}/AbstractGeneratorES/AbstractGenerator/"
checkpoint_dir = f"{path}weights/"
# The directory name is spelled "TrainigData" on purpose here: it is a
# runtime path and must be kept exactly as written.
data_path = f"{path}TrainigData/"

# Per-language data file names and their full paths.
file_name_en = 'en'
file_name_es = 'es'
file_path_en = f"{data_path}{file_name_en}"
file_path_es = f"{data_path}{file_name_es}"

# Start/end-of-text markers; `sufix` is passed as the truncation marker when
# generating text.
prefix = '<|startoftext|>'
sufix = '<|endoftext|>'
| import gradio as gr | |
| import random | |
| from easynmt import EasyNMT | |
| from sentence_transformers import SentenceTransformer, util | |
def generateAbstract(text):
    """Generate one abstract for *text* with the fine-tuned GPT-2 checkpoint.

    Every call resets the default TensorFlow graph, opens a new session,
    loads the ``run1`` checkpoint from ``checkpoint_dir`` and generates a
    single sample. The prompt is ``str(text)`` followed by a newline and the
    word ``ABSTRACT``.

    Args:
        text: Source text to summarise; it is converted with ``str()``.

    Returns:
        The first (and only) generated sample, as returned by
        ``gpt2.generate`` with ``truncate=sufix``.
    """
    tf.compat.v1.reset_default_graph()
    sess = gpt2.start_tf_sess()
    try:
        gpt2.load_gpt2(sess, checkpoint_dir=checkpoint_dir, run_name='run1')
        samples = gpt2.generate(
            sess,
            prefix=str(text) + "\nABSTRACT",
            return_as_list=True,
            truncate=sufix,
            checkpoint_dir=checkpoint_dir,
            nsamples=1,
        )
        return samples[0]
    finally:
        # A new session is opened on every request; close it so its
        # resources are released even when loading or generation fails.
        sess.close()
def removeAbstract(text):
    """Split *text* into ``(abstract, body)`` at the introduction heading.

    The text is cut at the earliest occurrence of ``"Introducción"`` or
    ``"INTRODUCCIÓN"``. Everything before the heading is treated as the
    abstract; the heading and everything after it is the body.

    Args:
        text: Full document text.

    Returns:
        A ``(abstract, body)`` tuple of strings. When neither heading is
        present the whole text is returned as the body with an empty
        abstract, so callers can always unpack the result.
    """
    headings = ("Introducción", "INTRODUCCIÓN")
    positions = [pos for pos in (text.find(h) for h in headings) if pos != -1]
    if not positions:
        return ("", text)

    # Cut at whichever heading comes first, so a later mention of the other
    # spelling inside the body cannot move the split point.
    cut = min(positions)
    return (text[:cut], text[cut:])
def generated_similarity(type_of_input, cn_text):
    """Generate an abstract and compare it with the document's own abstract.

    English input is first translated to Spanish with EasyNMT. The text is
    then split by ``removeAbstract`` into the original abstract and the body,
    a new abstract is generated from the body, and the two abstracts are
    compared by cosine similarity of their sentence embeddings.

    Args:
        type_of_input: ``"English"`` or ``"Spanish"``.
        cn_text: Full text containing an abstract followed by an
            ``Introducción`` / ``INTRODUCCIÓN`` heading.

    Returns:
        A report string with the body, both abstracts and the similarity as
        a percentage. A short explanatory message is returned instead when
        the language is not selected or no original abstract can be found.
    """
    if type_of_input not in ("English", "Spanish"):
        return "Please choose the input language (English or Spanish)."

    if type_of_input == "English":
        translator = EasyNMT('opus-mt')
        cn_text = translator.translate(cn_text, target_lang='es')

    # removeAbstract may yield None (no heading found) or an empty abstract;
    # in both cases there is nothing to compare the generated text against.
    parts = removeAbstract(cn_text)
    if parts is None or not parts[0].strip():
        return (
            "No original abstract was found: the text must contain the "
            "abstract followed by an 'Introducción' / 'INTRODUCCIÓN' heading."
        )
    abstract_original, body = parts

    # generateAbstract resets the TensorFlow graph itself, so no reset is
    # needed here.
    generated_Abstract = generateAbstract(body)

    model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
    embedding_1 = model.encode(abstract_original, convert_to_tensor=True)
    embedding_2 = model.encode(generated_Abstract, convert_to_tensor=True)
    similarity = util.pytorch_cos_sim(embedding_1, embedding_2)
    percentage = float(round(similarity.item() * 100, 3))

    return (
        "TEXTO SIN ABSTRACT\n\n"
        f"{body}\n\n"
        "ABSTRACT ORIGINAL\n\n"
        f"{abstract_original}\n\n"
        "ABSTRACT GENERADO\n\n"
        f"{generated_Abstract}\n\n"
        f"SIMILARIDAD DE ABSTRACT: {percentage}%\n"
    )
def generated_abstract(type_of_input, cn_text):
    """Generate an abstract for a text that has no abstract of its own.

    English input is first translated to Spanish with EasyNMT; the
    (possibly translated) text is then passed to ``generateAbstract``.

    Args:
        type_of_input: ``"English"`` or ``"Spanish"``.
        cn_text: Text to generate an abstract for.

    Returns:
        A report string with the input text and the generated abstract, or a
        short explanatory message when the language is not selected.
    """
    if type_of_input not in ("English", "Spanish"):
        return "Please choose the input language (English or Spanish)."

    if type_of_input == "English":
        translator = EasyNMT('opus-mt')
        cn_text = translator.translate(cn_text, target_lang='es')

    # generateAbstract resets the TensorFlow graph itself, so no reset is
    # needed here.
    generated_Abstract = generateAbstract(cn_text)

    return (
        "TEXTO SIN ABSTRACT\n\n"
        f"{cn_text}\n\n"
        "ABSTRACT GENERADO\n\n"
        f"{generated_Abstract}\n\n"
    )
# --- Gradio UI ---------------------------------------------------------------
# Two tabs share the same layout: a language selector, an input textbox, an
# output textbox and a "Run" button wired to one of the handlers above.
# Components are created with gr.Radio / gr.Textbox rather than the legacy
# gr.inputs.* / gr.outputs.* namespaces, which current Gradio releases no
# longer provide (gradio is installed unpinned at start-up).
block = gr.Blocks()
with block:
    gr.Markdown('''ABSTRACTGEN_ES''')
    gr.Markdown('''An app that can generate abstracts in Spanish based on the text that you input via document text and if you already have an abstract and need a different idea, check how similar the new abstract is to the original one.
''')
    gr.Markdown(''' We used Blocks (beta), which allows you to build web-based demos in a flexible way using the gradio library. Blocks is a more low-level and flexible alternative to the core Interface class.
The main problem with this library right now is that
it doesn't support some functionality that Interface
class has''')
    gr.Markdown('''To get more info about this project go to: https://sites.google.com/up.edu.mx/somos-pln-abstractgen-es/inicio?authuser=0''')

    with gr.Tab("Full text and text similarity"):
        gr.Markdown("Choose the language:")
        type_of_input = gr.Radio(["English", "Spanish"], label="Input Language")
        with gr.Row():
            cn_text = gr.Textbox(placeholder="Full text", lines=7)
        with gr.Row():
            cn_results1 = gr.Textbox(label="Abstract generado")
        cn_run = gr.Button("Run")
        cn_run.click(generated_similarity, inputs=[type_of_input, cn_text], outputs=[cn_results1])

    # The component variables are rebound below; the first tab's click
    # handler was already registered with its own components.
    with gr.Tab("Only text with no abstract"):
        gr.Markdown("Choose the language:")
        type_of_input = gr.Radio(["English", "Spanish"], label="Input Language")
        with gr.Row():
            cn_text = gr.Textbox(placeholder="Text without abstract", lines=7)
        with gr.Row():
            cn_results1 = gr.Textbox(label="Abstract generado")
        cn_run = gr.Button("Run")
        cn_run.click(generated_abstract, inputs=[type_of_input, cn_text], outputs=cn_results1)

block.launch(debug=True)