import os
import re

import gspread
import openai
from nltk import sent_tokenize, word_tokenize


class Sentence:
    """Holds one sentence plus the review metadata attached to it."""

    def __init__(self, text, error_message, suggestions):
        self.text = text
        # Cached character length of the sentence text.
        self.length = len(text)
        self.error_message = error_message
        self.suggestions = suggestions


class LanguageComponentSplitter:
    """Cleans raw text and splits it into sentences with NLTK."""

    def remove_unwanted_characters(self, text):
        """Collapse every run of whitespace in *text* into a single space."""
        # Raw string avoids the invalid-escape warning of '\s+'.
        return re.sub(r'\s+', ' ', text)

    def split_sentences(self, text):
        """Normalize whitespace, then return a list of sentences."""
        text_clean = self.remove_unwanted_characters(text)
        return sent_tokenize(text_clean)


class Summarization_task:
    """Wrappers around LLM backends that proofread Spanish legal text."""

    def summarize_gpt(self, text):
        """Ask GPT for a Spanish list of errors in *text* plus corrections.

        Returns the raw completion text from the model.

        SECURITY: the original source hard-coded an OpenAI API key here.
        That key is leaked and must be revoked; the key is now read from
        the OPENAI_API_KEY environment variable instead.
        """
        openai.api_key = os.environ.get("OPENAI_API_KEY", "")
        response = openai.Completion.create(
            model="text-davinci-003",
            prompt="Pretend you are a legal copywriter give me a list of the errors you find and then provide a correction in a list, only use spanish: " + text,
            temperature=0.7,
            max_tokens=1000,
            top_p=1.0,
            frequency_penalty=0.0,
            presence_penalty=0.0,
        )
        return response["choices"][0]["text"]

    # To be implemented when API key is obtained.
    def summarize_bard(self, text):
        """Route the same review prompt through Bard.

        NOTE(review): `Bard` is not imported anywhere in this file, so
        calling this method raises NameError until the bardapi dependency
        is added and a real token is supplied.
        """
        token = ''
        bard = Bard(token=token)
        prompt = "Pretend you are a legal copywriter give me a list of the writing and problems with quotes and then provide a correction, only use spanish: " + text
        translated = bard.get_answer(prompt)['content']
        return translated


class Utilities:
    """Small NLP helpers plus Google Sheets persistence."""

    def specialized_words(self, sentence):
        """Return the NLTK word tokens of *sentence*."""
        return word_tokenize(sentence)

    def connect_to_cloud(self):
        """Open the training worksheet using the local service-account file.

        Expects "creds.json" in the working directory; returns sheet1 of
        the "entrenamiento_txt_jur" spreadsheet.
        """
        gc = gspread.service_account(filename="creds.json")
        return gc.open("entrenamiento_txt_jur").sheet1

    def write_on_sheet(self, text_raw, text_proc):
        """Write a raw/processed text pair into the next free row (A/B columns).

        Returns a short status string on completion.
        """
        sh = self.connect_to_cloud()
        next_row = self.find_next_available_cell(sh)
        sh.update("A{}".format(next_row), text_raw)
        sh.update("B{}".format(next_row), text_proc)
        return "Printed on db"

    def find_next_available_cell(self, sh):
        """Return (as a string) the 1-based index of the first empty row.

        Counts the non-empty cells in column A of *sh* and points one past
        the last of them.
        """
        str_list = list(filter(None, sh.col_values(1)))
        return str(len(str_list) + 1)