|
|
import os
import re

import gspread
import openai
from nltk import sent_tokenize, word_tokenize
|
|
|
|
|
class Sentence:
    """A piece of text bundled with the feedback recorded for it.

    Attributes are plain public fields:

    * ``text`` -- the value passed in as ``text``.
    * ``length`` -- ``len(text)``, computed once at construction time (it is
      not refreshed if ``text`` is reassigned later).
    * ``error_message`` -- the value passed in as ``error_message``.
    * ``suggestions`` -- the value passed in as ``suggestions``.
    """

    def __init__(self, text, error_message, suggestions):
        """Store the three arguments and cache the length of ``text``.

        Raises:
            TypeError: if ``text`` does not support ``len()`` (e.g. ``None``).
        """
        self.text = text
        self.length = len(text)
        self.error_message = error_message
        self.suggestions = suggestions

    def __repr__(self):
        """Return a constructor-like representation for easier debugging."""
        return "{}(text={!r}, error_message={!r}, suggestions={!r})".format(
            type(self).__name__,
            self.text,
            self.error_message,
            self.suggestions,
        )
|
|
|
|
|
class LanguageComponentSplitter:
    """Normalise whitespace in a text and split it into sentences."""

    # Raw-string pattern: a plain '\s+' literal relies on an invalid string
    # escape, which Python reports as a warning. Compiled once for reuse.
    _WHITESPACE_RUN = re.compile(r"\s+")

    def remove_unwanted_characters(self, text):
        """Collapse every run of whitespace in ``text`` into a single space.

        Newlines and tabs count as whitespace. Leading and trailing runs are
        collapsed to one space as well; they are not stripped.
        """
        return self._WHITESPACE_RUN.sub(" ", text)

    def split_sentences(self, text):
        """Return the sentences of ``text`` as produced by ``sent_tokenize``.

        Whitespace is normalised first so that line breaks inside the text
        reach the tokenizer as single spaces.
        """
        text_clean = self.remove_unwanted_characters(text)
        return sent_tokenize(text_clean)
|
|
|
|
|
|
|
|
class Summarization_task:
    """Send a legal-text review prompt to a hosted language model.

    Despite the ``summarize_*`` names, both prompts ask the model to list
    problems found in the text and to propose corrections, in Spanish.
    """

    # Name of the environment variable that supplies the OpenAI secret key.
    # The key must never be committed to source control.
    OPENAI_KEY_ENV = "OPENAI_API_KEY"

    def summarize_gpt(self, text):
        """Ask the OpenAI completion endpoint to review ``text``.

        The API key is taken from the ``OPENAI_API_KEY`` environment variable.
        If that variable is unset, a key already assigned to
        ``openai.api_key`` by the caller is used as-is.

        Args:
            text: the text appended to the fixed review prompt.

        Returns:
            The ``"text"`` field of the first choice in the response.

        Raises:
            RuntimeError: if no API key is available from either source.
        """
        api_key = os.environ.get(self.OPENAI_KEY_ENV)
        if api_key:
            openai.api_key = api_key
        elif not getattr(openai, "api_key", None):
            raise RuntimeError(
                "OpenAI API key not configured: set the {} environment "
                "variable.".format(self.OPENAI_KEY_ENV)
            )

        # NOTE(review): `openai.Completion` and "text-davinci-003" belong to
        # the legacy completions interface; confirm the pinned `openai`
        # version and model availability before relying on this call.
        response = openai.Completion.create(
            model="text-davinci-003",
            prompt="Pretend you are a legal copywriter give me a list of the errors you find and then provide a correction in a list, only use spanish: " + text,
            temperature=0.7,
            max_tokens=1000,
            top_p=1.0,
            frequency_penalty=0.0,
            presence_penalty=0.0,
        )
        return response["choices"][0]["text"]

    def summarize_bard(self, text):
        """Ask Bard to review ``text`` and return the answer's content.

        Args:
            text: the text appended to the fixed review prompt.

        Returns:
            The ``'content'`` entry of the object returned by
            ``bard.get_answer``.
        """
        # NOTE(review): `Bard` is not imported in the visible part of this
        # file, so this call raises NameError unless it is provided elsewhere.
        # NOTE(review): the token is an empty placeholder; presumably the Bard
        # client falls back to its own configuration in that case -- confirm.
        token = ''
        bard = Bard(token=token)
        prompt = "Pretend you are a legal copywriter give me a list of the writing and problems with quotes and then provide a correction, only use spanish: " + text
        answer = bard.get_answer(prompt)['content']
        return answer
|
|
|
|
|
|
|
|
class Utilities:
    """Word tokenisation plus helpers that log texts to a Google Sheet."""

    def specialized_words(self, sentence):
        """Return the tokens produced by ``word_tokenize`` for ``sentence``."""
        return word_tokenize(sentence)

    def connect_to_cloud(self):
        """Open the logging spreadsheet and return its first worksheet.

        Authenticates with the service-account file ``creds.json`` and opens
        the spreadsheet titled ``entrenamiento_txt_jur``.
        """
        client = gspread.service_account(filename="creds.json")
        return client.open("entrenamiento_txt_jur").sheet1

    def write_on_sheet(self, text_raw, text_proc):
        """Write ``text_raw`` and ``text_proc`` to columns A and B of the next row.

        Both cells are sent in a single range update so that a failure cannot
        leave a row with only column A filled in.

        Returns:
            The fixed status string ``"Printed on db"``.
        """
        sheet = self.connect_to_cloud()
        next_row = self.find_next_available_cell(sheet)
        sheet.update(
            "A{0}:B{0}".format(next_row),
            [[text_raw, text_proc]],
        )
        return "Printed on db"

    def find_next_available_cell(self, sh):
        """Return, as a string, the row number just below column A's last value.

        The position of the last non-empty cell is used rather than the count
        of non-empty cells: counting would point at an occupied row, and cause
        an overwrite, whenever column A contains a blank cell in the middle.

        Args:
            sh: a worksheet-like object exposing ``col_values(column_index)``.
        """
        column_a = sh.col_values(1)
        last_filled = max(
            (row for row, value in enumerate(column_a, start=1) if value),
            default=0,
        )
        return str(last_filled + 1)
|
|
|
|
|
|