File size: 2,306 Bytes
aaaff44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import os
import re

import gspread
import openai
from nltk import sent_tokenize, word_tokenize

class Sentence:
    """A piece of text bundled with the feedback attached to it.

    Attributes:
        text: the value passed in as ``text``.
        length: ``len(text)``, computed once at construction time; it is
            not refreshed if ``text`` is reassigned later.
        error_message: the value passed in as ``error_message``.
        suggestions: the value passed in as ``suggestions``.
    """

    def __init__(self, text, error_message, suggestions):
        # The length is cached alongside the text it was measured from.
        self.text, self.length = text, len(text)
        self.error_message, self.suggestions = error_message, suggestions

class LanguageComponentSplitter:
    """Normalize whitespace in a text and split it into sentences."""

    def remove_unwanted_characters(self, text):
        """Return *text* with every run of whitespace replaced by one space.

        Newlines and tabs count as whitespace. Leading and trailing
        whitespace is collapsed to a single space, not stripped.
        """
        # Raw string: "\s" inside a plain literal is an invalid escape
        # sequence and triggers a warning on current Python versions.
        return re.sub(r"\s+", " ", text)

    def split_sentences(self, text):
        """Return the result of ``sent_tokenize`` on the normalized *text*.

        The text is first passed through ``remove_unwanted_characters`` so
        that line breaks inside a sentence do not reach the tokenizer.
        """
        normalized = self.remove_unwanted_characters(text)
        return sent_tokenize(normalized)


class Summarization_task:
    """Ask hosted language models to list and correct errors in a text."""

    def summarize_gpt(self, text):
        """Send *text* to the OpenAI completion API and return the reply.

        The prompt asks the model, in the role of a legal copywriter, for a
        list of errors followed by a list of corrections, in Spanish.

        The API key is read from the ``OPENAI_API_KEY`` environment
        variable rather than being stored in the source file.

        Returns:
            The ``text`` field of the first choice in the response.

        Raises:
            RuntimeError: if ``OPENAI_API_KEY`` is unset or empty.
        """
        # Secrets must not live in source control. Any key that was
        # previously committed here should be treated as compromised and
        # rotated.
        api_key = os.environ.get("OPENAI_API_KEY")
        if not api_key:
            raise RuntimeError(
                "OPENAI_API_KEY environment variable is not set; "
                "cannot call the OpenAI API"
            )
        openai.api_key = api_key

        # NOTE(review): `openai.Completion` is the pre-1.0 SDK interface;
        # confirm the pinned openai version still provides it.
        response = openai.Completion.create(
            model="text-davinci-003",
            prompt="Pretend you are a legal copywriter give me a list of the errors you find and then provide a correction in a list, only use spanish: " + text,
            temperature=0.7,
            max_tokens=1000,
            top_p=1.0,
            frequency_penalty=0.0,
            presence_penalty=0.0
        )
        return response["choices"][0]["text"]

    # To be implemented when API key is obtained.
    def summarize_bard(self, text):
        """Send *text* to Bard and return the ``content`` of its answer.

        The prompt asks for writing and quotation problems plus a
        correction, in Spanish.

        NOTE(review): `Bard` is not imported in the visible part of this
        file and the token is empty, so calling this currently fails
        (NameError unless `Bard` is defined elsewhere). Import the client
        and supply a real token before using it.
        """
        token = ''
        bard = Bard(token=token)
        prompt = "Pretend you are a legal copywriter give me a list of the writing and problems with quotes and then provide a correction, only use spanish:  " + text
        return bard.get_answer(prompt)['content']


class Utilities:
    """Helpers for tokenizing sentences and logging texts to a Google Sheet."""

    def specialized_words(self, sentence):
        """Return the tokens ``word_tokenize`` produces for *sentence*."""
        return word_tokenize(sentence)

    def connect_to_cloud(self):
        """Open the first worksheet of the ``entrenamiento_txt_jur`` spreadsheet.

        Authenticates with the service-account credentials stored in
        ``creds.json`` (path relative to the current working directory).
        """
        gc = gspread.service_account(filename="creds.json")
        return gc.open("entrenamiento_txt_jur").sheet1

    def write_on_sheet(self, text_raw, text_proc):
        """Write *text_raw* to column A and *text_proc* to column B.

        Both values go into the row reported by
        ``find_next_available_cell`` for a freshly opened worksheet.

        Returns:
            The literal string ``"Printed on db"``.
        """
        sh = self.connect_to_cloud()
        next_row = self.find_next_available_cell(sh)
        sh.update("A{}".format(next_row), text_raw)
        sh.update("B{}".format(next_row), text_proc)
        return "Printed on db"

    def find_next_available_cell(self, sh):
        """Return, as a string, the row just below the last filled cell of column 1.

        *sh* only needs a ``col_values(col)`` method returning the column's
        cell values in row order.

        The position of the last non-empty value is used, not the count of
        non-empty values: counting would point at an already occupied row
        whenever the column contains a blank cell between filled ones.
        """
        last_filled = 0
        for row_number, value in enumerate(sh.col_values(1), start=1):
            if value:
                last_filled = row_number
        return str(last_filled + 1)