Spaces:

Hexamind
/

GPTdoc

Build error

File size: 6,931 Bytes

65642c3

class Doc:
    """A parsed document: its lines, its title and the outline built from them.

    Attributes set by ``__init__``:
        params: parsing configuration shared with every ``Line``.
        lines: non-empty input lines wrapped as ``Line`` objects (title removed).
        title: document title.
        container: root ``Container`` of the outline.
        tasks: one task description per container that holds task lines.
        fulltext: the raw text the document was built from.
    """

    def __init__(self, fulltext: str = '', title: str = '', params: dict = None):
        """Parse ``fulltext`` into lines, a title and a container tree.

        Args:
            fulltext: raw document text; blank lines are ignored.
            title: title to use when the text does not carry its own.
            params: parsing configuration (marker tables, document type).
                Defaults to a fresh empty dict.
        """
        # A fresh dict per call avoids sharing one mutable default between instances.
        self.params = params if params is not None else {}
        self.lines = [Line(text.strip(), self.params) for text in fulltext.split("\n") if text.strip()]
        self.title, self.lines = self._get_title(title)
        self.container = Container(lines=self.lines, title=self.title, father=self, params=self.params)
        self.tasks = [c.get_task(self.container.one_liner) for c in self.container.containers if c.task]
        self.fulltext = fulltext

    def _get_title(self, title):
        """Return ``(title, lines)`` with the title line removed when there is one.

        Only documents whose ``params['type']`` is ``'input_text'`` carry their
        title in the first line; for them a missing title line yields the
        placeholder ``'the title is missing'``. Other documents keep ``title``
        and all lines unchanged.
        """
        lines = self.lines
        if self.params.get('type') == 'input_text':
            if lines and lines[0].type == 'title':
                title = lines[0].text
                lines = lines[1:]
            else:
                title = 'the title is missing'
        return title, lines

    def replace_tasks(self, resolutions: list):
        """Return ``fulltext`` with each task line replaced by the next resolution.

        Args:
            resolutions: replacement strings, consumed in order, one per task line.

        Returns:
            The new text. Task lines left over once ``resolutions`` is exhausted
            are kept as they are; if no task marker is configured the text is
            returned unchanged.
        """
        starts = self.params.get('startswith_', {})
        reverts = {starts[k]: k for k in starts}
        task_starter = reverts.get('task')
        if task_starter is None:
            return self.fulltext

        # One shared iterator, so successive task lines get successive resolutions.
        remaining = iter(resolutions)
        new_lines = [
            # Lines are stripped before being parsed, so match on the stripped form too.
            next(remaining, line) if line.lstrip().startswith(task_starter) else line
            for line in self.fulltext.split('\n')
        ]
        return "\n".join(new_lines)


class InputDoc(Doc):
    """Document written in the input-text syntax.

    Line markers: ``!!`` title, ``++`` comment, ``??`` task, and ``#`` to
    ``######`` followed by a space for heading levels 1 to 6.
    """

    def __init__(self, fulltext='', title=''):
        """Build the document from ``fulltext``.

        Args:
            fulltext: raw document text.
            title: fallback title passed on to ``Doc``.
        """
        self.params = {
            'type': 'input_text',
            # Every heading marker ends with a space so that no marker is a
            # prefix of a deeper one ('####' would also match '##### ...').
            'startswith_':
                {'!!': 'title', '++': 'comment', '??': 'task',
                 '# ': '1', '## ': '2', '### ': '3', '#### ': '4', '##### ': '5', '###### ': '6'}
        }
        super().__init__(fulltext=fulltext, title=title, params=self.params)


class WikiPage(Doc):
    """Document written with wiki-style ``== Heading ==`` section markers."""

    def __init__(self, fulltext='', title=''):
        """Build the page from ``fulltext``.

        Args:
            fulltext: raw page text.
            title: page title.
        """
        self.params = {
            'type': 'wiki',
            # Opening heading markers mapped to their level.
            'startswith_':
                {'== ': '1', '=== ': '2', '==== ': '3', '===== ': '4', '====== ': '5', '======= ': '6'},
            # Closing markers, in the same order as the opening markers above.
            'endswith_':
                [' ==', ' ===', ' ====', ' =====', ' ======', ' ======='],

            # Titles of top-level sections that are dropped from the outline.
            'discarded': ["See also", "Notes", "References", "Sources", "External links", "Bibliography",
                          "Cinematic adaptations", "Further reading", "Maps"]
        }
        super().__init__(fulltext=fulltext, title=title, params=self.params)

    def get_paragraphs(self, chunk=500):
        """Return the page text split into paragraphs by the root container.

        Args:
            chunk: length threshold forwarded to ``Container.get_paragraphs``.
        """
        return self.container.get_paragraphs(chunk)


class Container:
    """A section of the document outline.

    A container gathers the lines found directly under its heading (normal
    text, comments and tasks) and owns one child container per sub-heading.

    Attributes:
        normals, comments, tasks: the line objects of each kind.
        normal, comment, task: the texts of those lines, each prefixed by a space
            and concatenated.
        children: direct sub-sections.
        containers: this container followed by all of its descendants.
        one_liner: ``title + ' ' + comment``.
        root_text: ``one_liner`` followed by the container's own normal text.
        text: ``root_text`` followed by the text of every child.
        summary: initialised to ``text``.
    """

    def __init__(self, lines=None, level=0, title='', father=None, params=None):
        """Build the container and, recursively, its children.

        Args:
            lines: line objects exposing ``type``, ``text``, ``level`` and
                ``is_structure``. Defaults to no lines.
            level: depth of this container; the root is at level 0.
            title: heading text of the section.
            father: the enclosing container (the document for the root).
            params: optional configuration; when it has a ``'discarded'`` entry,
                direct children whose title is listed there are dropped.
        """
        if lines is None:
            lines = []

        self.normals = []
        self.normal = ''
        self.comments = []
        self.comment = ''
        self.tasks = []
        self.task = ''
        self.children = []
        self.level = level
        self.title = title
        self.father = father

        self._expand(lines)

        if params and 'discarded' in params:
            self.children = [child for child in self.children if child.title not in params['discarded']]

        self.containers = [self]
        for child in self.children:
            self.containers += child.containers
        self.one_liner = self.title + ' ' + self.comment
        self.root_text = self.one_liner + ' ' + self.normal
        self.text = ' '.join([self.root_text] + [child.text for child in self.children])

        self.summary = self.text

    def _expand(self, lines):
        """Distribute ``lines`` between this container and its children.

        Lines met before the first heading belong to this container. From the
        first heading on, lines are buffered and handed to a child container
        each time a heading of level ``self.level + 1`` closes the current one.
        """
        new_child = False
        new_child_lines = []
        new_child_title = ''
        for line in lines:
            if not new_child:
                if line.type == 'normal':
                    self.normals.append(line)
                    self.normal += ' ' + line.text
                elif line.type == 'comment':
                    self.comments.append(line)
                    self.comment += ' ' + line.text
                elif line.type == 'task':
                    self.tasks.append(line)
                    self.task += ' ' + line.text
                elif line.is_structure:
                    new_child = True
                    new_child_lines = []
                    new_child_title = line.text
                    # The first heading met is treated as a direct child,
                    # whatever level it was written with.
                    line.level = self.level + 1
            else:
                if self.level + 1 < line.level or not line.is_structure:
                    # Body text or deeper heading: belongs to the child being collected.
                    new_child_lines.append(line)
                elif self.level + 1 == line.level:
                    # Sibling heading: close the current child and start the next one.
                    self.children.append(Container(lines=new_child_lines,
                                                   level=self.level + 1,
                                                   title=new_child_title,
                                                   father=self))
                    new_child_lines = []
                    new_child_title = line.text
        if new_child:
            self.children.append(Container(lines=new_child_lines,
                                           level=self.level + 1,
                                           title=new_child_title,
                                           father=self))

    def get_task(self, doc_one_liner):
        """Describe this container's task together with its surrounding context.

        Args:
            doc_one_liner: one-line description of the whole document.

        Returns:
            A dict with keys ``description``, ``about``, ``doc_description``,
            ``above`` (the father's one-liner), ``before`` and ``after`` (the
            one-liners of the preceding and following siblings). For a container
            whose father has no ``children`` / ``one_liner`` (the root), the
            sibling lists are empty and ``above`` is ``''``.
        """
        # The root's father is not a Container, so it may lack these attributes.
        siblings_ = getattr(self.father, 'children', None) or []
        try:
            index = siblings_.index(self)
        except ValueError:
            siblings_before_context = []
            siblings_after_context = []
        else:
            siblings_before_context = [sibling.one_liner for sibling in siblings_[:index]]
            siblings_after_context = [sibling.one_liner for sibling in siblings_[index + 1:]]

        task = {'description': self.task,
                'about': self.one_liner,
                'doc_description': doc_one_liner,
                'above': getattr(self.father, 'one_liner', ''),
                'before': siblings_before_context,
                'after': siblings_after_context}
        return task

    def get_paragraphs(self, chunk=500):
        """Split the container's text into paragraphs.

        A container whose whole text is shorter than ``chunk`` characters is one
        paragraph; otherwise its own ``root_text`` is one paragraph and each
        child is split recursively.
        """
        if len(self.text) < chunk:
            paragraphs = [self.text]
        else:
            paragraphs = [self.root_text]
            for child in self.children:
                paragraphs += child.get_paragraphs(chunk)
        return paragraphs


class Line:
    """One line of a document, classified by the marker it starts with.

    Attributes:
        text: the line content with its markers removed and whitespace stripped.
        type: the type associated with the matching marker, or ``'normal'``.
        level: heading depth when ``type`` is a number, ``-1`` otherwise.
        is_structure: ``True`` for heading lines (``level > 0``).
    """

    def __init__(self, text, params):
        """Classify ``text`` using the marker tables in ``params``.

        Args:
            text: the raw line.
            params: configuration with a ``'startswith_'`` mapping of opening
                marker to type and, optionally, an ``'endswith_'`` list of
                closing markers in the same order.
        """
        self.text = text
        self.type, self.text = self._parse_text(params)
        self.level = int(self.type) if self.type.isdigit() else -1
        self.is_structure = 0 < self.level

    def _parse_text(self, params):
        """Return ``(type, text)`` for the line, with its markers removed."""
        startswith_ = params.get('startswith_', {})
        endswith_ = params.get('endswith_') or [""] * len(startswith_)

        matches = [(i, starter) for i, starter in enumerate(startswith_)
                   if self.text.startswith(starter)]
        if not matches:
            return 'normal', self.text.strip()

        # When one marker is a prefix of another (e.g. '####' and '#####'),
        # the longest one is the marker that was actually written.
        index, starter = max(matches, key=lambda match: len(match[1]))

        # Remove the leading marker only; the same characters may legitimately
        # appear again further along the line.
        body = self.text[len(starter):]

        end = endswith_[index] if index < len(endswith_) else ""
        if end:
            # Cut at the last closing marker so the title itself may contain it.
            head, found, _ = body.rpartition(end)
            if found:
                body = head
        return startswith_[starter], body.strip()