Spaces:

Hexamind
/

GenProp

Runtime error

File size: 7,530 Bytes

498db6b

from src.domain.paragraph import Paragraph
from src.domain.block import Block

# Starting value of the level threshold in Container.create_children: the first
# structural paragraph met opens a child container when its level is <= this value.
INFINITE = 10000


class Container:
    """
    Node of a document tree built from a flat list of paragraphs.

    A container holds an optional title paragraph, the paragraphs directly
    attached to it (those found before the first structural paragraph) and
    child containers built recursively from the remaining paragraphs.

    Attributes set by ``__init__``:
        level, title, index, father: values received from the caller.
        id_: integer identifier derived from the father's id and ``id_``.
        all_paragraphs: the list of paragraphs received (not copied).
        paragraphs: paragraphs directly attached to this container.
        children: child containers, in document order.
        containers: this container followed by all its descendants.
        blocks: result of ``get_blocks()``.
        normal, comment, task: concatenated parsed text per paragraph type.
        one_liner: title text + ' ' + comment text.
        root_text: one_liner + ' ' + normal text.
    """

    def __init__(self, paragraphs: "list[Paragraph]", title: "Paragraph" = None, level: int = 0,
                 index: "list[int]" = None, father=None, id_=0):
        """
        Build the container and, recursively, all of its children.

        should add some summary or infos on content (by a priori generation)

        :param paragraphs: paragraphs belonging to this container (title excluded)
        :param title: paragraph used as title, or None
        :param level: hierarchy level of this container
        :param index: stored as ``self.index`` and forwarded to children and blocks
        :param father: parent object exposing ``id_``; None for a parentless container
        :param id_: position-like identifier given by the father
        """
        if index is None:
            index = []
        self.level = level
        self.title = title
        self.paragraphs = []
        self.all_paragraphs = paragraphs
        self.children = []
        self.index = index
        self.father = father  # if not father, then the container is at the top of the hierarchy
        self.id_ = self._build_id(father, id_)
        if paragraphs:
            # work on a copy: create_children consumes the list it receives
            self.paragraphs, self.children = self.create_children(paragraphs.copy(), level, index)
        self.containers = [self]
        for child in self.children:
            self.containers += child.containers
        self.blocks = self.get_blocks()
        self.normal, self.comment, self.task, _ = self.sort_paragraphs()

        self.one_liner = (self.title.text if self.title else '') + ' ' + self.comment
        self.root_text = self.one_liner + ' ' + self.normal

    @staticmethod
    def _build_id(father, id_) -> int:
        """
        Return the integer made of the digits '1', the father's id and ``id_``.

        A missing father (None) contributes no digits instead of raising.
        """
        father_id = '' if father is None else father.id_
        return int('1' + str(father_id) + str(id_))

    @property
    def text(self):
        """
        Full text of the container: title line (if any), attached paragraphs
        (one per line), then the text of every child container.
        """
        parts = []
        if self.title:
            parts.append("Titre " + str(self.level) + " : " + self.title.text + '\n')
        parts.extend(p.text + '\n' for p in self.paragraphs)
        parts.extend(child.text for child in self.children)
        return ''.join(parts)

    @property
    def table_of_contents(self):
        """
        Not used

        Flat list of ``{str(level): title text}`` dicts for this container
        (when it has a title) followed by those of its descendants.
        """
        toc = []
        if self.title:
            toc.append({str(self.level): self.title.text})
        for child in self.children:
            toc += child.table_of_contents
        return toc

    def move(self, position: int, new_father=None):
        """
        Not used

        Detach the container from its current father and insert it among the
        children of ``new_father`` at ``position`` (appended when the position
        is past the end).

        :param position: index in ``new_father.children``
        :param new_father: new parent, or None to leave the container detached
        :raises ValueError: if the current father does not list this container
        """
        current_father = self.father
        if current_father is not None:
            current_father.children.remove(self)

        if new_father is None:
            self.rank = 0
        else:
            # ``rank`` is not set by __init__; a father that has none counts as rank 0
            self.rank = getattr(new_father, 'rank', 0) + 1
            # list.insert appends when position >= len(children)
            new_father.children.insert(position, self)
        self.father = new_father

    def create_children(self, paragraphs, level, rank) -> "tuple[list, list]":
        """
        creates children containers or directly attached content
        and returns the list of containers and contents of level+1

        Paragraphs met before the first structural paragraph are attached to
        this container. From then on, a structural paragraph whose level is
        lower than or equal to the current threshold opens a new child (and
        becomes the new threshold); every other paragraph is handed to the
        child being collected.

        :param paragraphs: list of paragraphs; it is emptied by this call
        :param level: unused, kept for interface compatibility
        :param rank: forwarded as ``index`` to each child container
        :return: (attached paragraphs, child containers)
        """
        attached_paragraphs = []
        children = []
        pending_paragraphs = []
        pending_title = None
        in_children = False
        current_level = INFINITE
        child_id = 0

        for p in paragraphs:
            if not in_children and not p.is_structure:
                attached_paragraphs.append(p)
                continue

            in_children = True
            if p.is_structure and p.level <= current_level:  # p is higher or equal in hierarchy
                if pending_paragraphs or pending_title:
                    children.append(
                        Container(pending_paragraphs, pending_title, current_level, rank, self, child_id))
                    child_id += 1
                pending_paragraphs = []
                pending_title = p
                current_level = p.level
            else:  # p is strictly lower in hierarchy
                pending_paragraphs.append(p)

        # the input list has always been consumed by this method; keep that contract
        paragraphs.clear()

        if pending_paragraphs or pending_title:
            children.append(Container(pending_paragraphs, pending_title, current_level, rank, self, child_id))

        return attached_paragraphs, children

    @property
    def structure(self):
        """
        Flat list describing the tree: this container's own entry (a dict keyed
        by ``str(id_)``), then the ``structure`` of each attached paragraph,
        then the concatenated ``structure`` lists of the children.
        """
        own_entry = {str(self.id_): {
            'index': str(self.id_),
            'canMove': True,
            'isFolder': True,
            'children': [p.id_ for p in self.paragraphs] + [child.id_ for child in self.children],
            'canRename': True,
            'data': {},
            'level': self.level,
            'title': self.title.text if self.title else 'root'
        }}
        structure = [own_entry]
        structure.extend(p.structure for p in self.paragraphs)
        for child in self.children:
            structure += child.structure
        return structure

    def get_lang(self):
        """
        returns the main language of the document

        Not implemented: currently returns None.
        :return:
        """

    def get_structure(self, level=2):
        """
        returns the structure of the document

        Not implemented: currently returns None.
        :return:
        """

    def create_embeddings(self):
        """
        Not implemented: currently returns None.

        :return:
        """

    def get_blocks(self):
        """
        Build the list of blocks of this container and its descendants.

        Non-blank paragraphs starting with '##### ' are recorded as special
        actions (prefix removed); the text of the others is appended to the
        block content. The container's own block is kept only when it has
        content or specials; the children's blocks follow.
        """
        special_prefix = '##### '
        block = Block(level=self.level, index=self.index)
        if self.title:
            block.title = self.title.text
        for p in self.paragraphs:
            if p.blank:
                continue
            if p.text.startswith(special_prefix):
                # remove the prefix itself (str.lstrip would strip a *set* of characters
                # and eat leading '#' of the action), then any extra leading spaces
                block.specials.append(p.text[len(special_prefix):].lstrip(' '))
            else:
                block.content += p.text
        blocks = [block] if block.content or block.specials else []
        for child in self.children:
            blocks += child.blocks
        return blocks

    def get_fulltask(self, doc_one_liner):
        """
        Describe this container's task together with its surrounding context.

        :param doc_one_liner: value stored under 'doc_description'
        :return: dict with the task, this container's one-liner, the father's
                 one-liner ('' when the father is not a Container) and the
                 one-liners of the siblings located before and after it
        """
        has_container_father = isinstance(self.father, Container)
        before, after = [], []
        if has_container_father:
            siblings = self.father.children
            position = siblings.index(self)
            before = [sibling.one_liner for sibling in siblings[:position]]
            after = [sibling.one_liner for sibling in siblings[position + 1:]]

        return {'description': self.task,
                'about': self.one_liner,
                'doc_description': doc_one_liner,
                'above': self.father.one_liner if has_container_father else '',
                'before': before,
                'after': after}

    def sort_paragraphs(self) -> "tuple[str, str, str, str]":
        """
        Concatenate the parsed text of the attached paragraphs per type.

        Each text is prefixed with a space.

        :return: (normal, comment, task, title) texts
        :raises KeyError: if a paragraph type is not one of the four above
        """
        mapping = {'normal': '', 'comment': '', 'task': '', 'title': ''}
        for p in self.paragraphs:
            mapping[p.type] += ' ' + p.parsed_text
        return mapping['normal'], mapping['comment'], mapping['task'], mapping['title']

    def get_all_styles_used_in_doc_except_list(self):
        """
        Return the styles reported by all paragraphs, one per style name.

        The first style met for each name is kept, in order of appearance.
        loop in doc rather than in container? (since it applies only to container of level 0)
        """
        seen_names = set()
        unique_styles = []
        for p in self.all_paragraphs:
            for style in p.get_styles_in_paragraph_except_list():
                if style.name not in seen_names:
                    seen_names.add(style.name)
                    unique_styles.append(style)
        return unique_styles

    def get_list_styles(self):
        """
        Return the distinct list styles reported by all paragraphs (order not guaranteed).
        """
        styles = [p.get_list_styles() for p in self.all_paragraphs]
        return list(set().union(*styles))

    def retrieve_number_of_misapplied_styles(self):
        """
        Count the paragraphs whose ``style_misapplied`` flag is truthy.
        """
        return sum(1 for p in self.all_paragraphs if p.style_misapplied)