# Spaces: Hexamind / GenProp
# Status: Runtime error
# File size: 18,660 Bytes

import docx
from src.tools.doc_tools import *
from docxcompose.composer import Composer
from docx import Document as Document_compose
from docx.enum.table import WD_TABLE_ALIGNMENT
from src.domain.container import Container
from src.domain.container_requirements import Container_requirements
from src.domain.paragraph import Paragraph
from src.domain.styles import Styles
import shutil
import os
from docx.oxml.ns import qn
from docx.oxml.shared import OxmlElement
from docx.shared import Inches
from src.tools.pretty_print import pretty_print_block_and_indexes, pretty_print_paragraphs
from src.tools.index_creation import set_indexes
from src.reader.reader_for_requirements import WordReader

class Doc:

    """
    TODO: mettre _ devant les méthodes internes
    """

    def __init__(self, path='', id_=None):
        """
        Load the Word document stored at ``path`` and build its in-memory model.

        The constructor opens the file with python-docx, wraps every paragraph
        in a ``Paragraph``, lets ``handle_content_before_toc`` clean the start of
        the document, then builds the two containers (content and requirements),
        their indexes, the styles, the tasks and the blocks.

        :param path: location of the .docx file
        :param id_: accepted but not used; the identifier is always ``id(self)``
        """
        self.xdoc = docx.Document(path)
        self.title = get_title(path)
        self.name = self.title.split('.')[0]
        self.id_ = id(self)
        self.path = path

        wrapped_paragraphs = []
        for position, xparagraph in enumerate(self.xdoc.paragraphs):
            wrapped_paragraphs.append(Paragraph(xparagraph, self.id_, position, self))
        self.paragraphs = wrapped_paragraphs

        # Must run before the requirements are read: it may rewrite the file on disk.
        self.handle_content_before_toc()

        # Templates carry no requirements, so the file is not parsed for them.
        if "data/templates" in self.path:
            self.requirements_paragraphs = []
        else:
            self.requirements_paragraphs = WordReader(self.path).paragraphs

        self.container = Container(self.paragraphs, father=self)
        self.container_requirements = Container_requirements(self.requirements_paragraphs, father=self)
        set_indexes(self.container, self.path)
        set_indexes(self.container_requirements, self.path)
        self.styles = Styles(self.xdoc.styles)

        collected_tasks = []
        for child in self.container.containers:
            if child.task:
                collected_tasks.append(child.get_fulltask(self.container.one_liner))
        self.tasks = collected_tasks

        self.blocks = self.get_blocks()
        self.blocks_requirements = self.get_blocks_requirements()


    def copy(self, new_doc_path):
        """
        Duplicate the file behind this document and load the duplicate.

        :param new_doc_path: destination path of the copied .docx file
        :return: a new ``Doc`` built from (and saved back to) ``new_doc_path``
        """
        shutil.copyfile(self.path, new_doc_path)
        duplicate = Doc(new_doc_path)
        duplicate.save_as_docx(new_doc_path)
        return duplicate

    def clear(self):
        os.remove(self.path)

    def apply_template(self, template, options_list):
        """
        TODO: mettre le texte dans un fichier de config
        """
        log = []
        j = 0
        if ("Justifier le texte (Normal)" in options_list):
            log.append("Le contenu du document a été justifié")
            self.justify_content()
            self.save_as_docx()
        if("Recentrer les tableaux" in options_list):
            j = self.center_tables()
            log.append(f"{j} table{'s' if j>1 else ''} centrée{'s' if j>1 else ''}")
            self.save_as_docx()
        log.append(f"Le template {template.name} a été ajouté avant le document")
        self.rearrange_tables()
        self.save_as_docx()
        log = self.styles.apply_from(template.styles, log)
        self.save_as_docx()
        self.delete_toc(template)
        self.normal_style_for_empty_paragraphs()
        self.save_as_docx()
        self.append_doc_to_template_and_update_toc(template)
        return log

    def copy_one_style(self, src_style_name: str, dest_style_name: str, template):
        style_dest = template.styles.get_style_from_name(dest_style_name)
        src_style = self.styles.get_style_from_name(src_style_name)
        if src_style:
            log = self.styles.copy_one_style(src_style, style_dest)
            return log
        else:
            return None

    def get_different_styles_with_template(self, template):
        """
        Compare the non-list styles used in this document with ``template``.

        :param template: the template to compare against
        :return: whatever ``get_difference_with_template`` returns for the
                 styles reported by ``get_all_styles_used_in_doc_except_list``
        """
        return get_difference_with_template(
            self.get_all_styles_used_in_doc_except_list(),
            template,
        )

    def save_as_docx(self, path: str = ''):
        path = path if path else self.path
        self.path = path
        self.xdoc.save(path)

    def get_blocks(self):

        """
        TODO: do a function that determines if the Doc is not a template nor a generated doc
        TODO: merge the two functions for getting blocks
        TODO: why do we need two functions? in the end, we need only 
        """
        if "temp/generated_files" in self.path or "data/templates" in self.path:
            return

        def from_list_to_str(index_list):
            index_str = str(index_list[0])
            for el in index_list[1:]:
                index_str += '.' + str(el)
            return index_str

        blocks = self.container.blocks
        for block in blocks:
            block.doc = self.title
            block.index = from_list_to_str(block.index)
        return blocks
    

    def get_blocks_requirements(self):
        if "temp/generated_files" in self.path or "data/templates" in self.path:
            return

        def from_list_to_str(index_list):
            index_str = str(index_list[0])
            for el in index_list[1:]:
                index_str += '.' + str(el)
            return index_str

        blocks = self.container_requirements.blocks
        for block in blocks:
            block.doc = self.title
            block.index = from_list_to_str(block.index) if not isinstance(block.index, str) else block.index
            # print(f"{block.index}: {block.content}")
            # print("--------------------------------------------------")
        return blocks

    @property
    def toc(self):
        """
        return the paragraphs that are in the table of contents
        """
        return [p for p in self.paragraphs if p.toc]
    
    @property
    def structure(self):
        return self.container.structure

    def replace_tasks(self, resolutions: [str]):
        if len(resolutions) == len(self.tasks):  # exception to be handled
            p_tasks = [p for p in self.paragraphs if p.type == 'task']
            for p, r in zip(p_tasks, resolutions):
                p.set_text(r)
        else:
            print(f"résolutions : {len(resolutions)} != {len(self.tasks)} tasks")
        return self

    def get_paragraphs(self):
        return self.container.all_paragraphs
    
    def get_text_from_paragraphs(self):
        return [p.text for p in self.paragraphs]
    
    def check_document(self):
        """
        debugging function to analyse the doc
        """
        picCount = 0
        tabCount = 0
        for paragraph in self.xdoc.paragraphs:
            if picCount < len(self.xdoc.inline_shapes):
                print('\033[1mPicture \033[0m')
                picCount += 1
            elif paragraph.text:
                print(paragraph.text)
            elif tabCount < len(self.xdoc.tables):
                table = self.xdoc.tables[tabCount]
                data = []
                keys = None
                for i, row in enumerate(table.rows):
                    text = (cell.text for cell in row.cells)
                    if i == 0:
                        keys = tuple(text)
                        continue
                    row_data = dict(zip(keys, text))
                    data.append(row_data)
                print('\033[1mTable:\033[0m', data)
                tabCount += 1
            else:
                print('\033[1mEmpty paragraph\033[0m')

    
    def center_tables(self):
        j = 0
        for table in self.xdoc.tables:
            j += 1
            table.alignment = WD_TABLE_ALIGNMENT.CENTER
        return j
    
    def rearrange_tables(self):
        """
        Hotfix for autofit.
        directly from XML 
        """
        for t_idx, _ in enumerate(self.xdoc.tables):
            self.xdoc.tables[t_idx].autofit = True
            self.xdoc.tables[t_idx].allow_autofit = True
            self.xdoc.tables[t_idx]._tblPr.xpath("./w:tblW")[0].attrib["{http://schemas.openxmlformats.org/wordprocessingml/2006/main}type"] = "auto"
            for row_idx, _ in enumerate(self.xdoc.tables[t_idx].rows):
                for cell_idx, _ in enumerate(self.xdoc.tables[t_idx].rows[row_idx].cells):
                    self.xdoc.tables[t_idx].rows[row_idx].cells[cell_idx]._tc.tcPr.tcW.type = 'auto'
                    self.xdoc.tables[t_idx].rows[row_idx].cells[cell_idx]._tc.tcPr.tcW.w = 0
    
    def center_images(self):
        """
        works only for images in the run
        """
        for paragraph in self.paragraphs:
            paragraph.center_paragraph()

    def justify_content(self):
        """
        applied only to normal style
        """
        for paragraph in self.paragraphs:
            paragraph.justify_paragraph()

    def number_images_in_doc(self):
        """
        for debug = not used
        """
        picCount = 0
        for _ in self.xdoc.paragraphs:
            if picCount < len(self.xdoc.inline_shapes):
                print('\033[1mPicture \033[0m')
                picCount += 1
        return picCount
    
    def get_all_styles_used_in_doc_except_list(self):
        return self.container.get_all_styles_used_in_doc_except_list()

    def get_list_styles(self):
        return self.container.get_list_styles()
    
    def retrieve_number_of_misapplied_styles(self):
        return self.container.retrieve_number_of_misapplied_styles()

    def normal_style_for_empty_paragraphs(self):
        for p in self.paragraphs:
            if p.blank and not p.toc:
                p.set_style(self.styles.get_style_from_name("Normal"))
        self.save_as_docx()


    def append_doc_to_template_and_update_toc(self, template):
        """
        Append this document to the template and refresh the table of contents.

        The composed result overwrites the file at ``self.path``. It is then
        reloaded as a new ``Doc`` whose table of contents is updated and saved.
        ``self.xdoc`` itself is not reloaded by this method.

        TODO: rename Document_compose into XDocument
        Document_compose = plain old Document from docx
        Composer = from docxcompose => allows combining several documents

        :param template: the template document; only its ``path`` is used here
        """
        composer = Composer(Document_compose(template.path))
        composer.append(Document_compose(self.path))
        composer.save(self.path)

        refreshed = Doc(self.path)
        update_table_of_contents(refreshed.xdoc)
        refreshed.save_as_docx()
    
    def delete_content_before_toc(self):
        """
        TODO: loop with paragraph (ours)
        """
        if self.contains_toc():
            for line in self.xdoc.paragraphs:
                if "toc" in line.style.name:
                    break
                if len(line.text) == 0:
                    self.delete_paragraph(line)
                    self.paragraphs.pop(0)
                    continue
                if 'toc' not in line.style.name:
                    self.delete_paragraph(line)
                    self.paragraphs.pop(0)
            self.save_as_docx()
    
    def delete_paragraph(self, paragraph):
        """
        TODO: to be put in paragraph
        """
        p = paragraph._element
        p.getparent().remove(p)
        paragraph._p = paragraph._element = None

    def delete_toc(self,template):
        """
        TODO: loop with paragraph (ours)
        """
        index_to_insert = None
        for index, p in enumerate(template.paragraphs):
            index_to_insert = index
            if ("table des matières" or "table of contents") in p.text.lower():
                index_to_insert += 1
                break
        xparagraphs_toc = [p.xparagraph for p in self.toc]
        for p in xparagraphs_toc:
            self.delete_paragraph(p)
            self.paragraphs.pop(0)
        self.save_as_docx()

    
    def insert_table_of_content(self,index):
        """
        To create a TOC (not used here)

        Inserts an empty "Normal" paragraph before the python-docx paragraph at
        position ``index`` and fills its run with a TOC field
        (begin / instruction / separate / end), then prints the paragraph XML.

        :param index: position, in ``self.xdoc.paragraphs``, of the paragraph
                      before which the TOC paragraph is inserted
        """
        paragraph = self.xdoc.paragraphs[index].insert_paragraph_before("", "Normal")
        # No spacing around the paragraph that carries the field.
        paragraph.paragraph_format.space_before = Inches(0)
        paragraph.paragraph_format.space_after = Inches(0)
        run = paragraph.add_run()

        fldChar = OxmlElement('w:fldChar')  # creates a new element
        fldChar.set(qn('w:fldCharType'), 'begin')  # sets attribute on element

        instrText = OxmlElement('w:instrText')
        instrText.set(qn('xml:space'), 'preserve')  # sets attribute on element
        instrText.text = 'TOC \\o "1-5" \\h \\z \\u'   # "1-5" is the range of heading levels; change it as needed

        fldChar2 = OxmlElement('w:fldChar')
        fldChar2.set(qn('w:fldCharType'), 'separate')

        # NOTE(review): the w:t element created on the next two lines is
        # discarded: fldChar3 is rebound to a w:updateFields element right
        # after, so the placeholder text never reaches the document - confirm
        # which of the two elements was intended.
        fldChar3 = OxmlElement('w:t')
        fldChar3.text = "Right-click to update field."
        fldChar3 = OxmlElement('w:updateFields')
        fldChar3.set(qn('w:val'), 'true')
        fldChar2.append(fldChar3)

        fldChar4 = OxmlElement('w:fldChar')
        fldChar4.set(qn('w:fldCharType'), 'end')

        # The field parts are appended to the run in order.
        r_element = run._r
        r_element.append(fldChar)
        r_element.append(instrText)
        r_element.append(fldChar2)
        r_element.append(fldChar4)

        # Debug output of the generated paragraph XML.
        p_element = paragraph._p
        print(p_element.xml)


    def contains_toc(self):
        body_elements = self.xdoc._body._body
        #extract those wrapped in <w:r> tag
        rs = body_elements.xpath('.//w:r')
        #check if style is hyperlink (toc)
        table_of_content = []
        for r in rs:
            if r.style:
                if "hyperlink" in r.style.lower() or "lienhypertexte" in r.style.lower():
                    table_of_content.append(r.text)
        if len(table_of_content) > 0:
            return True
        else:
            return False
        
    def handle_content_before_toc(self):
        """
        TODO: use a function to determine the type of the doc
        """
        if not "data/templates" in self.path and not "temp/generated_files" in self.path: #PREMIER PROBLEME
            self.delete_content_before_toc()

 
    def delete_style(self, style_name):
        self.styles.delete_style(style_name)
        self.save_as_docx()

    def change_bullet_style(self, style_name, template_style_name, template) -> {}:
        """
        Replace bullet paragraphs of a given style and level with paragraphs
        using a template style.

        Each matching paragraph is deleted and a new paragraph with the same
        text is written at the same position with the target style
        (template_style_name). Paragraphs inside tables are handled by
        ``change_bullet_style_in_tables``, whose log is returned. The document
        is saved at the end.

        TODO: recode to respect the OOP

        :param style_name: the part before ' : ' is the core style name (no
                           indentation); the part after ' = ' is parsed as the
                           integer indentation level
        :param template_style_name: name of the style looked up in ``template.styles``
        :param template: document providing the target style
        :return: the dict returned by ``change_bullet_style_in_tables``
        """
        i = 0
        real_style_name = style_name.split(' : ')[0]
        level = int(style_name.split(' = ')[1])
        # self.xdoc.paragraphs is re-read on every iteration because the loop
        # body deletes and inserts paragraphs.
        while i < len(self.xdoc.paragraphs):
            para = self.xdoc.paragraphs[i]
            if real_style_name == para.style.name and self.paragraphs[i].is_list and self.paragraphs[i].list_indentation == level:
                # The paragraph is fetched again for the deletion instead of
                # passing `para`: delete_paragraph resets _p/_element on the
                # object it receives.
                # NOTE(review): presumably this is what keeps para.text
                # readable below - confirm before refactoring.
                self.delete_paragraph(self.xdoc.paragraphs[i])
                self.paragraphs.pop(i)
                if i == len(self.xdoc.paragraphs):
                    # The deleted paragraph was the last one: append at the end.
                    paragraph_inserted = self.xdoc.add_paragraph(para.text, style=template.styles.get_style_from_name(template_style_name))
                    self.paragraphs.insert(i, Paragraph(paragraph_inserted, self.id_, i, self))
                else:
                    # Otherwise insert before the paragraph now at position i.
                    paragraph_inserted = self.xdoc.paragraphs[i].insert_paragraph_before(para.text, style=template.styles.get_style_from_name(template_style_name))
                    self.paragraphs.insert(i, Paragraph(paragraph_inserted, self.id_, i, self))
            i += 1
        log_dict = self.change_bullet_style_in_tables(style_name, template_style_name, template)
        self.save_as_docx()
        return log_dict
    
    def change_bullet_style_in_tables(self, style_name, template_style_name, template) -> {}:
        """
        Same as ``change_bullet_style``, for the paragraphs found in table cells.

        TODO: recode to respect the OOP (same as ``change_bullet_style``)

        :param style_name: the part before ' : ' is the core style name; the
                           part after ' = ' is parsed as the integer indentation level
        :param template_style_name: name of the style looked up in ``template.styles``
        :param template: document providing the target style
        :return: a dict with the single key 'list_mapping' holding the log
                 message; it is returned whether or not a paragraph was replaced
        """
        i = 0
        real_style_name = style_name.split(' : ')[0]
        level = int(style_name.split(' = ')[1])
        for table in self.xdoc.tables:
            for row in table.rows:
                for cell in row.cells:
                    # i is the position of the paragraph inside the current cell.
                    i = 0
                    for para in cell.paragraphs:
                        real_para = Paragraph(para, self.id_, i, self)
                        if real_style_name == para.style.name and real_para.is_list and real_para.list_indentation == level:
                            # NOTE(review): delete_paragraph resets para._p and
                            # para._element before real_para.text is read below;
                            # confirm Paragraph.text does not depend on them.
                            self.delete_paragraph(para)
                            if i == len(cell.paragraphs):
                                # The deleted paragraph was the last of the cell.
                                cell.add_paragraph(real_para.text, style=template.styles.get_style_from_name(template_style_name))
                            else:
                                cell.paragraphs[i].insert_paragraph_before(real_para.text, style=template.styles.get_style_from_name(template_style_name))
                        i += 1
        log = f"Le style {style_name} a été changé en {template_style_name}"
        log_dict = {'list_mapping': log}
        return log_dict
    
    def table_insertion(self, index: str, content: dict):
        #the index is the index of the block in the docx file where to insert the table
        #the content is the content of the table with the following format:
        #content = {
        #    "headers": ["header1", "header2", "header3"],
        #    "rows": [
        #        ["row1", "row1", "row1"],
        #        ["row2", "row2", "row2"],
        #        ["row3", "row3", "row3"],
        #    ]
        #}
        list_of_indexes = index.split(".")
        index_in_list = [eval(i) for i in list_of_indexes]
        #find the container which has the index
        paragraph : Paragraph = None
        containers : [Container] = self.container.containers
        for c in containers:
            if c.index == index_in_list:
                if c.title:
                    paragraph = c.title
                else:
                    paragraph = c.paragraphs[0]
                break
        if not paragraph:
            print("The index is not valid")
            return None
        
        table = self.xdoc.add_table(rows = len(content["rows"]) + 1, cols = len(content["headers"]))
        #set style below

        
        #add the header
        for i, header in enumerate(content["headers"]):
            table.cell(0, i).text = header
        #add the rows
        for i, row in enumerate(content["rows"]):
            for j, cell in enumerate(row):
                table.cell(i+1, j).text = cell
        #insert the table after the paragraph
        self.move_table_after(table, paragraph.xparagraph)
        #save the doc
        self.save_as_docx()
        return table
    
    def delete_table(self, table):
        table._element.getparent().remove(table._element)
        table._element = table._row = None
        self.save_as_docx()

    def move_table_after(self, table, paragraph):
        tbl, p = table._tbl, paragraph._p
        p.addnext(tbl)
    

    def remove_all_but_last_section(self):
        """
        not used
        """
        sectPrs = self.xdoc._element.xpath(".//w:pPr/w:sectPr")
        for sectPr in sectPrs:
            print(sectPr)
            sectPr.getparent().remove(sectPr)