File size: 18,660 Bytes
498db6b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25eac81
498db6b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
445a883
 
498db6b
 
ecf147c
498db6b
 
 
 
 
 
445a883
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
498db6b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
import docx
from src.tools.doc_tools import *
from docxcompose.composer import Composer
from docx import Document as Document_compose
from docx.enum.table import WD_TABLE_ALIGNMENT
from src.domain.container import Container
from src.domain.container_requirements import Container_requirements
from src.domain.paragraph import Paragraph
from src.domain.styles import Styles
import shutil
import os
from docx.oxml.ns import qn
from docx.oxml.shared import OxmlElement
from docx.shared import Inches
from src.tools.pretty_print import pretty_print_block_and_indexes, pretty_print_paragraphs
from src.tools.index_creation import set_indexes
from src.reader.reader_for_requirements import WordReader

class Doc:

    """
    TODO: mettre _ devant les méthodes internes
    """

    def __init__(self, path='', id_=None):
        """
        Load the .docx file at `path` and build every derived view of it.

        path: location of the .docx file. It is also searched for the
            "data/templates" and "temp/generated_files" substrings to decide
            which processing steps apply to this document.
        id_: NOTE(review): accepted but never read; the identifier is always
            id(self). Confirm whether callers expect their value to be used.
        """
        self.xdoc = docx.Document(path)
        self.title = get_title(path)
        # text before the first dot of the title
        self.name = self.title.split('.')[0]
        self.id_ = id(self)
        self.path = path
        # one Paragraph wrapper per python-docx paragraph, in document order
        self.paragraphs = [Paragraph(xp, self.id_, i, self) for (i, xp) in enumerate(self.xdoc.paragraphs)]
        # must run after self.path / self.xdoc / self.paragraphs are set: it reads all three
        self.handle_content_before_toc()
        # the requirements reader is skipped for templates
        self.requirements_paragraphs = WordReader(self.path).paragraphs if not "data/templates" in self.path else []
        self.container = Container(self.paragraphs, father=self)
        self.container_requirements = Container_requirements(self.requirements_paragraphs, father=self)
        set_indexes(self.container, self.path)
        set_indexes(self.container_requirements, self.path)
        self.styles = Styles(self.xdoc.styles)
        # one entry per sub-container whose `task` attribute is truthy
        self.tasks = [c.get_fulltask(self.container.one_liner) for c in self.container.containers if c.task]
        # both are None for templates and generated files (see the getters)
        self.blocks = self.get_blocks()
        self.blocks_requirements = self.get_blocks_requirements()


    def copy(self, new_doc_path):
        """
        Duplicate the underlying file to `new_doc_path`, build a Doc from the
        duplicate, save it once at that location and return it.
        """
        shutil.copyfile(self.path, new_doc_path)
        duplicate = Doc(new_doc_path)
        duplicate.save_as_docx(new_doc_path)
        return duplicate

    def clear(self):
        os.remove(self.path)

    def apply_template(self, template, options_list):
        """
        TODO: mettre le texte dans un fichier de config
        """
        log = []
        j = 0
        if ("Justifier le texte (Normal)" in options_list):
            log.append("Le contenu du document a été justifié")
            self.justify_content()
            self.save_as_docx()
        if("Recentrer les tableaux" in options_list):
            j = self.center_tables()
            log.append(f"{j} table{'s' if j>1 else ''} centrée{'s' if j>1 else ''}")
            self.save_as_docx()
        log.append(f"Le template {template.name} a été ajouté avant le document")
        self.rearrange_tables()
        self.save_as_docx()
        log = self.styles.apply_from(template.styles, log)
        self.save_as_docx()
        self.delete_toc(template)
        self.normal_style_for_empty_paragraphs()
        self.save_as_docx()
        self.append_doc_to_template_and_update_toc(template)
        return log

    def copy_one_style(self, src_style_name: str, dest_style_name: str, template):
        style_dest = template.styles.get_style_from_name(dest_style_name)
        src_style = self.styles.get_style_from_name(src_style_name)
        if src_style:
            log = self.styles.copy_one_style(src_style, style_dest)
            return log
        else:
            return None

    def get_different_styles_with_template(self, template):
        """
        Pass the non-list styles used in this document, together with
        `template`, to get_difference_with_template and return its result.
        """
        return get_difference_with_template(
            self.get_all_styles_used_in_doc_except_list(),
            template,
        )

    def save_as_docx(self, path: str = ''):
        path = path if path else self.path
        self.path = path
        self.xdoc.save(path)

    def get_blocks(self):

        """
        TODO: do a function that determines if the Doc is not a template nor a generated doc
        TODO: merge the two functions for getting blocks
        TODO: why do we need two functions? in the end, we need only 
        """
        if "temp/generated_files" in self.path or "data/templates" in self.path:
            return

        def from_list_to_str(index_list):
            index_str = str(index_list[0])
            for el in index_list[1:]:
                index_str += '.' + str(el)
            return index_str

        blocks = self.container.blocks
        for block in blocks:
            block.doc = self.title
            block.index = from_list_to_str(block.index)
        return blocks
    

    def get_blocks_requirements(self):
        if "temp/generated_files" in self.path or "data/templates" in self.path:
            return

        def from_list_to_str(index_list):
            index_str = str(index_list[0])
            for el in index_list[1:]:
                index_str += '.' + str(el)
            return index_str

        blocks = self.container_requirements.blocks
        for block in blocks:
            block.doc = self.title
            block.index = from_list_to_str(block.index) if not isinstance(block.index, str) else block.index
            # print(f"{block.index}: {block.content}")
            # print("--------------------------------------------------")
        return blocks

    @property
    def toc(self):
        """
        return the paragraphs that are in the table of contents
        """
        return [p for p in self.paragraphs if p.toc]
    
    @property
    def structure(self):
        return self.container.structure

    def replace_tasks(self, resolutions: [str]):
        if len(resolutions) == len(self.tasks):  # exception to be handled
            p_tasks = [p for p in self.paragraphs if p.type == 'task']
            for p, r in zip(p_tasks, resolutions):
                p.set_text(r)
        else:
            print(f"résolutions : {len(resolutions)} != {len(self.tasks)} tasks")
        return self

    def get_paragraphs(self):
        return self.container.all_paragraphs
    
    def get_text_from_paragraphs(self):
        return [p.text for p in self.paragraphs]
    
    def check_document(self):
        """
        debugging function to analyse the doc
        """
        picCount = 0
        tabCount = 0
        for paragraph in self.xdoc.paragraphs:
            if picCount < len(self.xdoc.inline_shapes):
                print('\033[1mPicture \033[0m')
                picCount += 1
            elif paragraph.text:
                print(paragraph.text)
            elif tabCount < len(self.xdoc.tables):
                table = self.xdoc.tables[tabCount]
                data = []
                keys = None
                for i, row in enumerate(table.rows):
                    text = (cell.text for cell in row.cells)
                    if i == 0:
                        keys = tuple(text)
                        continue
                    row_data = dict(zip(keys, text))
                    data.append(row_data)
                print('\033[1mTable:\033[0m', data)
                tabCount += 1
            else:
                print('\033[1mEmpty paragraph\033[0m')

    
    def center_tables(self):
        j = 0
        for table in self.xdoc.tables:
            j += 1
            table.alignment = WD_TABLE_ALIGNMENT.CENTER
        return j
    
    def rearrange_tables(self):
        """
        Hotfix for autofit.
        directly from XML 
        """
        for t_idx, _ in enumerate(self.xdoc.tables):
            self.xdoc.tables[t_idx].autofit = True
            self.xdoc.tables[t_idx].allow_autofit = True
            self.xdoc.tables[t_idx]._tblPr.xpath("./w:tblW")[0].attrib["{http://schemas.openxmlformats.org/wordprocessingml/2006/main}type"] = "auto"
            for row_idx, _ in enumerate(self.xdoc.tables[t_idx].rows):
                for cell_idx, _ in enumerate(self.xdoc.tables[t_idx].rows[row_idx].cells):
                    self.xdoc.tables[t_idx].rows[row_idx].cells[cell_idx]._tc.tcPr.tcW.type = 'auto'
                    self.xdoc.tables[t_idx].rows[row_idx].cells[cell_idx]._tc.tcPr.tcW.w = 0
    
    def center_images(self):
        """
        works only for images in the run
        """
        for paragraph in self.paragraphs:
            paragraph.center_paragraph()

    def justify_content(self):
        """
        applied only to normal style
        """
        for paragraph in self.paragraphs:
            paragraph.justify_paragraph()

    def number_images_in_doc(self):
        """
        for debug = not used
        """
        picCount = 0
        for _ in self.xdoc.paragraphs:
            if picCount < len(self.xdoc.inline_shapes):
                print('\033[1mPicture \033[0m')
                picCount += 1
        return picCount
    
    def get_all_styles_used_in_doc_except_list(self):
        return self.container.get_all_styles_used_in_doc_except_list()

    def get_list_styles(self):
        return self.container.get_list_styles()
    
    def retrieve_number_of_misapplied_styles(self):
        return self.container.retrieve_number_of_misapplied_styles()

    def normal_style_for_empty_paragraphs(self):
        for p in self.paragraphs:
            if p.blank and not p.toc:
                p.set_style(self.styles.get_style_from_name("Normal"))
        self.save_as_docx()


    def append_doc_to_template_and_update_toc(self,template):
        """
        Append this document's file to the template file, write the result
        over this document's path, then reload it, update its table of
        contents and save it again.

        Document_compose = plain Document from docx
        Composer = from docxcompose, used here to append one document to another
        TODO: rename Document_compose into XDocument
        """
        template_xdoc = Document_compose(template.path)
        merger = Composer(template_xdoc)
        own_xdoc = Document_compose(self.path)
        merger.append(own_xdoc)
        merger.save(self.path)

        # reload the merged file so the TOC update applies to the combined content
        merged = Doc(self.path)
        update_table_of_contents(merged.xdoc)
        merged.save_as_docx()
    
    def delete_content_before_toc(self):
        """
        TODO: loop with paragraph (ours)
        """
        if self.contains_toc():
            for line in self.xdoc.paragraphs:
                if "toc" in line.style.name:
                    break
                if len(line.text) == 0:
                    self.delete_paragraph(line)
                    self.paragraphs.pop(0)
                    continue
                if 'toc' not in line.style.name:
                    self.delete_paragraph(line)
                    self.paragraphs.pop(0)
            self.save_as_docx()
    
    def delete_paragraph(self, paragraph):
        """
        TODO: to be put in paragraph
        """
        p = paragraph._element
        p.getparent().remove(p)
        paragraph._p = paragraph._element = None

    def delete_toc(self,template):
        """
        TODO: loop with paragraph (ours)
        """
        index_to_insert = None
        for index, p in enumerate(template.paragraphs):
            index_to_insert = index
            if ("table des matières" or "table of contents") in p.text.lower():
                index_to_insert += 1
                break
        xparagraphs_toc = [p.xparagraph for p in self.toc]
        for p in xparagraphs_toc:
            self.delete_paragraph(p)
            self.paragraphs.pop(0)
        self.save_as_docx()

    
    def insert_table_of_content(self,index):
        """
        Insert an empty "Normal" paragraph before the python-docx paragraph at
        position `index` and fill its run with a TOC field
        (begin / instruction / separate / end). Not used here.

        The XML of the new paragraph is printed; the document is not saved.
        """
        paragraph = self.xdoc.paragraphs[index].insert_paragraph_before("", "Normal")
        # no spacing around the inserted paragraph
        paragraph.paragraph_format.space_before = Inches(0)
        paragraph.paragraph_format.space_after = Inches(0)
        run = paragraph.add_run()

        fldChar = OxmlElement('w:fldChar')  # field start marker
        fldChar.set(qn('w:fldCharType'), 'begin')  # sets attribute on element

        instrText = OxmlElement('w:instrText')
        instrText.set(qn('xml:space'), 'preserve')  # sets attribute on element
        instrText.text = 'TOC \\o "1-5" \\h \\z \\u'   # per the original note, "1-5" is the heading-level range; adjust as needed

        fldChar2 = OxmlElement('w:fldChar')
        fldChar2.set(qn('w:fldCharType'), 'separate')

        # NOTE(review): the w:t element built on the next two lines is
        # discarded when fldChar3 is rebound to w:updateFields, so its
        # placeholder text is never attached. Confirm which one was intended.
        fldChar3 = OxmlElement('w:t')
        fldChar3.text = "Right-click to update field."
        fldChar3 = OxmlElement('w:updateFields') 
        fldChar3.set(qn('w:val'), 'true') 
        fldChar2.append(fldChar3)

        fldChar4 = OxmlElement('w:fldChar')  # field end marker
        fldChar4.set(qn('w:fldCharType'), 'end')

        # the order of these appends defines the field structure inside the run
        r_element = run._r
        r_element.append(fldChar)
        r_element.append(instrText)
        r_element.append(fldChar2)
        r_element.append(fldChar4)

        p_element = paragraph._p
        print(p_element.xml)  # debug output of the resulting paragraph XML


    def contains_toc(self):
        body_elements = self.xdoc._body._body
        #extract those wrapped in <w:r> tag
        rs = body_elements.xpath('.//w:r')
        #check if style is hyperlink (toc)
        table_of_content = []
        for r in rs:
            if r.style:
                if "hyperlink" in r.style.lower() or "lienhypertexte" in r.style.lower():
                    table_of_content.append(r.text)
        if len(table_of_content) > 0:
            return True
        else:
            return False
        
    def handle_content_before_toc(self):
        """
        TODO: use a function to determine the type of the doc
        """
        if not "data/templates" in self.path and not "temp/generated_files" in self.path: #PREMIER PROBLEME
            self.delete_content_before_toc()

 
    def delete_style(self, style_name):
        self.styles.delete_style(style_name)
        self.save_as_docx()

    def change_bullet_style(self, style_name, template_style_name, template) -> dict:
        """
        Replace every body paragraph that is a list item in style `style_name`
        at the requested indentation level with a new paragraph holding the
        same text in the template style `template_style_name`. The same
        replacement is then run inside tables and the document is saved.

        style_name: the text before ' : ' is the core style name and the
            integer after ' = ' is the indentation level (format inferred
            from the two split calls below; confirm against callers).
        Returns the log dict produced by change_bullet_style_in_tables.

        TODO: recode to respect the OOP
        """
        i = 0
        real_style_name = style_name.split(' : ')[0]  # core style name, without the indentation part
        level = int(style_name.split(' = ')[1])  # indentation level
        # self.xdoc.paragraphs is re-read on every pass because the loop edits the document
        while i < len(self.xdoc.paragraphs):
            para = self.xdoc.paragraphs[i]
            # self.paragraphs[i] is assumed to wrap the same paragraph as self.xdoc.paragraphs[i]
            if real_style_name == para.style.name and self.paragraphs[i].is_list and self.paragraphs[i].list_indentation == level:
                # NOTE(review): the paragraph is looked up again for deletion rather than
                # passing `para`; delete_paragraph clears the references of the object it
                # receives, and para.text is still read below, presumably relying on `para`
                # being a distinct object. Confirm before simplifying.
                self.delete_paragraph(self.xdoc.paragraphs[i])
                self.paragraphs.pop(i)
                if i == len(self.xdoc.paragraphs):
                    # the deleted paragraph was the last one: append at the end
                    paragraph_inserted = self.xdoc.add_paragraph(para.text, style=template.styles.get_style_from_name(template_style_name))
                    self.paragraphs.insert(i, Paragraph(paragraph_inserted, self.id_, i, self))
                else:
                    # otherwise insert before the paragraph that now sits at position i
                    paragraph_inserted = self.xdoc.paragraphs[i].insert_paragraph_before(para.text, style=template.styles.get_style_from_name(template_style_name))
                    self.paragraphs.insert(i, Paragraph(paragraph_inserted, self.id_, i, self))
            i += 1
        log_dict = self.change_bullet_style_in_tables(style_name, template_style_name, template)
        self.save_as_docx()
        return log_dict
    
    def change_bullet_style_in_tables(self, style_name, template_style_name, template) -> dict:
        """
        Table counterpart of change_bullet_style: inside every cell of every
        table, replace list paragraphs in style `style_name` at the requested
        indentation level with a paragraph holding the same text in the
        template style `template_style_name`.

        style_name: same format as in change_bullet_style (core name before
            ' : ', integer level after ' = ').
        Returns {'list_mapping': <message>}. The message is built whether or
        not any paragraph was replaced. The document is not saved here.

        TODO: recode to respect the OOP (same as change_bullet_style)
        """
        i = 0
        real_style_name = style_name.split(' : ')[0]  # core style name
        level = int(style_name.split(' = ')[1])  # indentation level
        for table in self.xdoc.tables:
            for row in table.rows:
                for cell in row.cells:
                    i = 0  # position of the current paragraph inside this cell
                    for para in cell.paragraphs:
                        real_para = Paragraph(para, self.id_, i, self)
                        if real_style_name == para.style.name and real_para.is_list and real_para.list_indentation == level:
                            # NOTE(review): `para` is detached (its element references are cleared)
                            # before real_para.text is read below; confirm that Paragraph.text does
                            # not depend on the live element.
                            self.delete_paragraph(para)
                            if i == len(cell.paragraphs):
                                # the deleted paragraph was the last of the cell
                                cell.add_paragraph(real_para.text, style=template.styles.get_style_from_name(template_style_name))
                            else:
                                # insert before the paragraph that now sits at position i
                                cell.paragraphs[i].insert_paragraph_before(real_para.text, style=template.styles.get_style_from_name(template_style_name))
                        i += 1
        log = f"Le style {style_name} a été changé en {template_style_name}"
        log_dict = {'list_mapping': log}
        return log_dict
    
    def table_insertion(self, index: str, content: dict):
        #the index is the index of the block in the docx file where to insert the table
        #the content is the content of the table with the following format:
        #content = {
        #    "headers": ["header1", "header2", "header3"],
        #    "rows": [
        #        ["row1", "row1", "row1"],
        #        ["row2", "row2", "row2"],
        #        ["row3", "row3", "row3"],
        #    ]
        #}
        list_of_indexes = index.split(".")
        index_in_list = [eval(i) for i in list_of_indexes]
        #find the container which has the index
        paragraph : Paragraph = None
        containers : [Container] = self.container.containers
        for c in containers:
            if c.index == index_in_list:
                if c.title:
                    paragraph = c.title
                else:
                    paragraph = c.paragraphs[0]
                break
        if not paragraph:
            print("The index is not valid")
            return None
        
        table = self.xdoc.add_table(rows = len(content["rows"]) + 1, cols = len(content["headers"]))
        #set style below

        
        #add the header
        for i, header in enumerate(content["headers"]):
            table.cell(0, i).text = header
        #add the rows
        for i, row in enumerate(content["rows"]):
            for j, cell in enumerate(row):
                table.cell(i+1, j).text = cell
        #insert the table after the paragraph
        self.move_table_after(table, paragraph.xparagraph)
        #save the doc
        self.save_as_docx()
        return table
    
    def delete_table(self, table):
        table._element.getparent().remove(table._element)
        table._element = table._row = None
        self.save_as_docx()

    def move_table_after(self, table, paragraph):
        tbl, p = table._tbl, paragraph._p
        p.addnext(tbl)
    

    def remove_all_but_last_section(self):
        """
        not used
        """
        sectPrs = self.xdoc._element.xpath(".//w:pPr/w:sectPr")
        for sectPr in sectPrs:
            print(sectPr)
            sectPr.getparent().remove(sectPr)