import docx
from src.tools.doc_tools import *
from docxcompose.composer import Composer
from docx import Document as Document_compose
from docx.enum.table import WD_TABLE_ALIGNMENT
from src.domain.container import Container
from src.domain.container_requirements import Container_requirements
from src.domain.paragraph import Paragraph
from src.domain.styles import Styles
import shutil
import os
from docx.oxml.ns import qn
from docx.oxml.shared import OxmlElement
from docx.shared import Inches
from src.tools.pretty_print import pretty_print_block_and_indexes, pretty_print_paragraphs
from src.tools.index_creation import set_indexes
from src.reader.reader_for_requirements import WordReader

# NOTE: the import order above is intentionally left untouched: the star
# import from src.tools.doc_tools may define names that later imports are
# expected to override, so regrouping could change which object a name refers to.


class Doc:
    """
    Wrapper around a python-docx document (``self.xdoc``) that keeps a parallel
    list of project ``Paragraph`` objects, the containers built from them, the
    styles, and the operations used to apply a template to the document.

    TODO: prefix internal methods with an underscore
    """

    # Path fragments identifying documents that must not be pre-processed.
    _TEMPLATES_DIR = "data/templates"
    _GENERATED_DIR = "temp/generated_files"
    # Fully qualified name of the w:type attribute (WordprocessingML namespace).
    _W_TYPE_ATTR = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}type"

    def __init__(self, path='', id_=None):
        """
        Load the .docx located at ``path`` and build every derived structure.

        ``id_`` is accepted for interface compatibility but is not used: the
        document id is always ``id(self)``.
        """
        self.xdoc = docx.Document(path)
        self.title = get_title(path)
        self.name = self.title.split('.')[0]
        self.id_ = id(self)
        self.path = path
        self.paragraphs = [
            Paragraph(xparagraph, self.id_, position, self)
            for position, xparagraph in enumerate(self.xdoc.paragraphs)
        ]
        # May delete leading paragraphs and save the file, so it has to run
        # before the requirements reader opens the same path.
        self.handle_content_before_toc()
        if self._TEMPLATES_DIR in self.path:
            self.requirements_paragraphs = []
        else:
            self.requirements_paragraphs = WordReader(self.path).paragraphs
        self.container = Container(self.paragraphs, father=self)
        self.container_requirements = Container_requirements(self.requirements_paragraphs, father=self)
        set_indexes(self.container, self.path)
        set_indexes(self.container_requirements, self.path)
        self.styles = Styles(self.xdoc.styles)
        self.tasks = [
            c.get_fulltask(self.container.one_liner)
            for c in self.container.containers
            if c.task
        ]
        self.blocks = self.get_blocks()
        self.blocks_requirements = self.get_blocks_requirements()

    # ------------------------------------------------------------------ helpers

    def _is_template_or_generated(self):
        """Return True when the path points into the templates or generated-files folders."""
        return self._GENERATED_DIR in self.path or self._TEMPLATES_DIR in self.path

    @staticmethod
    def _index_to_str(index):
        """
        Turn an index such as ``[1, 2, 3]`` into ``"1.2.3"``.

        A value that is already a string is returned unchanged, which makes the
        conversion safe to apply more than once to the same block.
        """
        if isinstance(index, str):
            return index
        return '.'.join(str(part) for part in index)

    def _label_blocks(self, blocks):
        """Stamp every block with the document title and a dotted string index."""
        for block in blocks:
            block.doc = self.title
            block.index = self._index_to_str(block.index)
        return blocks

    @staticmethod
    def _parse_list_style_name(style_name):
        """
        Split a list style label into ``(core style name, indentation level)``.

        The core name is the text before ``' : '`` and the level is the integer
        after ``' = '``. Raises IndexError / ValueError when the label does not
        contain ``' = '`` followed by an integer.
        """
        real_style_name = style_name.split(' : ')[0]
        level = int(style_name.split(' = ')[1])
        return real_style_name, level

    # --------------------------------------------------------------- file level

    def copy(self, new_doc_path):
        """Copy the file to ``new_doc_path`` and return a ``Doc`` loaded from the copy."""
        shutil.copyfile(self.path, new_doc_path)
        duplicate = Doc(new_doc_path)
        duplicate.save_as_docx(new_doc_path)
        return duplicate

    def clear(self):
        """Delete the underlying file from disk."""
        os.remove(self.path)

    def save_as_docx(self, path: str = ''):
        """
        Save the document. When ``path`` is given it also becomes ``self.path``;
        otherwise the document is saved to its current path.
        """
        target = path if path else self.path
        self.path = target
        self.xdoc.save(target)

    # ----------------------------------------------------------------- template

    def apply_template(self, template, options_list):
        """
        Apply ``template`` to this document and return the list of log messages.

        ``options_list`` holds the option labels selected by the user; the two
        labels recognised here trigger text justification and table centring.

        TODO: move the texts into a configuration file
        """
        log = []
        if "Justifier le texte (Normal)" in options_list:
            log.append("Le contenu du document a été justifié")
            self.justify_content()
            self.save_as_docx()
        if "Recentrer les tableaux" in options_list:
            j = self.center_tables()
            plural = 's' if j > 1 else ''
            log.append(f"{j} table{plural} centrée{plural}")
            self.save_as_docx()
        log.append(f"Le template {template.name} a été ajouté avant le document")
        self.rearrange_tables()
        self.save_as_docx()
        log = self.styles.apply_from(template.styles, log)
        self.save_as_docx()
        self.delete_toc(template)
        self.normal_style_for_empty_paragraphs()
        self.save_as_docx()
        self.append_doc_to_template_and_update_toc(template)
        return log

    def copy_one_style(self, src_style_name: str, dest_style_name: str, template):
        """
        Copy one style of this document onto a style of the template.

        Returns the log produced by ``Styles.copy_one_style``, or None when the
        source style is not found in this document.
        """
        style_dest = template.styles.get_style_from_name(dest_style_name)
        src_style = self.styles.get_style_from_name(src_style_name)
        if not src_style:
            return None
        return self.styles.copy_one_style(src_style, style_dest)

    def get_different_styles_with_template(self, template):
        """Return the styles used in this document that differ from the template's."""
        styles_used_in_doc = self.get_all_styles_used_in_doc_except_list()
        return get_difference_with_template(styles_used_in_doc, template)

    # ------------------------------------------------------------------- blocks

    def get_blocks(self):
        """
        Return the blocks of the main container, labelled with the document
        title and a dotted string index. Returns None for templates and
        generated files.

        TODO: write a function that determines whether the Doc is neither a template nor a generated doc
        TODO: merge the two functions that return blocks
        """
        if self._is_template_or_generated():
            return
        return self._label_blocks(self.container.blocks)

    def get_blocks_requirements(self):
        """
        Same as ``get_blocks`` for the requirements container. Returns None for
        templates and generated files.
        """
        if self._is_template_or_generated():
            return
        return self._label_blocks(self.container_requirements.blocks)

    @property
    def toc(self):
        """
        return the paragraphs that are in the table of contents
        """
        return [p for p in self.paragraphs if p.toc]

    @property
    def structure(self):
        """Structure exposed by the main container."""
        return self.container.structure

    def replace_tasks(self, resolutions: list):
        """
        Replace the text of each task paragraph with the matching resolution.

        Nothing is replaced (a message is printed) when the number of
        resolutions differs from the number of tasks. Returns ``self``.
        """
        if len(resolutions) == len(self.tasks):  # exception to be handled
            task_paragraphs = [p for p in self.paragraphs if p.type == 'task']
            for paragraph, resolution in zip(task_paragraphs, resolutions):
                paragraph.set_text(resolution)
        else:
            print(f"résolutions : {len(resolutions)} != {len(self.tasks)} tasks")
        return self

    def get_paragraphs(self):
        """Return every paragraph known to the main container."""
        return self.container.all_paragraphs

    def get_text_from_paragraphs(self):
        """Return the text of each paragraph, in document order."""
        return [p.text for p in self.paragraphs]

    # -------------------------------------------------------------------- debug

    def check_document(self):
        """
        debugging function to analyse the doc

        Prints one line per body paragraph: a picture marker while the number of
        markers printed is below the number of inline shapes, otherwise the
        paragraph text, otherwise the next table as a list of dicts keyed by its
        first row, otherwise an empty-paragraph marker.
        """
        picture_total = len(self.xdoc.inline_shapes)
        tables = self.xdoc.tables
        picCount = 0
        tabCount = 0
        for paragraph in self.xdoc.paragraphs:
            if picCount < picture_total:
                print('\033[1mPicture \033[0m')
                picCount += 1
            elif paragraph.text:
                print(paragraph.text)
            elif tabCount < len(tables):
                table = tables[tabCount]
                data = []
                keys = None
                for row_number, row in enumerate(table.rows):
                    texts = (cell.text for cell in row.cells)
                    if row_number == 0:
                        # the first row provides the dictionary keys
                        keys = tuple(texts)
                        continue
                    data.append(dict(zip(keys, texts)))
                print('\033[1mTable:\033[0m', data)
                tabCount += 1
            else:
                print('\033[1mEmpty paragraph\033[0m')

    def number_images_in_doc(self):
        """
        for debug = not used

        Prints a marker and increments a counter once per body paragraph while
        the counter is below the number of inline shapes; returns the counter.
        """
        picture_total = len(self.xdoc.inline_shapes)
        picCount = 0
        for _ in self.xdoc.paragraphs:
            if picCount < picture_total:
                print('\033[1mPicture \033[0m')
                picCount += 1
        return picCount

    # ------------------------------------------------------------------- layout

    def center_tables(self):
        """Centre every table of the document and return how many were centred."""
        j = 0
        for table in self.xdoc.tables:
            table.alignment = WD_TABLE_ALIGNMENT.CENTER
            j += 1
        return j

    def rearrange_tables(self):
        """
        Hotfix for autofit. directly from XML

        Sets every table and every cell width to type "auto". Tables without a
        w:tblW element and cells without w:tcPr / w:tcW are left untouched
        instead of raising.
        """
        for table in self.xdoc.tables:
            table.autofit = True
            # NOTE(review): kept from the original; `allow_autofit` does not look
            # like a python-docx Table property, so this may have no effect on
            # the XML - confirm before removing.
            table.allow_autofit = True
            table_widths = table._tblPr.xpath("./w:tblW")
            if table_widths:
                table_widths[0].attrib[self._W_TYPE_ATTR] = "auto"
            for row in table.rows:
                for cell in row.cells:
                    tc_pr = cell._tc.tcPr
                    tc_w = tc_pr.tcW if tc_pr is not None else None
                    if tc_w is None:
                        continue
                    tc_w.type = 'auto'
                    tc_w.w = 0

    def center_images(self):
        """
        works only for images in the run
        """
        for paragraph in self.paragraphs:
            paragraph.center_paragraph()

    def justify_content(self):
        """
        applied only to normal style
        """
        for paragraph in self.paragraphs:
            paragraph.justify_paragraph()

    # ------------------------------------------------------------------- styles

    def get_all_styles_used_in_doc_except_list(self):
        """Delegate to the main container."""
        return self.container.get_all_styles_used_in_doc_except_list()

    def get_list_styles(self):
        """Delegate to the main container."""
        return self.container.get_list_styles()

    def retrieve_number_of_misapplied_styles(self):
        """Delegate to the main container."""
        return self.container.retrieve_number_of_misapplied_styles()

    def normal_style_for_empty_paragraphs(self):
        """Give the "Normal" style to every blank paragraph outside the TOC, then save."""
        for p in self.paragraphs:
            if p.blank and not p.toc:
                p.set_style(self.styles.get_style_from_name("Normal"))
        self.save_as_docx()

    def delete_style(self, style_name):
        """Delete the style named ``style_name`` and save the document."""
        self.styles.delete_style(style_name)
        self.save_as_docx()

    # ---------------------------------------------------------------------- TOC

    def append_doc_to_template_and_update_toc(self, template):
        """
        Append this document to the template, write the result to ``self.path``
        and update the table of contents of the written file.

        ``self.xdoc`` is not reloaded here: after this call it still holds the
        document as it was before the merge.

        TODO: rename Document_compose into XDocument
        Document_compose = plain old Document from docx
        Composer = from docxcompose => allows to modify several documents
        """
        master = Document_compose(template.path)
        composer = Composer(master)
        composer.append(Document_compose(self.path))
        composer.save(self.path)
        merged = Doc(self.path)
        update_table_of_contents(merged.xdoc)
        merged.save_as_docx()

    def delete_content_before_toc(self):
        """
        When the document contains a TOC, delete every body paragraph located
        before the first paragraph whose style name contains "toc", then save.

        TODO: loop with paragraph (ours)
        """
        if not self.contains_toc():
            return
        # self.xdoc.paragraphs returns a fresh list, so deleting while iterating is safe
        for xparagraph in self.xdoc.paragraphs:
            if "toc" in xparagraph.style.name:
                break
            self.delete_paragraph(xparagraph)
            # the deleted paragraphs are consecutive from the start of the
            # document, so the matching wrapper is always the first one
            self.paragraphs.pop(0)
        self.save_as_docx()

    def delete_paragraph(self, paragraph):
        """
        Remove a python-docx paragraph from the XML tree and invalidate the proxy.

        TODO: to be put in paragraph
        """
        element = paragraph._element
        element.getparent().remove(element)
        paragraph._p = paragraph._element = None

    def delete_toc(self, template):
        """
        Delete the TOC paragraphs of this document, then save.

        The wrappers of the deleted paragraphs are removed from
        ``self.paragraphs`` in place, whatever their position, so the list stays
        aligned with ``self.xdoc.paragraphs``.

        ``template`` is not used; it is kept for interface compatibility.

        TODO: loop with paragraph (ours)
        """
        toc_paragraphs = self.toc
        for paragraph in toc_paragraphs:
            self.delete_paragraph(paragraph.xparagraph)
        removed = {id(paragraph) for paragraph in toc_paragraphs}
        # slice assignment keeps the same list object for anything sharing it
        self.paragraphs[:] = [p for p in self.paragraphs if id(p) not in removed]
        self.save_as_docx()

    def insert_table_of_content(self, index):
        """
        To create a TOC (not used here)

        Inserts an empty "Normal" paragraph before the body paragraph at
        ``index`` and fills its run with a TOC field, then prints the XML of the
        new paragraph.
        """
        paragraph = self.xdoc.paragraphs[index].insert_paragraph_before("", "Normal")
        paragraph.paragraph_format.space_before = Inches(0)
        paragraph.paragraph_format.space_after = Inches(0)
        run = paragraph.add_run()

        field_begin = OxmlElement('w:fldChar')
        field_begin.set(qn('w:fldCharType'), 'begin')

        instruction = OxmlElement('w:instrText')
        instruction.set(qn('xml:space'), 'preserve')
        # change 1-5 depending on the heading levels you need
        instruction.text = 'TOC \\o "1-5" \\h \\z \\u'

        field_separate = OxmlElement('w:fldChar')
        field_separate.set(qn('w:fldCharType'), 'separate')
        # NOTE(review): the original also built a w:t placeholder ("Right-click
        # to update field.") but overwrote it before use; only w:updateFields
        # was ever appended, and that is what is reproduced here.
        update_fields = OxmlElement('w:updateFields')
        update_fields.set(qn('w:val'), 'true')
        field_separate.append(update_fields)

        field_end = OxmlElement('w:fldChar')
        field_end.set(qn('w:fldCharType'), 'end')

        r_element = run._r
        r_element.append(field_begin)
        r_element.append(instruction)
        r_element.append(field_separate)
        r_element.append(field_end)
        print(paragraph._p.xml)

    def contains_toc(self):
        """
        Return True when at least one run of the body has a style whose name
        contains "hyperlink" or "lienhypertexte" (case-insensitive), which is
        how a TOC is detected here.
        """
        body = self.xdoc._body._body
        for run in body.xpath('.//w:r'):
            style = run.style
            if not style:
                continue
            lowered = style.lower()
            if "hyperlink" in lowered or "lienhypertexte" in lowered:
                return True
        return False

    def handle_content_before_toc(self):
        """
        Delete the content located before the TOC, except for templates and
        generated files.

        TODO: use a function to determine the type of the doc
        """
        if not self._is_template_or_generated():
            self.delete_content_before_toc()

    # ------------------------------------------------------------------ bullets

    def change_bullet_style(self, style_name, template_style_name, template) -> dict:
        """
        Replace every body list paragraph matching ``style_name`` (core style
        name and indentation level) by a new paragraph with the same text and
        the template style ``template_style_name``; do the same inside tables,
        save, and return the log dictionary built by
        ``change_bullet_style_in_tables``.

        real_style_name = core style name with no indentation
        level = indentation level

        TODO: recode to respect the OOP
        """
        real_style_name, level = self._parse_list_style_name(style_name)
        # One snapshot is enough: each replacement removes one paragraph and
        # inserts one at the same position, so positions never shift.
        xparagraphs = self.xdoc.paragraphs
        last_position = len(xparagraphs) - 1
        for i, xparagraph in enumerate(xparagraphs):
            if not (real_style_name == xparagraph.style.name
                    and self.paragraphs[i].is_list
                    and self.paragraphs[i].list_indentation == level):
                continue
            text = xparagraph.text  # read before the proxy is invalidated
            following = xparagraphs[i + 1] if i < last_position else None
            self.delete_paragraph(xparagraph)
            new_style = template.styles.get_style_from_name(template_style_name)
            if following is None:
                inserted = self.xdoc.add_paragraph(text, style=new_style)
            else:
                inserted = following.insert_paragraph_before(text, style=new_style)
            self.paragraphs[i] = Paragraph(inserted, self.id_, i, self)
        log_dict = self.change_bullet_style_in_tables(style_name, template_style_name, template)
        self.save_as_docx()
        return log_dict

    def change_bullet_style_in_tables(self, style_name, template_style_name, template) -> dict:
        """
        Same replacement as ``change_bullet_style`` for the paragraphs found in
        the cells of the document tables. Does not save.

        Returns ``{'list_mapping': <log message>}``.

        TODO: recode to respect the OOP
        """
        real_style_name, level = self._parse_list_style_name(style_name)
        for table in self.xdoc.tables:
            for row in table.rows:
                for cell in row.cells:
                    cell_paragraphs = cell.paragraphs
                    last_position = len(cell_paragraphs) - 1
                    for i, xparagraph in enumerate(cell_paragraphs):
                        wrapped = Paragraph(xparagraph, self.id_, i, self)
                        if not (real_style_name == xparagraph.style.name
                                and wrapped.is_list
                                and wrapped.list_indentation == level):
                            continue
                        # read the text before deletion: delete_paragraph
                        # invalidates the proxy the wrapper was built on
                        text = wrapped.text
                        following = cell_paragraphs[i + 1] if i < last_position else None
                        self.delete_paragraph(xparagraph)
                        new_style = template.styles.get_style_from_name(template_style_name)
                        if following is None:
                            cell.add_paragraph(text, style=new_style)
                        else:
                            following.insert_paragraph_before(text, style=new_style)
        log = f"Le style {style_name} a été changé en {template_style_name}"
        return {'list_mapping': log}

    # ------------------------------------------------------------------- tables

    def table_insertion(self, index: str, content: dict):
        """
        Insert a table after the container whose index is ``index`` and save.

        ``index`` is the dotted index of the block (for instance "1.2.3").
        ``content`` has the following format:

            content = {
                "headers": ["header1", "header2", "header3"],
                "rows": [
                    ["row1", "row1", "row1"],
                    ["row2", "row2", "row2"],
                    ["row3", "row3", "row3"],
                ]
            }

        Returns the created table, or None (after printing a message) when the
        index is not made of integers or matches no container.
        """
        try:
            # int() instead of eval(): the index comes from the caller and must
            # never be executed as code
            wanted_index = [int(part) for part in index.split(".")]
        except ValueError:
            print("The index is not valid")
            return None

        # find the container which has the index
        paragraph = None
        for c in self.container.containers:
            if c.index == wanted_index:
                paragraph = c.title if c.title else c.paragraphs[0]
                break
        if not paragraph:
            print("The index is not valid")
            return None

        headers = content["headers"]
        rows = content["rows"]
        table = self.xdoc.add_table(rows=len(rows) + 1, cols=len(headers))
        for column, header in enumerate(headers):
            table.cell(0, column).text = header
        for line, row in enumerate(rows, start=1):
            for column, value in enumerate(row):
                table.cell(line, column).text = value
        # add_table appends at the end of the document: move it to its place
        self.move_table_after(table, paragraph.xparagraph)
        self.save_as_docx()
        return table

    def delete_table(self, table):
        """Remove ``table`` from the XML tree, invalidate the proxy and save."""
        element = table._element
        element.getparent().remove(element)
        table._element = table._row = None
        self.save_as_docx()

    def move_table_after(self, table, paragraph):
        """Move the XML element of ``table`` right after the one of ``paragraph``."""
        paragraph._p.addnext(table._tbl)

    def remove_all_but_last_section(self):
        """
        not used

        Prints and removes every w:sectPr element found inside a w:pPr.
        """
        for sectPr in self.xdoc._element.xpath(".//w:pPr/w:sectPr"):
            print(sectPr)
            sectPr.getparent().remove(sectPr)