# GenProp/src/domain/doc.py
# adrien.aribaut-gaudin
# fix: comment in the doc file
# ecf147c
import docx
from src.tools.doc_tools import *
from docxcompose.composer import Composer
from docx import Document as Document_compose
from docx.enum.table import WD_TABLE_ALIGNMENT
from src.domain.container import Container
from src.domain.container_requirements import Container_requirements
from src.domain.paragraph import Paragraph
from src.domain.styles import Styles
import shutil
import os
from docx.oxml.ns import qn
from docx.oxml.shared import OxmlElement
from docx.shared import Inches
from src.tools.pretty_print import pretty_print_block_and_indexes, pretty_print_paragraphs
from src.tools.index_creation import set_indexes
from src.reader.reader_for_requirements import WordReader
class Doc:
    """
    Wrapper around a python-docx document (``self.xdoc``) that also keeps the
    project's own views of it (paragraph wrappers, containers, styles, blocks)
    and offers helpers to apply a template and edit the content.

    TODO: prefix the internal methods with an underscore
    """

    def __init__(self, path='', id_=None):
        """
        Load the .docx file at ``path`` and build every derived view of it.

        path: location of the .docx file. Substring tests on it decide whether
              the file is handled as a template or as a generated file.
        id_:  NOTE(review): accepted but never read; the document id is always
              ``id(self)``. Confirm whether callers expect it to be honoured.
        """
        self.xdoc = docx.Document(path)
        self.title = get_title(path)
        self.name = self.title.split('.')[0]
        self.id_ = id(self)
        self.path = path
        self.paragraphs = [Paragraph(xp, self.id_, i, self) for i, xp in enumerate(self.xdoc.paragraphs)]
        # Must run after self.paragraphs exists: it removes leading entries from
        # that list and saves the file before the requirements reader opens it.
        self.handle_content_before_toc()
        self.requirements_paragraphs = [] if "data/templates" in self.path else WordReader(self.path).paragraphs
        self.container = Container(self.paragraphs, father=self)
        self.container_requirements = Container_requirements(self.requirements_paragraphs, father=self)
        set_indexes(self.container, self.path)
        set_indexes(self.container_requirements, self.path)
        self.styles = Styles(self.xdoc.styles)
        self.tasks = [c.get_fulltask(self.container.one_liner) for c in self.container.containers if c.task]
        self.blocks = self.get_blocks()
        self.blocks_requirements = self.get_blocks_requirements()

    def _is_source_document(self):
        """
        Return True when the path points neither into the templates folder nor
        into the generated-files folder.
        """
        return "temp/generated_files" not in self.path and "data/templates" not in self.path

    @staticmethod
    def _index_to_str(index):
        """
        Turn an index such as [1, 2, 3] into the dotted string "1.2.3".

        A value that is already a string is returned untouched, so labelling the
        same blocks twice is harmless. An empty index gives an empty string.
        """
        if isinstance(index, str):
            return index
        return '.'.join(str(part) for part in index)

    def _label_blocks(self, blocks):
        """
        Stamp every block with the document title and a dotted string index,
        then return the same list.
        """
        for block in blocks:
            block.doc = self.title
            block.index = self._index_to_str(block.index)
        return blocks

    def copy(self, new_doc_path):
        """
        Duplicate the file to ``new_doc_path`` and return a Doc loaded from the copy.
        """
        shutil.copyfile(self.path, new_doc_path)
        duplicate = Doc(new_doc_path)
        duplicate.save_as_docx(new_doc_path)
        return duplicate

    def clear(self):
        """
        Delete the file backing this document from disk.
        """
        os.remove(self.path)

    def apply_template(self, template, options_list):
        """
        Apply the selected options and the template styles to this document,
        then append the document to the template file.

        Returns the list of log messages describing what was done.

        TODO: move the message texts to a configuration file
        """
        log = []
        if "Justifier le texte (Normal)" in options_list:
            log.append("Le contenu du document a été justifié")
            self.justify_content()
            self.save_as_docx()
        if "Recentrer les tableaux" in options_list:
            j = self.center_tables()
            plural = 's' if j > 1 else ''
            log.append(f"{j} table{plural} centrée{plural}")
            self.save_as_docx()
        log.append(f"Le template {template.name} a été ajouté avant le document")
        self.rearrange_tables()
        self.save_as_docx()
        log = self.styles.apply_from(template.styles, log)
        self.save_as_docx()
        self.delete_toc(template)
        self.normal_style_for_empty_paragraphs()
        self.save_as_docx()
        self.append_doc_to_template_and_update_toc(template)
        return log

    def copy_one_style(self, src_style_name: str, dest_style_name: str, template):
        """
        Copy the template style ``dest_style_name`` onto this document's style
        ``src_style_name``.

        Returns the log produced by the styles object, or None when this
        document has no style called ``src_style_name``.
        """
        style_dest = template.styles.get_style_from_name(dest_style_name)
        src_style = self.styles.get_style_from_name(src_style_name)
        if not src_style:
            return None
        return self.styles.copy_one_style(src_style, style_dest)

    def get_different_styles_with_template(self, template):
        """
        Return the styles used in this document (list styles excluded) that
        differ from the template, as computed by get_difference_with_template.
        """
        styles_used_in_doc = self.get_all_styles_used_in_doc_except_list()
        return get_difference_with_template(styles_used_in_doc, template)

    def save_as_docx(self, path: str = ''):
        """
        Save the document. When ``path`` is given it becomes the document's new
        path; otherwise the current path is reused.
        """
        self.path = path if path else self.path
        self.xdoc.save(self.path)

    def get_blocks(self):
        """
        Return the content blocks labelled with the document title and a dotted
        index, or None for templates and generated files.
        """
        if not self._is_source_document():
            return None
        return self._label_blocks(self.container.blocks)

    def get_blocks_requirements(self):
        """
        Same as get_blocks, but for the blocks of the requirements container.
        """
        if not self._is_source_document():
            return None
        return self._label_blocks(self.container_requirements.blocks)

    @property
    def toc(self):
        """
        return the paragraphs that are in the table of contents
        """
        return [p for p in self.paragraphs if p.toc]

    @property
    def structure(self):
        """
        Structure exposed by the main container.
        """
        return self.container.structure

    def replace_tasks(self, resolutions: "list[str]"):
        """
        Replace the text of each 'task' paragraph with the matching resolution.

        When the number of resolutions differs from the number of tasks nothing
        is replaced and a message is printed. Returns self in both cases.
        """
        if len(resolutions) != len(self.tasks):  # exception to be handled
            print(f"résolutions : {len(resolutions)} != {len(self.tasks)} tasks")
            return self
        task_paragraphs = [p for p in self.paragraphs if p.type == 'task']
        for paragraph, resolution in zip(task_paragraphs, resolutions):
            paragraph.set_text(resolution)
        return self

    def get_paragraphs(self):
        """
        Return all the paragraphs known to the main container.
        """
        return self.container.all_paragraphs

    def get_text_from_paragraphs(self):
        """
        Return the text of every paragraph wrapper, in order.
        """
        return [p.text for p in self.paragraphs]

    def check_document(self):
        """
        debugging function to analyse the doc

        NOTE(review): the picture branch only counts paragraphs until the number
        of inline shapes is reached; it does not locate the pictures.
        """
        shape_total = len(self.xdoc.inline_shapes)
        tables = self.xdoc.tables
        pic_count = 0
        tab_count = 0
        for paragraph in self.xdoc.paragraphs:
            if pic_count < shape_total:
                print('\033[1mPicture \033[0m')
                pic_count += 1
            elif paragraph.text:
                print(paragraph.text)
            elif tab_count < len(tables):
                table = tables[tab_count]
                keys = None
                data = []
                for i, row in enumerate(table.rows):
                    texts = tuple(cell.text for cell in row.cells)
                    if i == 0:
                        # the first row supplies the keys of the following rows
                        keys = texts
                    else:
                        data.append(dict(zip(keys, texts)))
                print('\033[1mTable:\033[0m', data)
                tab_count += 1
            else:
                print('\033[1mEmpty paragraph\033[0m')

    def center_tables(self):
        """
        Center every table of the document and return how many were processed.
        """
        count = 0
        for count, table in enumerate(self.xdoc.tables, start=1):
            table.alignment = WD_TABLE_ALIGNMENT.CENTER
        return count

    def rearrange_tables(self):
        """
        Hotfix for autofit.
        directly from XML

        Tables without a w:tblW element and cells without a width element are
        skipped instead of raising.
        """
        type_key = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}type"
        for table in self.xdoc.tables:
            table.autofit = True
            # NOTE(review): allow_autofit does not look like a python-docx
            # attribute; kept because the original code set it as well.
            table.allow_autofit = True
            table_widths = table._tblPr.xpath("./w:tblW")
            if table_widths:
                table_widths[0].attrib[type_key] = "auto"
            for row in table.rows:
                for cell in row.cells:
                    tc_pr = cell._tc.tcPr
                    tc_w = tc_pr.tcW if tc_pr is not None else None
                    if tc_w is None:
                        continue
                    tc_w.type = 'auto'
                    tc_w.w = 0

    def center_images(self):
        """
        works only for images in the run
        """
        for paragraph in self.paragraphs:
            paragraph.center_paragraph()

    def justify_content(self):
        """
        applied only to normal style
        """
        for paragraph in self.paragraphs:
            paragraph.justify_paragraph()

    def number_images_in_doc(self):
        """
        for debug = not used

        Prints one line per counted picture and returns the count, which is the
        smaller of the number of paragraphs and the number of inline shapes.
        """
        shape_total = len(self.xdoc.inline_shapes)
        pic_count = 0
        for _ in self.xdoc.paragraphs:
            if pic_count < shape_total:
                print('\033[1mPicture \033[0m')
                pic_count += 1
        return pic_count

    def get_all_styles_used_in_doc_except_list(self):
        """
        Delegate to the main container.
        """
        return self.container.get_all_styles_used_in_doc_except_list()

    def get_list_styles(self):
        """
        Delegate to the main container.
        """
        return self.container.get_list_styles()

    def retrieve_number_of_misapplied_styles(self):
        """
        Delegate to the main container.
        """
        return self.container.retrieve_number_of_misapplied_styles()

    def normal_style_for_empty_paragraphs(self):
        """
        Give the "Normal" style to every blank paragraph that is not part of the
        table of contents, then save.
        """
        for p in self.paragraphs:
            if p.blank and not p.toc:
                p.set_style(self.styles.get_style_from_name("Normal"))
        self.save_as_docx()

    def append_doc_to_template_and_update_toc(self, template):
        """
        Append this document after the template content, write the result to
        this document's path, then reload it to update its table of contents.

        TODO: rename Document_compose into XDocument
        Document_compose = plain old Document from docx
        Composer = from docxcompose => allows to modify several documents
        """
        master = Document_compose(template.path)
        composer = Composer(master)
        composer.append(Document_compose(self.path))
        composer.save(self.path)
        new_doc = Doc(self.path)
        update_table_of_contents(new_doc.xdoc)
        new_doc.save_as_docx()

    def delete_content_before_toc(self):
        """
        Delete every paragraph located before the first paragraph whose style
        name contains "toc", and drop the matching leading wrappers.

        Nothing is deleted when no such paragraph exists: contains_toc() only
        looks at hyperlink-styled runs, so it can be True without a TOC.

        TODO: loop with paragraph (ours)
        """
        if not self.contains_toc():
            return
        xparagraphs = self.xdoc.paragraphs
        toc_start = next((i for i, xp in enumerate(xparagraphs) if "toc" in xp.style.name), None)
        if toc_start is not None:
            for xp in xparagraphs[:toc_start]:
                self.delete_paragraph(xp)
            # the wrappers are in the same order as the python-docx paragraphs
            del self.paragraphs[:toc_start]
        self.save_as_docx()

    def delete_paragraph(self, paragraph):
        """
        Detach a python-docx paragraph from its parent element and clear the
        proxy's references to it.

        TODO: to be put in paragraph
        """
        element = paragraph._element
        element.getparent().remove(element)
        paragraph._p = paragraph._element = None

    def delete_toc(self, template):
        """
        Delete the table-of-contents paragraphs from the document and from
        ``self.paragraphs``, then save.

        template: not used by this method; kept so existing callers keep working.

        TODO: loop with paragraph (ours)
        """
        toc_paragraphs = self.toc
        for p in toc_paragraphs:
            self.delete_paragraph(p.xparagraph)
        # Remove exactly the TOC wrappers, wherever they sit, and do it in place
        # so that other holders of this list see the change.
        toc_ids = {id(p) for p in toc_paragraphs}
        self.paragraphs[:] = [p for p in self.paragraphs if id(p) not in toc_ids]
        self.save_as_docx()

    def insert_table_of_content(self, index):
        """
        To create a TOC (not used here)

        Inserts an empty "Normal" paragraph before paragraph ``index`` and fills
        its run with a TOC field, then prints the XML of that paragraph.
        """
        paragraph = self.xdoc.paragraphs[index].insert_paragraph_before("", "Normal")
        paragraph.paragraph_format.space_before = Inches(0)
        paragraph.paragraph_format.space_after = Inches(0)
        run = paragraph.add_run()
        field_begin = OxmlElement('w:fldChar')
        field_begin.set(qn('w:fldCharType'), 'begin')
        instruction = OxmlElement('w:instrText')
        instruction.set(qn('xml:space'), 'preserve')
        # adjust the 1-5 range to the heading levels wanted in the TOC
        instruction.text = 'TOC \\o "1-5" \\h \\z \\u'
        field_separate = OxmlElement('w:fldChar')
        field_separate.set(qn('w:fldCharType'), 'separate')
        update_fields = OxmlElement('w:updateFields')
        update_fields.set(qn('w:val'), 'true')
        field_separate.append(update_fields)
        field_end = OxmlElement('w:fldChar')
        field_end.set(qn('w:fldCharType'), 'end')
        r_element = run._r
        for element in (field_begin, instruction, field_separate, field_end):
            r_element.append(element)
        print(paragraph._p.xml)

    def contains_toc(self):
        """
        Return True when at least one run of the body has a style whose name
        contains "hyperlink" or "lienhypertexte" (case-insensitive).
        """
        runs = self.xdoc._body._body.xpath('.//w:r')
        markers = ("hyperlink", "lienhypertexte")
        return any(
            bool(r.style) and any(marker in r.style.lower() for marker in markers)
            for r in runs
        )

    def handle_content_before_toc(self):
        """
        Remove the content located before the TOC, except for templates and
        generated files.

        TODO: use a function to determine the type of the doc
        """
        if self._is_source_document():
            self.delete_content_before_toc()

    def delete_style(self, style_name):
        """
        Delete a style through the styles object, then save.
        """
        self.styles.delete_style(style_name)
        self.save_as_docx()

    def change_bullet_style(self, style_name, template_style_name, template) -> dict:
        """
        Replace every list paragraph matching ``style_name`` by a new paragraph
        carrying the same text in the template style ``template_style_name``,
        in the body first and then in the tables.

        style_name is split on ' : ' to get the core style name (without the
        indentation) and on ' = ' to get the indentation level.
        Returns the log dictionary built by change_bullet_style_in_tables.

        TODO: recode to respect the OOP
        """
        real_style_name = style_name.split(' : ')[0]
        level = int(style_name.split(' = ')[1])
        # One snapshot is enough: each replacement swaps a paragraph for another
        # at the same place, so the positions of the remaining ones are stable.
        for i, xpara in enumerate(self.xdoc.paragraphs):
            if (real_style_name == xpara.style.name
                    and self.paragraphs[i].is_list
                    and self.paragraphs[i].list_indentation == level):
                new_style = template.styles.get_style_from_name(template_style_name)
                # Insert right before the paragraph being replaced, then delete
                # it, so the text keeps its position even when a table follows.
                inserted = xpara.insert_paragraph_before(xpara.text, style=new_style)
                self.delete_paragraph(xpara)
                self.paragraphs[i] = Paragraph(inserted, self.id_, i, self)
        log_dict = self.change_bullet_style_in_tables(style_name, template_style_name, template)
        self.save_as_docx()
        return log_dict

    def change_bullet_style_in_tables(self, style_name, template_style_name, template) -> dict:
        """
        Same replacement as change_bullet_style, applied to the paragraphs of
        every table cell. Returns {'list_mapping': <log message>}.

        TODO: recode to respect the OOP
        """
        real_style_name = style_name.split(' : ')[0]
        level = int(style_name.split(' = ')[1])
        for table in self.xdoc.tables:
            for row in table.rows:
                for cell in row.cells:
                    for i, para in enumerate(cell.paragraphs):
                        real_para = Paragraph(para, self.id_, i, self)
                        if (real_style_name == para.style.name
                                and real_para.is_list
                                and real_para.list_indentation == level):
                            # read the text while the paragraph is still attached
                            text = real_para.text
                            new_style = template.styles.get_style_from_name(template_style_name)
                            para.insert_paragraph_before(text, style=new_style)
                            self.delete_paragraph(para)
        log = f"Le style {style_name} a été changé en {template_style_name}"
        return {'list_mapping': log}

    def table_insertion(self, index: str, content: dict):
        """
        Insert a table after the container whose index is ``index``.

        index:   dotted index of the target container, e.g. "1.2".
        content: {"headers": ["h1", "h2"], "rows": [["a", "b"], ["c", "d"]]}

        The table is placed after the container title when it has one, otherwise
        after its first paragraph. Returns the new table, or None (with a
        printed message) when the index is malformed or matches no container.
        """
        try:
            # int() instead of eval(): the index comes from the caller and must
            # never be executed as code.
            index_in_list = [int(part) for part in index.split(".")]
        except ValueError:
            print("The index is not valid")
            return None
        paragraph = None
        for c in self.container.containers:
            if c.index == index_in_list:
                paragraph = c.title if c.title else c.paragraphs[0]
                break
        if not paragraph:
            print("The index is not valid")
            return None
        headers, rows = content["headers"], content["rows"]
        table = self.xdoc.add_table(rows=len(rows) + 1, cols=len(headers))
        for col, header in enumerate(headers):
            table.cell(0, col).text = header
        for line, row in enumerate(rows, start=1):
            for col, value in enumerate(row):
                table.cell(line, col).text = value
        self.move_table_after(table, paragraph.xparagraph)
        self.save_as_docx()
        return table

    def delete_table(self, table):
        """
        Detach a table from the document, clear the proxy's references, then save.
        """
        table._element.getparent().remove(table._element)
        table._element = table._row = None
        self.save_as_docx()

    def move_table_after(self, table, paragraph):
        """
        Move the table element so that it directly follows the paragraph element.
        """
        tbl, p = table._tbl, paragraph._p
        p.addnext(tbl)

    def remove_all_but_last_section(self):
        """
        not used

        Prints and removes every sectPr element found inside a paragraph's
        properties (w:pPr).
        """
        for sect_pr in self.xdoc._element.xpath(".//w:pPr/w:sectPr"):
            print(sect_pr)
            sect_pr.getparent().remove(sect_pr)