|
|
import docx |
|
|
from src.tools.doc_tools import * |
|
|
from docxcompose.composer import Composer |
|
|
from docx import Document as Document_compose |
|
|
from docx.enum.table import WD_TABLE_ALIGNMENT |
|
|
from src.domain.container import Container |
|
|
from src.domain.container_requirements import Container_requirements |
|
|
from src.domain.paragraph import Paragraph |
|
|
from src.domain.styles import Styles |
|
|
import shutil |
|
|
import os |
|
|
from docx.oxml.ns import qn |
|
|
from docx.oxml.shared import OxmlElement |
|
|
from docx.shared import Inches |
|
|
from src.tools.pretty_print import pretty_print_block_and_indexes, pretty_print_paragraphs |
|
|
from src.tools.index_creation import set_indexes |
|
|
from src.reader.reader_for_requirements import WordReader |
|
|
|
|
|
class Doc: |
|
|
|
|
|
""" |
|
|
TODO: prefix internal methods with an underscore |
|
|
""" |
|
|
|
|
|
def __init__(self, path='', id_=None):
    """
    Load the Word document at ``path`` and build every derived view of it.

    :param path: location of the .docx file. The path is also inspected for
        the "data/templates" and "temp/generated_files" markers, which switch
        off parts of the processing below.
    :param id_: NOTE(review): accepted but never read; the identifier that is
        stored is always ``id(self)`` - confirm whether any caller relies on
        passing its own id.
    """
    # Underlying python-docx document.
    self.xdoc = docx.Document(path)
    self.title = get_title(path)
    # Text of the title before its first '.'.
    self.name = self.title.split('.')[0]
    self.id_ = id(self)
    self.path = path
    # One wrapper per python-docx paragraph, in document order.
    self.paragraphs = [Paragraph(xp, self.id_, i, self) for (i, xp) in enumerate(self.xdoc.paragraphs)]
    # Must run before the containers are built: for regular documents it
    # drops the leading entries of self.paragraphs and saves the file.
    self.handle_content_before_toc()
    # Paths containing "data/templates" get no requirements paragraphs.
    self.requirements_paragraphs = WordReader(self.path).paragraphs if not "data/templates" in self.path else []
    self.container = Container(self.paragraphs, father=self)
    self.container_requirements = Container_requirements(self.requirements_paragraphs, father=self)
    set_indexes(self.container, self.path)
    set_indexes(self.container_requirements, self.path)
    self.styles = Styles(self.xdoc.styles)
    # Full task of every sub-container that carries one.
    self.tasks = [c.get_fulltask(self.container.one_liner) for c in self.container.containers if c.task]
    # Both are None for templates and generated files (see get_blocks).
    self.blocks = self.get_blocks()
    self.blocks_requirements = self.get_blocks_requirements()
|
|
|
|
|
|
|
|
def copy(self, new_doc_path):
    """
    Duplicate the document file and return a Doc built on the duplicate.

    :param new_doc_path: where the copy of ``self.path`` is written.
    :return: a new Doc loaded from ``new_doc_path`` and saved back to it.
    """
    shutil.copyfile(self.path, new_doc_path)
    duplicate = Doc(new_doc_path)
    duplicate.save_as_docx(new_doc_path)
    return duplicate
|
|
|
|
|
def clear(self):
    """Delete the file located at ``self.path``."""
    os.unlink(self.path)
|
|
|
|
|
def apply_template(self, template, options_list):
    """
    Run the whole template-application pipeline and return its log.

    The optional steps (justification, table centring) run only when their
    label is present in ``options_list``. The remaining steps always run, in
    this order: table autofit, style transfer from the template, removal of
    the document's own table of contents, "Normal" style on blank paragraphs,
    and finally the merge behind the template.

    :param template: document whose ``name``, ``styles`` and file are used.
    :param options_list: labels of the optional steps to execute.
    :return: list of log messages describing what was done.

    TODO: move the user-facing text into a config file
    """
    messages = []

    if "Justifier le texte (Normal)" in options_list:
        messages.append("Le contenu du document a été justifié")
        self.justify_content()
        self.save_as_docx()

    if "Recentrer les tableaux" in options_list:
        centered_count = self.center_tables()
        plural = 's' if centered_count > 1 else ''
        messages.append(f"{centered_count} table{plural} centrée{plural}")
        self.save_as_docx()

    messages.append(f"Le template {template.name} a été ajouté avant le document")

    self.rearrange_tables()
    self.save_as_docx()

    # apply_from receives the log so far and hands back the log to keep.
    messages = self.styles.apply_from(template.styles, messages)
    self.save_as_docx()

    self.delete_toc(template)
    self.normal_style_for_empty_paragraphs()
    self.save_as_docx()

    self.append_doc_to_template_and_update_toc(template)
    return messages
|
|
|
|
|
def copy_one_style(self, src_style_name: str, dest_style_name: str, template):
    """
    Copy one of this document's styles onto a style of the template.

    :param src_style_name: name looked up in this document's styles.
    :param dest_style_name: name looked up in the template's styles.
    :param template: document providing the destination style.
    :return: the log returned by the style copy, or None when the source
        style cannot be found.
    """
    # The destination is resolved first, exactly as callers have always seen.
    target_style = template.styles.get_style_from_name(dest_style_name)
    source_style = self.styles.get_style_from_name(src_style_name)
    if not source_style:
        return None
    return self.styles.copy_one_style(source_style, target_style)
|
|
|
|
|
def get_different_styles_with_template(self, template):
    """
    Compare the non-list styles used in this document with ``template``.

    :return: whatever ``get_difference_with_template`` computes for them.
    """
    return get_difference_with_template(
        self.get_all_styles_used_in_doc_except_list(),
        template,
    )
|
|
|
|
|
def save_as_docx(self, path: str = ''):
    """
    Write the document to disk.

    :param path: destination file; when empty the current ``self.path`` is
        reused. The destination that was used becomes the new ``self.path``.
    """
    self.path = path or self.path
    self.xdoc.save(self.path)
|
|
|
|
|
def get_blocks(self):
    """
    Return the blocks of the main container, labelled for this document.

    Every block gets ``doc`` set to the document title and its ``index``
    turned into a dotted string (``[1, 2, 3]`` becomes ``"1.2.3"``). An index
    that is already a string is left alone, so calling this method twice
    does not corrupt the blocks.

    :return: the container's blocks, or None when the path contains
        "temp/generated_files" or "data/templates".

    TODO: do a function that determines if the Doc is not a template nor a generated doc
    TODO: merge the two functions for getting blocks
    TODO: why do we need two functions? in the end, we need only
    """
    if "temp/generated_files" in self.path or "data/templates" in self.path:
        return None

    blocks = self.container.blocks
    for block in blocks:
        block.doc = self.title
        # Iterating over an already-joined string would re-join its
        # characters ("1.2" -> "1...2"), hence the type check.
        if not isinstance(block.index, str):
            block.index = '.'.join(str(part) for part in block.index)
    return blocks
|
|
|
|
|
|
|
|
def get_blocks_requirements(self):
    """
    Return the blocks of the requirements container, labelled for this document.

    Every block gets ``doc`` set to the document title. A list-like ``index``
    is turned into a dotted string (``[1, 2]`` becomes ``"1.2"``, an empty
    index becomes ``""``); an index that is already a string is kept.

    :return: the requirements container's blocks, or None when the path
        contains "temp/generated_files" or "data/templates".
    """
    if "temp/generated_files" in self.path or "data/templates" in self.path:
        return None

    blocks = self.container_requirements.blocks
    for block in blocks:
        block.doc = self.title
        if not isinstance(block.index, str):
            block.index = '.'.join(str(part) for part in block.index)
    return blocks
|
|
|
|
|
@property
def toc(self):
    """
    Paragraphs of the document whose ``toc`` flag is set, in document order.
    """
    toc_paragraphs = []
    for candidate in self.paragraphs:
        if candidate.toc:
            toc_paragraphs.append(candidate)
    return toc_paragraphs
|
|
|
|
|
@property
def structure(self):
    """Structure exposed by the main container."""
    main_container = self.container
    return main_container.structure
|
|
|
|
|
def replace_tasks(self, resolutions: [str]):
    """
    Overwrite the text of each task paragraph with its resolution.

    Nothing is replaced unless there are exactly as many resolutions as
    tasks; in that case a message is printed instead.

    :param resolutions: replacement texts, in the order of the task paragraphs.
    :return: this Doc, so calls can be chained.
    """
    if len(resolutions) != len(self.tasks):
        print(f"résolutions : {len(resolutions)} != {len(self.tasks)} tasks")
        return self

    task_paragraphs = [candidate for candidate in self.paragraphs if candidate.type == 'task']
    for task_paragraph, resolution in zip(task_paragraphs, resolutions):
        task_paragraph.set_text(resolution)
    return self
|
|
|
|
|
def get_paragraphs(self):
    """Return ``all_paragraphs`` of the main container."""
    main_container = self.container
    return main_container.all_paragraphs
|
|
|
|
|
def get_text_from_paragraphs(self):
    """Return the ``text`` of every paragraph of the document, in order."""
    texts = []
    for paragraph in self.paragraphs:
        texts.append(paragraph.text)
    return texts
|
|
|
|
|
def check_document(self):
    """
    Debugging helper: print one line per python-docx paragraph.

    For each paragraph, in order of priority: "Picture" while fewer pictures
    were printed than the document has inline shapes; otherwise the text of
    the paragraph when it has some; otherwise the next not-yet-printed table
    as a list of dicts keyed by its first row; otherwise "Empty paragraph".
    """
    pictures_printed = 0
    tables_printed = 0
    for xparagraph in self.xdoc.paragraphs:
        if pictures_printed < len(self.xdoc.inline_shapes):
            print('\033[1mPicture \033[0m')
            pictures_printed += 1
            continue

        if xparagraph.text:
            print(xparagraph.text)
            continue

        if tables_printed >= len(self.xdoc.tables):
            print('\033[1mEmpty paragraph\033[0m')
            continue

        table = self.xdoc.tables[tables_printed]
        row_texts = [tuple(cell.text for cell in row.cells) for row in table.rows]
        # The first row supplies the keys; it is only read when more rows follow.
        data = [dict(zip(row_texts[0], values)) for values in row_texts[1:]]
        print('\033[1mTable:\033[0m', data)
        tables_printed += 1
|
|
|
|
|
|
|
|
def center_tables(self):
    """
    Center every table of the document.

    :return: the number of tables that were processed (0 when there are none).
    """
    processed = 0
    for processed, table in enumerate(self.xdoc.tables, start=1):
        table.alignment = WD_TABLE_ALIGNMENT.CENTER
    return processed
|
|
|
|
|
def rearrange_tables(self):
    """
    Hotfix for autofit, applied directly on the XML.

    For every table: switch autofit on, set the type of its ``w:tblW``
    element to "auto", and set every cell width to type "auto" with a value
    of 0. Tables without a ``w:tblW`` element and cells without a ``tcPr`` or
    ``tcW`` element are skipped instead of raising.
    """
    width_type_attribute = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}type"
    # Each table, row and cell is fetched once and walked directly rather
    # than being looked up again by index for every single access.
    for table in self.xdoc.tables:
        table.autofit = True
        table.allow_autofit = True

        table_widths = table._tblPr.xpath("./w:tblW")
        if table_widths:
            table_widths[0].attrib[width_type_attribute] = "auto"

        for row in table.rows:
            for cell in row.cells:
                cell_properties = cell._tc.tcPr
                cell_width = cell_properties.tcW if cell_properties is not None else None
                if cell_width is None:
                    # No explicit width on this cell: nothing to reset.
                    continue
                cell_width.type = 'auto'
                cell_width.w = 0
|
|
|
|
|
def center_images(self):
    """
    Ask every paragraph of the document to center itself.

    Works only for images that sit in a run.
    """
    for wrapped_paragraph in self.paragraphs:
        wrapped_paragraph.center_paragraph()
|
|
|
|
|
def justify_content(self):
    """
    Ask every paragraph of the document to justify itself.

    Applied only to the normal style.
    """
    for wrapped_paragraph in self.paragraphs:
        wrapped_paragraph.justify_paragraph()
|
|
|
|
|
def number_images_in_doc(self):
    """
    Debugging helper, not used: print "Picture" once per counted image.

    :return: the count reached, which can exceed neither the number of
        inline shapes nor the number of paragraphs.
    """
    counted = 0
    for _ in self.xdoc.paragraphs:
        if counted >= len(self.xdoc.inline_shapes):
            continue
        print('\033[1mPicture \033[0m')
        counted += 1
    return counted
|
|
|
|
|
def get_all_styles_used_in_doc_except_list(self):
    """Delegate to the main container's method of the same name."""
    main_container = self.container
    return main_container.get_all_styles_used_in_doc_except_list()
|
|
|
|
|
def get_list_styles(self):
    """Delegate to the main container's ``get_list_styles``."""
    main_container = self.container
    return main_container.get_list_styles()
|
|
|
|
|
def retrieve_number_of_misapplied_styles(self):
    """Delegate to the main container's method of the same name."""
    main_container = self.container
    return main_container.retrieve_number_of_misapplied_styles()
|
|
|
|
|
def normal_style_for_empty_paragraphs(self):
    """
    Give the "Normal" style to every blank paragraph that is not part of the
    table of contents, then save the document.
    """
    for wrapped_paragraph in self.paragraphs:
        if not wrapped_paragraph.blank or wrapped_paragraph.toc:
            continue
        normal_style = self.styles.get_style_from_name("Normal")
        wrapped_paragraph.set_style(normal_style)
    self.save_as_docx()
|
|
|
|
|
|
|
|
def append_doc_to_template_and_update_toc(self,template):
    """
    Put this document's file behind the template and refresh the table of contents.

    The template file is opened as the master document, this document's file
    is appended to it through a docxcompose Composer, and the result
    overwrites ``self.path``. The merged file is then reloaded as a new Doc,
    handed to ``update_table_of_contents`` and saved again. ``self.xdoc`` is
    not reloaded by this method.

    Document_compose = plain old Document from docx
    Composer = from docxcompose => allows to modify several documents

    TODO: rename Document_compose into XDocument
    """
    composer = Composer(Document_compose(template.path))
    composer.append(Document_compose(self.path))
    composer.save(self.path)

    merged_doc = Doc(self.path)
    update_table_of_contents(merged_doc.xdoc)
    merged_doc.save_as_docx()
|
|
|
|
|
def delete_content_before_toc(self):
    """
    Delete everything that precedes the table of contents, then save.

    Does nothing when ``contains_toc()`` is false. Otherwise the python-docx
    paragraphs are walked from the top and deleted one by one, each time
    dropping the first entry of ``self.paragraphs`` as well, until a
    paragraph whose style name contains "toc" is reached.

    TODO: loop with paragraph (ours)
    """
    if not self.contains_toc():
        return

    for xparagraph in self.xdoc.paragraphs:
        if "toc" in xparagraph.style.name:
            break
        # Empty or not, a paragraph located before the TOC is removed.
        self.delete_paragraph(xparagraph)
        self.paragraphs.pop(0)
    self.save_as_docx()
|
|
|
|
|
def delete_paragraph(self, paragraph):
    """
    Detach a python-docx paragraph from its parent XML element and clear the
    element references (``_p`` and ``_element``) held by the given object.

    TODO: to be put in paragraph
    """
    element = paragraph._element
    parent = element.getparent()
    parent.remove(element)
    paragraph._p = None
    paragraph._element = None
|
|
|
|
|
def delete_toc(self,template):
    """
    Remove the table-of-contents paragraphs from the document, then save.

    The python-docx paragraph behind every entry of ``self.toc`` is deleted,
    and those same entries are removed from ``self.paragraphs``, wherever
    they sit in the list.

    :param template: not used; kept so existing callers keep working. The
        previous scan of the template for a "table des matières" heading
        produced a value that was never read, so it was dropped.

    TODO: loop with paragraph (ours)
    """
    toc_paragraphs = list(self.toc)
    for toc_paragraph in toc_paragraphs:
        self.delete_paragraph(toc_paragraph.xparagraph)

    # Remove exactly the wrappers whose XML was deleted, matched by identity.
    # The list is updated in place so that any other holder of the same list
    # object observes the removal too.
    removed_ids = {id(toc_paragraph) for toc_paragraph in toc_paragraphs}
    self.paragraphs[:] = [p for p in self.paragraphs if id(p) not in removed_ids]
    self.save_as_docx()
|
|
|
|
|
|
|
|
def insert_table_of_content(self,index):
    """
    To create a TOC (not used here)

    Inserts an empty "Normal" paragraph before the python-docx paragraph at
    ``index`` and fills a single run with a TOC field built from raw XML
    elements. The XML of the new paragraph is printed; the document is not
    saved by this method.

    :param index: position, in ``self.xdoc.paragraphs``, of the paragraph
        before which the TOC paragraph is inserted.
    """
    paragraph = self.xdoc.paragraphs[index].insert_paragraph_before("", "Normal")
    paragraph.paragraph_format.space_before = Inches(0)
    paragraph.paragraph_format.space_after = Inches(0)
    run = paragraph.add_run()

    # Field start marker.
    fldChar = OxmlElement('w:fldChar')
    fldChar.set(qn('w:fldCharType'), 'begin')

    # Field instruction text.
    instrText = OxmlElement('w:instrText')
    instrText.set(qn('xml:space'), 'preserve')
    instrText.text = 'TOC \\o "1-5" \\h \\z \\u'

    # Separator between the instruction and the field result.
    fldChar2 = OxmlElement('w:fldChar')
    fldChar2.set(qn('w:fldCharType'), 'separate')

    # NOTE(review): this w:t placeholder is discarded - fldChar3 is rebound
    # two lines below before it is attached anywhere.
    fldChar3 = OxmlElement('w:t')
    fldChar3.text = "Right-click to update field."
    fldChar3 = OxmlElement('w:updateFields')
    fldChar3.set(qn('w:val'), 'true')
    fldChar2.append(fldChar3)

    # Field end marker.
    fldChar4 = OxmlElement('w:fldChar')
    fldChar4.set(qn('w:fldCharType'), 'end')

    # All parts go into the same run, in field order.
    r_element = run._r
    r_element.append(fldChar)
    r_element.append(instrText)
    r_element.append(fldChar2)
    r_element.append(fldChar4)

    # Debug output of the generated paragraph XML.
    p_element = paragraph._p
    print(p_element.xml)
|
|
|
|
|
|
|
|
def contains_toc(self):
    """
    Tell whether the document body holds a run styled as a hyperlink.

    A run counts when its style, lower-cased, contains "hyperlink" or
    "lienhypertexte"; the presence of such a run is what this class treats
    as "the document has a table of contents".

    :return: True when at least one such run exists, False otherwise.
    """
    runs = self.xdoc._body._body.xpath('.//w:r')
    for run in runs:
        run_style = run.style
        if not run_style:
            continue
        lowered = run_style.lower()
        if "hyperlink" in lowered or "lienhypertexte" in lowered:
            return True
    return False
|
|
|
|
|
def handle_content_before_toc(self):
    """
    Strip the content preceding the table of contents, except for documents
    whose path contains "data/templates" or "temp/generated_files".

    TODO: use a function to determine the type of the doc
    """
    if "data/templates" in self.path or "temp/generated_files" in self.path:
        return
    self.delete_content_before_toc()
|
|
|
|
|
|
|
|
def delete_style(self, style_name):
    """
    Delete the style called ``style_name`` from the document's styles, then
    save the document.
    """
    document_styles = self.styles
    document_styles.delete_style(style_name)
    self.save_as_docx()
|
|
|
|
|
def change_bullet_style(self, style_name, template_style_name, template) -> {}:
    """
    Replace every matching bullet paragraph of the body by a new paragraph
    that carries the same text in the template style, then do the same
    inside tables and save.

    A paragraph matches when its style name equals ``real_style_name``, its
    wrapper is a list item and its list indentation equals ``level``.

    :param style_name: composite label; the text before ' : ' is the core
        style name (no indentation) and the text after ' = ' is the
        indentation level, parsed as an int.
    :param template_style_name: name of the style, looked up in
        ``template.styles``, given to the re-created paragraphs.
    :param template: document providing the target style.
    :return: the log dict produced by ``change_bullet_style_in_tables``.

    TODO: recode to respect the OOP
    """
    i = 0
    real_style_name = style_name.split(' : ')[0]
    level = int(style_name.split(' = ')[1])
    # self.xdoc.paragraphs is re-read on every pass because the body is
    # modified inside the loop.
    while i < len(self.xdoc.paragraphs):
        para = self.xdoc.paragraphs[i]
        # self.paragraphs is indexed with the same i as the python-docx list.
        if real_style_name == para.style.name and self.paragraphs[i].is_list and self.paragraphs[i].list_indentation == level:

            # NOTE(review): the deletion goes through a fresh lookup rather
            # than through `para`; para.text is still read below, which
            # presumably relies on `para` being a distinct object whose
            # element reference was not cleared - confirm.
            self.delete_paragraph(self.xdoc.paragraphs[i])
            self.paragraphs.pop(i)
            # After the deletion, i == length means the removed paragraph
            # was the last one: append at the end instead of inserting.
            if i == len(self.xdoc.paragraphs):
                paragraph_inserted = self.xdoc.add_paragraph(para.text, style=template.styles.get_style_from_name(template_style_name))
                self.paragraphs.insert(i, Paragraph(paragraph_inserted, self.id_, i, self))
            else:
                paragraph_inserted = self.xdoc.paragraphs[i].insert_paragraph_before(para.text, style=template.styles.get_style_from_name(template_style_name))
                self.paragraphs.insert(i, Paragraph(paragraph_inserted, self.id_, i, self))
        i += 1
    log_dict = self.change_bullet_style_in_tables(style_name, template_style_name, template)
    self.save_as_docx()
    return log_dict
|
|
|
|
|
def change_bullet_style_in_tables(self, style_name, template_style_name, template) -> {}:
    """
    Same as ``change_bullet_style``, for the paragraphs located in table cells.

    :param style_name: composite label; the text before ' : ' is the core
        style name and the text after ' = ' is the indentation level.
    :param template_style_name: name of the style, looked up in
        ``template.styles``, given to the re-created paragraphs.
    :param template: document providing the target style.
    :return: ``{'list_mapping': <message>}`` describing the style change; the
        message is produced whether or not any paragraph matched.

    TODO: recode to respect the OOP (same as change_bullet_style)
    """
    i = 0
    real_style_name = style_name.split(' : ')[0]
    level = int(style_name.split(' = ')[1])
    for table in self.xdoc.tables:
        for row in table.rows:
            for cell in row.cells:
                # Position of the current paragraph inside this cell.
                i = 0
                for para in cell.paragraphs:
                    # Temporary wrapper, used for its list information and text.
                    real_para = Paragraph(para, self.id_, i, self)
                    if real_style_name == para.style.name and real_para.is_list and real_para.list_indentation == level:
                        self.delete_paragraph(para)
                        # After the deletion, i == length means the removed
                        # paragraph was the last one of the cell.
                        if i == len(cell.paragraphs):
                            cell.add_paragraph(real_para.text, style=template.styles.get_style_from_name(template_style_name))
                        else:
                            cell.paragraphs[i].insert_paragraph_before(real_para.text, style=template.styles.get_style_from_name(template_style_name))
                    i += 1
    log = f"Le style {style_name} a été changé en {template_style_name}"
    log_dict = {'list_mapping': log}
    return log_dict
|
|
|
|
|
def table_insertion(self, index: str, content: dict):
    """
    Create a table and place it right after the paragraph heading a container.

    The container is the one whose ``index`` equals the parsed ``index``
    argument; the anchor paragraph is its title when it has one, otherwise
    its first paragraph. The document is saved after the insertion.

    :param index: dotted container index made of integers, e.g. "1.2".
    :param content: dict with "headers" (texts of the first row) and "rows"
        (one sequence of cell texts per following row).
    :return: the created table, or None (after printing a message) when the
        index is malformed or matches no container.
    """
    try:
        # int() replaces a former eval() on each component: the index comes
        # from the caller and must never be executed as code.
        index_in_list = [int(part) for part in index.split(".")]
    except ValueError:
        print("The index is not valid")
        return None

    paragraph = None
    for candidate in self.container.containers:
        if candidate.index == index_in_list:
            paragraph = candidate.title if candidate.title else candidate.paragraphs[0]
            break
    if not paragraph:
        print("The index is not valid")
        return None

    headers = content["headers"]
    rows = content["rows"]
    # One extra row for the headers.
    table = self.xdoc.add_table(rows=len(rows) + 1, cols=len(headers))

    for column, header in enumerate(headers):
        table.cell(0, column).text = header

    for row_number, row in enumerate(rows, start=1):
        for column, cell_text in enumerate(row):
            table.cell(row_number, column).text = cell_text

    # add_table returned a new table; relocate it behind the anchor paragraph.
    self.move_table_after(table, paragraph.xparagraph)

    self.save_as_docx()
    return table
|
|
|
|
|
def delete_table(self, table):
    """
    Detach a python-docx table from its parent XML element, clear the
    ``_element`` and ``_row`` references of the given object, then save.
    """
    element = table._element
    element.getparent().remove(element)
    table._element = None
    table._row = None
    self.save_as_docx()
|
|
|
|
|
def move_table_after(self, table, paragraph):
    """
    Place the XML element of ``table`` directly after the XML element of
    ``paragraph``.
    """
    table_element = table._tbl
    paragraph_element = paragraph._p
    paragraph_element.addnext(table_element)
|
|
|
|
|
|
|
|
def remove_all_but_last_section(self):
    """
    Not used.

    Prints, then removes from its parent, every ``w:sectPr`` element found
    under a ``w:pPr`` element of the document.
    """
    for section_properties in self.xdoc._element.xpath(".//w:pPr/w:sectPr"):
        print(section_properties)
        parent = section_properties.getparent()
        parent.remove(section_properties)