Spaces:

Hexamind
/

GenProp

Runtime error

adrien.aribaut-gaudin

fix: comment in the doc file

ecf147c about 2 years ago

18.7 kB

	import docx
	from src.tools.doc_tools import *
	from docxcompose.composer import Composer
	from docx import Document as Document_compose
	from docx.enum.table import WD_TABLE_ALIGNMENT
	from src.domain.container import Container
	from src.domain.container_requirements import Container_requirements
	from src.domain.paragraph import Paragraph
	from src.domain.styles import Styles
	import shutil
	import os
	from docx.oxml.ns import qn
	from docx.oxml.shared import OxmlElement
	from docx.shared import Inches
	from src.tools.pretty_print import pretty_print_block_and_indexes, pretty_print_paragraphs
	from src.tools.index_creation import set_indexes
	from src.reader.reader_for_requirements import WordReader

	class Doc:

	"""
	Wrapper around a Word document opened with python-docx, together with the
	paragraphs, containers, styles and blocks derived from it.

	TODO: prefix the internal methods with an underscore
	"""

	def __init__(self, path='', id_=None):
	    """
	    Open the Word file at `path` and build every derived view of it
	    (paragraph wrappers, containers, styles, tasks and blocks).

	    NOTE(review): `id_` is accepted but never read; the identifier stored
	    on the instance is always id(self).
	    """
	    self.xdoc = docx.Document(path)
	    self.title = get_title(path)
	    self.name = self.title.split('.')[0]
	    self.id_ = id(self)
	    self.path = path
	    self.paragraphs = [
	        Paragraph(xparagraph, self.id_, position, self)
	        for position, xparagraph in enumerate(self.xdoc.paragraphs)
	    ]
	    # Runs before the containers are built because it can drop leading paragraphs.
	    self.handle_content_before_toc()
	    if "data/templates" in self.path:
	        self.requirements_paragraphs = []
	    else:
	        self.requirements_paragraphs = WordReader(self.path).paragraphs
	    self.container = Container(self.paragraphs, father=self)
	    self.container_requirements = Container_requirements(self.requirements_paragraphs, father=self)
	    for indexed_container in (self.container, self.container_requirements):
	        set_indexes(indexed_container, self.path)
	    self.styles = Styles(self.xdoc.styles)
	    self.tasks = [
	        child.get_fulltask(self.container.one_liner)
	        for child in self.container.containers
	        if child.task
	    ]
	    self.blocks = self.get_blocks()
	    self.blocks_requirements = self.get_blocks_requirements()


	def copy(self, new_doc_path):
	    """
	    Duplicate the underlying file to `new_doc_path`, load the duplicate as
	    a new Doc, save it in place and return it.
	    """
	    shutil.copyfile(self.path, new_doc_path)
	    duplicate = Doc(new_doc_path)
	    duplicate.save_as_docx(new_doc_path)
	    return duplicate

	def clear(self):
	    """Delete the file this document was loaded from (self.path)."""
	    file_to_delete = self.path
	    os.remove(file_to_delete)

	def apply_template(self, template, options_list):
	    """
	    Run the optional formatting steps selected in `options_list`, copy the
	    template styles onto this document, remove its table of contents and
	    append it to the template. Returns the list of log messages.

	    The document is saved after each step, in the order shown below.

	    TODO: move the message texts to a configuration file
	    """
	    log = []
	    if "Justifier le texte (Normal)" in options_list:
	        log.append("Le contenu du document a été justifié")
	        self.justify_content()
	        self.save_as_docx()
	    if "Recentrer les tableaux" in options_list:
	        centered = self.center_tables()
	        plural = 's' if centered > 1 else ''
	        log.append(f"{centered} table{plural} centrée{plural}")
	        self.save_as_docx()
	    # The message is logged here, before the merge with the template actually happens.
	    log.append(f"Le template {template.name} a été ajouté avant le document")
	    self.rearrange_tables()
	    self.save_as_docx()
	    log = self.styles.apply_from(template.styles, log)
	    self.save_as_docx()
	    self.delete_toc(template)
	    self.normal_style_for_empty_paragraphs()
	    self.save_as_docx()
	    self.append_doc_to_template_and_update_toc(template)
	    return log

	def copy_one_style(self, src_style_name: str, dest_style_name: str, template):
	    """
	    Copy the style named `src_style_name` of this document onto the style
	    named `dest_style_name` of `template`.

	    Returns whatever self.styles.copy_one_style returns, or None when this
	    document has no style with the source name.
	    """
	    destination = template.styles.get_style_from_name(dest_style_name)
	    source = self.styles.get_style_from_name(src_style_name)
	    if not source:
	        return None
	    return self.styles.copy_one_style(source, destination)

	def get_different_styles_with_template(self, template):
	    """
	    Return the result of get_difference_with_template for the non-list
	    styles used in this document and the given template.
	    """
	    return get_difference_with_template(
	        self.get_all_styles_used_in_doc_except_list(),
	        template,
	    )

	def save_as_docx(self, path: str = ''):
	    """
	    Write the document to `path`, which then becomes self.path.
	    With an empty `path` the document is saved at its current self.path.
	    """
	    if path:
	        self.path = path
	    self.xdoc.save(self.path)

	def get_blocks(self):
	    """
	    Return the blocks of self.container, each tagged with this document's
	    title and with its index turned into a dotted string (e.g. "1.2.3").

	    Returns None when the path contains "temp/generated_files" or
	    "data/templates". Calling the method again is safe: an index that is
	    already a string is left untouched.

	    TODO: write a function that tells whether the Doc is neither a template nor a generated doc
	    TODO: merge the two functions that return blocks
	    """
	    if "temp/generated_files" in self.path or "data/templates" in self.path:
	        return None

	    blocks = self.container.blocks
	    for block in blocks:
	        block.doc = self.title
	        # Re-joining a string would split it character by character ("1.2" -> "1...2").
	        if not isinstance(block.index, str):
	            block.index = '.'.join(str(part) for part in block.index)
	    return blocks


	def get_blocks_requirements(self):
	    """
	    Return the blocks of self.container_requirements, each tagged with this
	    document's title and with its index turned into a dotted string.

	    Returns None when the path contains "temp/generated_files" or
	    "data/templates". An index that is already a string is kept as is, and
	    an empty index becomes an empty string instead of raising.
	    """
	    if "temp/generated_files" in self.path or "data/templates" in self.path:
	        return None

	    blocks = self.container_requirements.blocks
	    for block in blocks:
	        block.doc = self.title
	        if not isinstance(block.index, str):
	            block.index = '.'.join(str(part) for part in block.index)
	    return blocks

	@property
	def toc(self):
	    """
	    Paragraphs of this document flagged as belonging to the table of
	    contents (their `toc` attribute is truthy), in document order.
	    """
	    return list(filter(lambda paragraph: paragraph.toc, self.paragraphs))

	@property
	def structure(self):
	    """Structure exposed by the main container of the document."""
	    main_container = self.container
	    return main_container.structure

	def replace_tasks(self, resolutions: [str]):
	    """
	    Replace the text of each paragraph of type 'task' with the matching
	    entry of `resolutions`, in document order, and return self.

	    When the number of resolutions differs from the number of tasks,
	    nothing is replaced and a message is printed.
	    TODO: handle this mismatch with an exception
	    """
	    if len(resolutions) != len(self.tasks):
	        print(f"résolutions : {len(resolutions)} != {len(self.tasks)} tasks")
	        return self
	    task_paragraphs = [candidate for candidate in self.paragraphs if candidate.type == 'task']
	    for paragraph, resolution in zip(task_paragraphs, resolutions):
	        paragraph.set_text(resolution)
	    return self

	def get_paragraphs(self):
	    """Return the `all_paragraphs` attribute of the main container."""
	    main_container = self.container
	    return main_container.all_paragraphs

	def get_text_from_paragraphs(self):
	    """Return the text of every paragraph of the document, in order."""
	    texts = []
	    for paragraph in self.paragraphs:
	        texts.append(paragraph.text)
	    return texts

	def check_document(self):
	    """
	    Debugging helper: print one line per paragraph of the raw document.

	    For each paragraph, in this order of priority: print a picture marker
	    while fewer markers than inline shapes have been printed, else print
	    the paragraph text when it is not empty, else dump the next table not
	    yet shown (rows as dicts keyed by the first row), else print an
	    empty-paragraph marker.
	    """
	    pictures_shown = 0
	    tables_shown = 0
	    for xparagraph in self.xdoc.paragraphs:
	        if pictures_shown < len(self.xdoc.inline_shapes):
	            print('\033[1mPicture \033[0m')
	            pictures_shown += 1
	            continue
	        if xparagraph.text:
	            print(xparagraph.text)
	            continue
	        if tables_shown >= len(self.xdoc.tables):
	            print('\033[1mEmpty paragraph\033[0m')
	            continue
	        rows = list(self.xdoc.tables[tables_shown].rows)
	        # The first row supplies the keys; every later row becomes one dict.
	        keys = tuple(cell.text for cell in rows[0].cells) if rows else None
	        data = [
	            dict(zip(keys, (cell.text for cell in row.cells)))
	            for row in rows[1:]
	        ]
	        print('\033[1mTable:\033[0m', data)
	        tables_shown += 1


	def center_tables(self):
	    """Center every table of the document and return how many were processed."""
	    processed = 0
	    for processed, table in enumerate(self.xdoc.tables, start=1):
	        table.alignment = WD_TABLE_ALIGNMENT.CENTER
	    return processed

	def rearrange_tables(self):
	    """
	    Hotfix for autofit, applied directly on the XML: switch every table,
	    and every cell in it, to automatic width.

	    Tables without a w:tblW element and cells without w:tcPr / w:tcW are
	    left unchanged instead of raising.
	    """
	    width_type_attr = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}type"
	    # Iterate the tables, rows and cells once instead of re-indexing the
	    # collections on every access.
	    for table in self.xdoc.tables:
	        table.autofit = True
	        table.allow_autofit = True
	        table_widths = table._tblPr.xpath("./w:tblW")
	        if table_widths:
	            table_widths[0].attrib[width_type_attr] = "auto"
	        for row in table.rows:
	            for cell in row.cells:
	                cell_properties = cell._tc.tcPr
	                cell_width = cell_properties.tcW if cell_properties is not None else None
	                if cell_width is None:
	                    continue
	                cell_width.type = 'auto'
	                cell_width.w = 0

	def center_images(self):
	    """
	    Call center_paragraph() on every paragraph of the document.
	    Works only for images that sit inside a run.
	    """
	    for wrapped in self.paragraphs:
	        wrapped.center_paragraph()

	def justify_content(self):
	    """
	    Call justify_paragraph() on every paragraph of the document.
	    Applied only to the Normal style.
	    """
	    for wrapped in self.paragraphs:
	        wrapped.justify_paragraph()

	def number_images_in_doc(self):
	    """
	    Debugging helper (not used): print one picture marker per paragraph
	    until as many markers as inline shapes have been printed, then return
	    the number of markers.
	    """
	    markers = 0
	    for _ in self.xdoc.paragraphs:
	        if markers >= len(self.xdoc.inline_shapes):
	            continue
	        print('\033[1mPicture \033[0m')
	        markers += 1
	    return markers

	def get_all_styles_used_in_doc_except_list(self):
	    """Delegate to the main container's method of the same name."""
	    main_container = self.container
	    return main_container.get_all_styles_used_in_doc_except_list()

	def get_list_styles(self):
	    """Delegate to the main container's get_list_styles()."""
	    main_container = self.container
	    return main_container.get_list_styles()

	def retrieve_number_of_misapplied_styles(self):
	    """Delegate to the main container's retrieve_number_of_misapplied_styles()."""
	    main_container = self.container
	    return main_container.retrieve_number_of_misapplied_styles()

	def normal_style_for_empty_paragraphs(self):
	    """
	    Give the "Normal" style to every blank paragraph that is not part of
	    the table of contents, then save the document.
	    """
	    for paragraph in self.paragraphs:
	        if not paragraph.blank or paragraph.toc:
	            continue
	        paragraph.set_style(self.styles.get_style_from_name("Normal"))
	    self.save_as_docx()


	def append_doc_to_template_and_update_toc(self,template):
	    """
	    Append this document's file to the template file, write the result
	    over self.path, then reload it to update its table of contents and
	    save it again.

	    Document_compose = the plain Document class from docx
	    Composer = from docxcompose, used here to append one document to another
	    TODO: rename Document_compose to XDocument
	    """
	    master = Document_compose(template.path)
	    composer = Composer(master)
	    composer.append(Document_compose(self.path))
	    composer.save(self.path)
	    merged = Doc(self.path)
	    update_table_of_contents(merged.xdoc)
	    merged.save_as_docx()

	def delete_content_before_toc(self):
	    """
	    When the document contains a table of contents, delete every raw
	    paragraph located before the first one whose style name contains
	    "toc", drop one leading entry of self.paragraphs per deletion, and
	    save the document.

	    TODO: loop over our own Paragraph objects instead
	    """
	    if not self.contains_toc():
	        return
	    for xparagraph in self.xdoc.paragraphs:
	        if "toc" in xparagraph.style.name:
	            break
	        # Empty or not, anything before the table of contents is removed.
	        self.delete_paragraph(xparagraph)
	        self.paragraphs.pop(0)
	    self.save_as_docx()

	def delete_paragraph(self, paragraph):
	    """
	    Detach the XML element of a raw paragraph from its parent and clear
	    the references the paragraph object keeps to it.

	    TODO: move this to the Paragraph class
	    """
	    element = paragraph._element
	    parent = element.getparent()
	    parent.remove(element)
	    paragraph._p = None
	    paragraph._element = None

	def delete_toc(self,template):
	    """
	    Delete the table-of-contents paragraphs from the raw document, remove
	    the same paragraphs from self.paragraphs, then save.

	    `template` is not used; it is kept so existing callers keep working.

	    TODO: loop over our own Paragraph objects instead
	    """
	    toc_paragraphs = self.toc
	    # Decide what stays before deleting anything: a wrapper whose XML
	    # element has been detached may no longer be inspectable.
	    remaining = [paragraph for paragraph in self.paragraphs if not paragraph.toc]
	    for toc_paragraph in toc_paragraphs:
	        self.delete_paragraph(toc_paragraph.xparagraph)
	    # Slice assignment keeps the same list object for anything that shares it.
	    self.paragraphs[:] = remaining
	    self.save_as_docx()


	def insert_table_of_content(self,index):
	    """
	    Create a TOC field in a new "Normal" paragraph inserted before the raw
	    paragraph at position `index`, then print the XML of that paragraph.
	    (not used here)
	    """
	    toc_paragraph = self.xdoc.paragraphs[index].insert_paragraph_before("", "Normal")
	    toc_paragraph.paragraph_format.space_before = Inches(0)
	    toc_paragraph.paragraph_format.space_after = Inches(0)
	    toc_run = toc_paragraph.add_run()

	    field_begin = OxmlElement('w:fldChar')
	    field_begin.set(qn('w:fldCharType'), 'begin')

	    instruction = OxmlElement('w:instrText')
	    instruction.set(qn('xml:space'), 'preserve')
	    # "1-5" is the range of heading levels listed; adjust it to the levels needed.
	    instruction.text = 'TOC \\o "1-5" \\h \\z \\u'

	    field_separate = OxmlElement('w:fldChar')
	    field_separate.set(qn('w:fldCharType'), 'separate')

	    # NOTE(review): this placeholder text element is built but never
	    # attached; only the w:updateFields element below is appended.
	    placeholder = OxmlElement('w:t')
	    placeholder.text = "Right-click to update field."
	    update_flag = OxmlElement('w:updateFields')
	    update_flag.set(qn('w:val'), 'true')
	    field_separate.append(update_flag)

	    field_end = OxmlElement('w:fldChar')
	    field_end.set(qn('w:fldCharType'), 'end')

	    run_element = toc_run._r
	    for child in (field_begin, instruction, field_separate, field_end):
	        run_element.append(child)

	    print(toc_paragraph._p.xml)


	def contains_toc(self):
	    """
	    Return True when at least one run (<w:r>) of the document body has a
	    style whose name contains "hyperlink" or "lienhypertexte"
	    (case-insensitive); such a run is taken as the sign of a table of
	    contents. Return False otherwise.
	    """
	    runs = self.xdoc._body._body.xpath('.//w:r')
	    for run in runs:
	        style_name = run.style
	        if not style_name:
	            continue
	        lowered = style_name.lower()
	        if "hyperlink" in lowered or "lienhypertexte" in lowered:
	            return True
	    return False

	def handle_content_before_toc(self):
	    """
	    Remove the content located before the table of contents, except for
	    documents whose path contains "data/templates" or
	    "temp/generated_files".

	    TODO: use a function to determine the type of the doc
	    """
	    skipped_locations = ("data/templates", "temp/generated_files")
	    if any(location in self.path for location in skipped_locations):
	        return
	    self.delete_content_before_toc()


	def delete_style(self, style_name):
	    """Delete the style named `style_name` through self.styles, then save the document."""
	    style_registry = self.styles
	    style_registry.delete_style(style_name)
	    self.save_as_docx()

	def change_bullet_style(self, style_name, template_style_name, template) -> {}:
	"""
	Replace every bulleted paragraph that uses `style_name` with a new paragraph
	carrying the same text in the target style (`template_style_name`, looked up
	in `template.styles`), then do the same inside tables and save the document.

	`style_name` must contain ' : ' and ' = <integer>':
	real_style_name = the part before ' : ' (core style name, without the indentation)
	level = the integer after ' = ' (indentation level)

	Returns the dict produced by change_bullet_style_in_tables.

	TODO: recode to respect the OOP
	"""
	i = 0
	real_style_name = style_name.split(' : ')[0]
	level = int(style_name.split(' = ')[1])
	# self.xdoc.paragraphs and self.paragraphs are walked with the same index i.
	while i < len(self.xdoc.paragraphs):
	para = self.xdoc.paragraphs[i]
	if real_style_name == para.style.name and self.paragraphs[i].is_list and self.paragraphs[i].list_indentation == level:
	# The deletion goes through a fresh lookup of paragraph i, not through `para`;
	# presumably so that para.text stays readable below, since delete_paragraph
	# clears the object it is given - TODO confirm.
	self.delete_paragraph(self.xdoc.paragraphs[i])
	self.paragraphs.pop(i)
	# If the deleted paragraph was the last one, append; otherwise insert
	# before the paragraph that now sits at position i.
	if i == len(self.xdoc.paragraphs):
	paragraph_inserted = self.xdoc.add_paragraph(para.text, style=template.styles.get_style_from_name(template_style_name))
	self.paragraphs.insert(i, Paragraph(paragraph_inserted, self.id_, i, self))
	else:
	paragraph_inserted = self.xdoc.paragraphs[i].insert_paragraph_before(para.text, style=template.styles.get_style_from_name(template_style_name))
	self.paragraphs.insert(i, Paragraph(paragraph_inserted, self.id_, i, self))
	i += 1
	log_dict = self.change_bullet_style_in_tables(style_name, template_style_name, template)
	self.save_as_docx()
	return log_dict

	def change_bullet_style_in_tables(self, style_name, template_style_name, template) -> {}:
	"""
	Same replacement as change_bullet_style, applied to the paragraphs found in
	the cells of every table of the document.

	`style_name` is parsed the same way: the part before ' : ' is the core style
	name and the integer after ' = ' is the indentation level.

	Returns {'list_mapping': <message>}; the message is built whether or not
	any paragraph was replaced.

	TODO: recode to respect the OOP
	"""
	i = 0
	real_style_name = style_name.split(' : ')[0]
	level = int(style_name.split(' = ')[1])
	for table in self.xdoc.tables:
	for row in table.rows:
	for cell in row.cells:
	# i is the position of the paragraph inside the current cell.
	i = 0
	for para in cell.paragraphs:
	# Temporary wrapper, used only to read is_list, list_indentation and text.
	real_para = Paragraph(para, self.id_, i, self)
	if real_style_name == para.style.name and real_para.is_list and real_para.list_indentation == level:
	# NOTE(review): real_para.text is read after `para` has been deleted;
	# confirm that Paragraph can still provide the text at that point.
	self.delete_paragraph(para)
	if i == len(cell.paragraphs):
	cell.add_paragraph(real_para.text, style=template.styles.get_style_from_name(template_style_name))
	else:
	cell.paragraphs[i].insert_paragraph_before(real_para.text, style=template.styles.get_style_from_name(template_style_name))
	i += 1
	log = f"Le style {style_name} a été changé en {template_style_name}"
	log_dict = {'list_mapping': log}
	return log_dict

	def table_insertion(self, index: str, content: dict):
	    """
	    Build a table from `content` and place it right after the block
	    located at `index`, then save the document.

	    index: dotted position of the target block in the docx file, e.g. "1.2.3"
	    content: the table content, in the following format:
	        {
	            "headers": ["header1", "header2", "header3"],
	            "rows": [
	                ["row1", "row1", "row1"],
	                ["row2", "row2", "row2"],
	            ]
	        }

	    Returns the new table. Prints "The index is not valid" and returns
	    None when `index` is not made of integers, matches no container, or
	    matches a container that has neither a title nor a paragraph.
	    """
	    # Parse with int(): the index comes from the caller and must never be evaluated as code.
	    try:
	        wanted_index = [int(part) for part in index.split(".")]
	    except ValueError:
	        print("The index is not valid")
	        return None

	    # The table is anchored on the title of the matching container, or on
	    # its first paragraph when it has no title.
	    anchor = None
	    for container in self.container.containers:
	        if container.index == wanted_index:
	            if container.title:
	                anchor = container.title
	            elif container.paragraphs:
	                anchor = container.paragraphs[0]
	            break
	    if not anchor:
	        print("The index is not valid")
	        return None

	    table = self.xdoc.add_table(rows=len(content["rows"]) + 1, cols=len(content["headers"]))
	    for column, header in enumerate(content["headers"]):
	        table.cell(0, column).text = str(header)
	    for row_number, row in enumerate(content["rows"], start=1):
	        for column, value in enumerate(row):
	            table.cell(row_number, column).text = str(value)

	    # add_table creates the table at the end of the document; move it to its place.
	    self.move_table_after(table, anchor.xparagraph)
	    self.save_as_docx()
	    return table

	def delete_table(self, table):
	    """
	    Detach the XML element of `table` from its parent, clear the
	    references the table object keeps, and save the document.
	    """
	    element = table._element
	    element.getparent().remove(element)
	    table._element = None
	    table._row = None
	    self.save_as_docx()

	def move_table_after(self, table, paragraph):
	    """Place the XML element of `table` right after the XML element of the raw `paragraph`."""
	    table_element = table._tbl
	    anchor_element = paragraph._p
	    anchor_element.addnext(table_element)


	def remove_all_but_last_section(self):
	    """
	    Print, then remove, every section-properties element (w:sectPr) found
	    inside paragraph properties (w:pPr).
	    (not used)
	    """
	    for section_properties in self.xdoc._element.xpath(".//w:pPr/w:sectPr"):
	        print(section_properties)
	        holder = section_properties.getparent()
	        holder.remove(section_properties)