import string

from docx.enum.text import WD_ALIGN_PARAGRAPH

from src.tools.paragraph_tools import find_list_indentation_level

# Level assigned to paragraphs whose style is not a numbered heading; any
# level strictly below this value marks the paragraph as structural.
INFINITE = 10000


class Paragraph:
    """Wrap one paragraph of a document and expose derived metadata.

    ``xparagraph`` is the underlying paragraph object (presumably a
    python-docx paragraph: it must provide ``style``, ``text``, ``runs`` and
    ``alignment``). ``doc`` is the owning project document; this class reads
    ``doc.path`` and ``doc.styles.get_style_from_name``.
    """

    def __init__(self, xparagraph, doc_id: int, id_: int, doc):
        """Capture the paragraph and pre-compute its derived attributes.

        Args:
            xparagraph: underlying paragraph object being wrapped.
            doc_id: identifier of the owning document, used to build ``id_``.
            id_: identifier of the paragraph inside the document.
            doc: owning document object.
        """
        self.doc = doc
        self.xparagraph = xparagraph
        self.is_template_para = "data/templates" in self.doc.path
        # The identifier is the decimal concatenation "2" + doc_id + id_.
        self.id_ = int(f"2{doc_id}{id_}")
        self.style_name = self.xparagraph.style.name
        # List detection is skipped for template paragraphs.
        if self.is_template_para:
            self.is_list, self.list_indentation = False, 0
        else:
            self.is_list, self.list_indentation = find_list_indentation_level(
                self.xparagraph, self.doc
            )
        self.level = self.get_level_from_name(self.style_name)
        self.is_structure = self.level < INFINITE
        self.text = self.xparagraph.text
        self.type, self.parsed_text = self.parse_text()

    @property
    def style_misapplied(self) -> bool:
        """Tell whether the first run's font differs from the paragraph style.

        NOTE: flagged by the original author as buggy and not used.
        NOTE(review): the comparison uses the font of the first run's
        *character style* (``run.style.font``), not the run's own font; that
        may be the reported bug -- confirm before relying on the result.

        Returns:
            False when the paragraph has no run; otherwise True as soon as
            one of size, name, bold, italic, underline, all_caps or colour
            differs from the font of the style named ``self.style_name``.
        """
        runs = self.xparagraph.runs
        if not runs:
            return False
        run_font = runs[0].style.font
        if not run_font:
            return False
        style_font = self.doc.styles.get_style_from_name(self.style_name).font
        compared = ("size", "name", "bold", "italic", "underline", "all_caps")
        if any(getattr(run_font, attr) != getattr(style_font, attr) for attr in compared):
            return True
        return run_font.color.rgb != style_font.color.rgb

    @property
    def structure(self) -> dict:
        """Return a one-entry tree-node mapping keyed by the paragraph id."""
        key = str(self.id_)
        node = {
            'index': key,
            'canMove': True,
            'isFolder': False,
            'children': [],
            'title': self.text,
            'canRename': True,
            'data': {},
            'level': self.level,
        }
        return {key: node}

    @property
    def blank(self) -> bool:
        """Tell whether the paragraph is blank, i.e. contains no letter.

        A blank paragraph brings no signal and may be ignored. Digits,
        punctuation and whitespace alone do not count as content. Letters of
        any alphabet count (``str.isalpha``), so accented or non-Latin text
        is not mistaken for a blank paragraph.
        """
        return not any(char.isalpha() for char in self.text)

    @property
    def toc(self) -> bool:
        """Check if the paragraph is part of the table of contents.

        The test is a case-sensitive search for "toc" in the style name.
        """
        return "toc" in self.style_name

    @staticmethod
    def get_level_from_name(style_name: str) -> int:
        """Derive the heading level from a style name.

        Args:
            style_name: name of the paragraph style.

        Returns:
            The integer value of the last character when the name contains
            'Titre' or 'Heading' and ends with a digit; ``INFINITE``
            otherwise.
        """
        if 'Titre' in style_name or 'Heading' in style_name:
            try:
                return int(style_name[-1])
            except ValueError:
                # Heading-like name without a trailing digit: not a level.
                return INFINITE
        return INFINITE

    def parse_text(self) -> "tuple[str, str]":
        """Classify the paragraph and strip its marker, if any.

        Returns:
            ``('structure', text)`` for structural paragraphs,
            ``('task', rest)`` when the text starts with "?? ",
            ``('comment', rest)`` when it starts with "++ ",
            ``('normal', text)`` otherwise.
        """
        if self.is_structure:
            return 'structure', self.text
        markers = {"?? ": "task", "++ ": "comment"}
        # Leading whitespace before the marker is tolerated, but a marker in
        # the middle of the text no longer turns the paragraph into a
        # task/comment.
        candidate = self.text.lstrip()
        for marker, kind in markers.items():
            if candidate.startswith(marker):
                return kind, candidate[len(marker):]
        return "normal", self.text

    def set_text(self, text: str) -> "Paragraph":
        """Replace the text of both the wrapper and the wrapped paragraph.

        NOTE(review): ``type`` and ``parsed_text`` are not recomputed here.

        Returns:
            ``self``, to allow call chaining.
        """
        self.text = text
        self.xparagraph.text = text
        return self

    def center_paragraph(self) -> None:
        """Center the paragraph, but only when it contains an image."""
        if self.contains_image():
            self.xparagraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    def justify_paragraph(self) -> None:
        """Justify the paragraph, but only when its style is "Normal"."""
        if self.xparagraph.style.name == "Normal":
            self.xparagraph.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY

    def contains_image(self) -> bool:
        """Tell whether any run's XML contains a ``pic:pic`` element."""
        return any("pic:pic" in run.element.xml for run in self.xparagraph.runs)

    def get_styles_in_paragraph_except_list(self) -> list:
        """Collect the styles used by the paragraph, excluding list styles.

        Returns:
            The paragraph style (omitted for list paragraphs) followed by
            the style of every run whose style name is neither
            "Default Paragraph Font" nor the paragraph style's name.
        """
        paragraph_style = self.xparagraph.style
        styles = [] if self.is_list else [paragraph_style]
        ignored_names = ("Default Paragraph Font", paragraph_style.name)
        styles.extend(
            run.style
            for run in self.xparagraph.runs
            if run.style.name not in ignored_names
        )
        return styles

    def get_list_styles(self) -> list:
        """Describe the list style of the paragraph.

        Returns:
            A single-item list "<style name> : indentation = <n>" for list
            paragraphs, an empty list otherwise.
        """
        if not self.is_list:
            return []
        return [f"{self.xparagraph.style.name} : indentation = {self.list_indentation}"]

    def set_style(self, style) -> "Paragraph":
        """Apply ``style`` to the wrapped paragraph.

        NOTE(review): ``style_name`` and ``level`` are not refreshed here.

        Returns:
            ``self``, to allow call chaining.
        """
        self.xparagraph.style = style
        return self