GenProp / src /domain /paragraph.py
adrien.aribaut-gaudin
feat: new public GenProp
498db6b
import string
from docx.enum.text import WD_ALIGN_PARAGRAPH
from src.tools.paragraph_tools import find_list_indentation_level
# Sentinel level given to paragraphs whose style name yields no heading level
# (see Paragraph.get_level_from_name); a paragraph counts as structural only
# when its level is strictly below this value.
INFINITE = 10000
class Paragraph:
    """Wrapper around one paragraph object (``xparagraph``) of a document.

    The wrapped object is expected to expose ``style``, ``text``, ``runs`` and
    ``alignment`` (presumably a python-docx paragraph -- confirm with caller).

    Attributes set by ``__init__``:
        doc: the owning document object, as passed in.
        xparagraph: the wrapped paragraph object.
        is_template_para: True when ``doc.path`` contains ``"data/templates"``.
        id_: integer built from the digits ``2``, ``doc_id`` and ``id_``.
        style_name: name of the paragraph style at construction time.
        is_list / list_indentation: result of ``find_list_indentation_level``
            (forced to ``False`` / ``0`` for template paragraphs).
        level: heading level read from the style name, or ``INFINITE``.
        is_structure: True when ``level`` is below ``INFINITE``.
        text: paragraph text at construction time (updated by ``set_text``).
        type / parsed_text: result of ``parse_text`` at construction time.
    """

    # Text markers recognised by ``parse_text`` and the type each one denotes.
    _TEXT_MARKERS = {"?? ": "task", "++ ": "comment"}

    def __init__(self, xparagraph, doc_id: int, id_: int, doc):
        """Build the wrapper and pre-compute the derived attributes.

        Args:
            xparagraph: paragraph object to wrap.
            doc_id: identifier of the owning document, used to build ``id_``.
            id_: index of the paragraph, used to build ``id_``.
            doc: owning document; must expose ``path``.
        """
        self.doc = doc
        self.xparagraph = xparagraph
        self.is_template_para = "data/templates" in self.doc.path
        # The id is the concatenation of the digits "2", doc_id and id_.
        self.id_ = int(f"2{doc_id}{id_}")
        self.style_name = self.xparagraph.style.name
        if self.is_template_para:
            # List detection is skipped for template paragraphs.
            self.is_list, self.list_indentation = False, 0
        else:
            self.is_list, self.list_indentation = find_list_indentation_level(
                self.xparagraph, self.doc
            )
        self.level = self.get_level_from_name(self.style_name)
        self.is_structure = self.level < INFINITE
        self.text = self.xparagraph.text
        self.type, self.parsed_text = self.parse_text()

    @property
    def style_misapplied(self) -> bool:
        """Tell whether the first run's style font differs from the paragraph style font.

        NOTE: flagged by the original author as buggy and unused; the logic is
        kept as it was, only condensed.

        Returns:
            False when the paragraph has no run. Otherwise True as soon as one
            of size, name, bold, italic, underline, all_caps or color.rgb
            differs between the first run's style font and the font of the
            style returned by ``doc.styles.get_style_from_name(style_name)``.
        """
        runs = self.xparagraph.runs
        if not runs:
            return False
        run_font = runs[0].style.font
        if not run_font:
            return False
        style_font = self.doc.styles.get_style_from_name(self.style_name).font
        compared = ("size", "name", "bold", "italic", "underline", "all_caps")
        if any(getattr(run_font, attr) != getattr(style_font, attr) for attr in compared):
            return True
        return run_font.color.rgb != style_font.color.rgb

    @property
    def structure(self) -> dict:
        """Return a one-entry dict describing this paragraph as a tree node.

        The single key is ``str(id_)``; the value holds the node fields
        (index, flags, empty children list, title, empty data, level).
        """
        node_id = str(self.id_)
        return {
            node_id: {
                'index': node_id,
                'canMove': True,
                'isFolder': False,
                'children': [],
                'title': self.text,
                'canRename': True,
                'data': {},
                'level': self.level,
            }
        }

    @property
    def blank(self) -> bool:
        """True when the text carries no signal and may be ignored.

        A paragraph is blank when it contains no ASCII letter; digits,
        punctuation, whitespace and non-ASCII characters do not count.
        """
        return set(self.text).isdisjoint(string.ascii_letters)

    @property
    def toc(self) -> bool:
        """True when the style name contains ``"toc"`` (case-sensitive),
        i.e. the paragraph is treated as part of the table of contents."""
        return "toc" in self.style_name

    @staticmethod
    def get_level_from_name(style_name: str) -> int:
        """Read the heading level from a style name.

        Args:
            style_name: name of a paragraph style.

        Returns:
            The integer value of the last character when the name contains
            ``'Titre'`` or ``'Heading'`` and ends with a digit; ``INFINITE``
            otherwise.
        """
        if 'Titre' not in style_name and 'Heading' not in style_name:
            return INFINITE
        try:
            return int(style_name[-1])
        except ValueError:
            # Heading-like name without a trailing digit: not a numbered level.
            return INFINITE

    def parse_text(self) -> "tuple[str, str]":
        """Classify the paragraph and extract its useful text.

        Returns:
            ``('structure', text)`` for structural paragraphs. Otherwise the
            first marker of ``_TEXT_MARKERS`` found in the text gives
            ``(marker_type, text_after_first_occurrence)``; with no marker the
            result is ``('normal', text)``.
        """
        if self.is_structure:
            return 'structure', self.text
        for marker, kind in self._TEXT_MARKERS.items():
            # The marker is looked up anywhere in the text, and everything after
            # its first occurrence is kept, including later occurrences.
            _, found, remainder = self.text.partition(marker)
            if found:
                return kind, remainder
        return "normal", self.text

    def set_text(self, text: str) -> "Paragraph":
        """Replace the text on both the wrapper and the wrapped paragraph.

        NOTE(review): ``type`` and ``parsed_text`` are not recomputed here.

        Returns:
            ``self``, to allow chaining.
        """
        self.text = text
        self.xparagraph.text = text
        return self

    def center_paragraph(self) -> None:
        """Center the paragraph, but only when it contains an image."""
        if self.contains_image():
            self.xparagraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    def justify_paragraph(self) -> None:
        """Justify the paragraph, but only when its current style is named "Normal"."""
        if self.xparagraph.style.name == "Normal":
            self.xparagraph.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY

    def contains_image(self) -> bool:
        """True when the XML of at least one run contains ``"pic:pic"``."""
        return any("pic:pic" in run.element.xml for run in self.xparagraph.runs)

    def get_styles_in_paragraph_except_list(self) -> list:
        """Collect the style objects used by the paragraph, list styles excluded.

        Returns:
            The paragraph style (omitted when the paragraph is a list item)
            followed by the style of every run whose style name is neither
            "Default Paragraph Font" nor the paragraph style name.
        """
        paragraph_style = self.xparagraph.style
        styles = [] if self.is_list else [paragraph_style]
        styles.extend(
            run.style
            for run in self.xparagraph.runs
            if run.style.name not in ("Default Paragraph Font", paragraph_style.name)
        )
        return styles

    def get_list_styles(self) -> list:
        """Describe the list style of the paragraph.

        Returns:
            ``["<style name> : indentation = <n>"]`` for a list item, else ``[]``.
        """
        if not self.is_list:
            return []
        return [f"{self.xparagraph.style.name} : indentation = {self.list_indentation}"]

    def set_style(self, style) -> "Paragraph":
        """Assign ``style`` to the wrapped paragraph.

        NOTE(review): ``style_name``, ``level`` and ``is_structure`` are not
        recomputed here.

        Returns:
            ``self``, to allow chaining.
        """
        self.xparagraph.style = style
        return self