# Gdoc/src/domain/paragraph.py
# adrien.aribaut-gaudin
# push on my interface
# c626d10
import string
from src.tools.doc_tools import get_positions, convert_to_png
from docx.enum.text import WD_ALIGN_PARAGRAPH
import xml.etree.ElementTree as ET
from docx.oxml.ns import qn
import zipfile
import os
import re
# Sentinel heading level assigned to paragraphs whose style is not a title.
INFINITE = 10000


class Paragraph:
    """Wrap a python-docx-style paragraph object and expose parsed metadata.

    The wrapped object (``xparagraph``) must provide ``text``, ``style.name``,
    ``runs`` and a writable ``alignment`` attribute.

    Attributes set by ``__init__``:
        xparagraph: the wrapped paragraph object.
        id_: integer identifier built from the document id and paragraph id.
        level: heading level parsed from the style name, or ``INFINITE``.
        is_structure: ``True`` when ``level`` is below ``INFINITE``.
        text: the paragraph text as read from ``xparagraph``.
        type: one of ``'structure'``, ``'task'``, ``'comment'``, ``'normal'``.
        parsed_text: the text with the task/comment marker removed, if any.
    """

    # Marker -> paragraph type; checked in this order by ``parse_text``.
    _MARKERS = {"?? ": "task", "++ ": "comment"}

    def __init__(self, xparagraph, doc_id: int, id_: int):
        """Build the wrapper and derive level, type and parsed text.

        Args:
            xparagraph: paragraph object to wrap.
            doc_id: identifier of the owning document.
            id_: identifier of the paragraph inside that document.
        """
        self.xparagraph = xparagraph
        # The id is the decimal concatenation "2" + doc_id + id_.
        # NOTE(review): the leading 2 is presumably a type tag for paragraphs,
        # and plain concatenation can collide (doc 1/para 23 vs doc 12/para 3)
        # -- confirm with the callers before changing the scheme.
        self.id_ = int(f"2{doc_id}{id_}")
        self.level = self.get_level_from_name(self.xparagraph.style.name)
        self.is_structure = self.level < INFINITE
        self.text = self.xparagraph.text
        self.type, self.parsed_text = self.parse_text()

    @property
    def structure(self):
        """Return a one-entry dict describing this paragraph as a tree node.

        The single key is ``str(self.id_)``; the value carries the node
        fields (index, title, level, ...) with an empty ``children`` list.
        """
        key = str(self.id_)
        return {
            key: {
                'index': key,
                'canMove': True,
                'isFolder': False,
                'children': [],
                'title': self.text,
                'canRename': True,
                'data': {},
                'level': self.level,
            }
        }

    @property
    def blank(self):
        """Tell whether the paragraph carries no signal and may be ignored.

        A paragraph is blank when its text contains no letter at all.
        Letters are detected with ``str.isalpha`` so that text written only
        with non-ASCII letters is not mistaken for a blank paragraph; text
        made only of digits, punctuation or whitespace is still blank.
        """
        return not any(char.isalpha() for char in self.text)

    @staticmethod
    def get_level_from_name(style_name: str) -> int:
        """Return the heading level encoded in a style name.

        When ``style_name`` contains ``'.Titre'`` and its last character is a
        digit, that digit is the level. In every other case ``INFINITE`` is
        returned.
        """
        if '.Titre' not in style_name:
            return INFINITE
        try:
            return int(style_name[-1])
        except ValueError:
            # The title style has no trailing digit: not a numbered level.
            return INFINITE

    def parse_text(self) -> "tuple[str, str]":
        """Classify the paragraph and strip its marker.

        Returns:
            A ``(type, text)`` pair. Structure paragraphs yield
            ``('structure', text)``. Otherwise the first marker of
            ``_MARKERS`` found in the text decides the type, and the returned
            text is everything after the first occurrence of that marker.
            Without any marker the result is ``('normal', text)``.
        """
        if self.is_structure:
            return 'structure', self.text
        # NOTE(review): the marker is searched anywhere in the text, not only
        # at its start -- kept as is; confirm whether a strict prefix match
        # was intended.
        for marker, kind in self._MARKERS.items():
            # partition keeps the whole remainder even when the marker is
            # repeated later in the text.
            _before, found, remainder = self.text.partition(marker)
            if found:
                return kind, remainder
        return "normal", self.text

    def set_text(self, text: str):
        """Replace the text on both the wrapper and the wrapped paragraph.

        Returns ``self`` so calls can be chained.
        """
        # NOTE(review): ``type`` and ``parsed_text`` are not recomputed here;
        # confirm whether callers rely on them keeping their original values.
        self.text = text
        self.xparagraph.text = text
        return self

    def contains_image(self) -> bool:
        """Return ``True`` when the XML of any run contains ``pic:pic``."""
        return any("pic:pic" in run.element.xml for run in self.xparagraph.runs)

    def center_paragraph(self):
        """Center the paragraph, but only when it contains an image."""
        if self.contains_image():
            self.xparagraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    def justify_paragraph(self):
        """Justify the paragraph, but only when its style is ``"Normal"``."""
        if self.xparagraph.style.name == "Normal":
            self.xparagraph.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY

    def get_styles_in_paragraph(self):
        """List the style names used by the paragraph.

        The paragraph style name comes first, followed by the style name of
        every run whose style is not ``"Default Paragraph Font"`` (duplicates
        are kept, in run order).
        """
        run_styles = [
            run.style.name
            for run in self.xparagraph.runs
            if run.style.name != "Default Paragraph Font"
        ]
        return [self.xparagraph.style.name, *run_styles]