Spaces:

Adrien-AG
/

Gdoc

Paused

Gdoc / src /domain /paragraph.py

adrien.aribaut-gaudin

push on my interface

c626d10 over 2 years ago

5.58 kB

	import string
	from src.tools.doc_tools import get_positions, convert_to_png
	from docx.enum.text import WD_ALIGN_PARAGRAPH
	import xml.etree.ElementTree as ET
	from docx.oxml.ns import qn
	import zipfile
	import os
	import re


	# Sentinel level given to paragraphs whose style is not a numbered ".Titre" heading.
	INFINITE = 10000


	class Paragraph:
	    """Wrapper around a single paragraph object of a document.

	    The wrapped ``xparagraph`` must expose ``text``, ``style.name``, ``runs``
	    and ``alignment`` (presumably a python-docx paragraph -- confirm against
	    the caller).

	    Attributes set by ``__init__``:
	        xparagraph: the wrapped paragraph object.
	        id_: integer built from the digits ``2``, ``doc_id`` and ``id_``.
	        level: heading level derived from the style name, or ``INFINITE``.
	        is_structure: True when ``level`` is below ``INFINITE``.
	        text: text of the wrapped paragraph at construction time.
	        type: one of 'structure', 'task', 'comment' or 'normal'.
	        parsed_text: ``text`` with the part up to the task/comment marker removed.
	    """

	    def __init__(self, xparagraph, doc_id: int, id_: int):
	        """Wrap ``xparagraph`` and derive its id, level, type and parsed text.

	        Args:
	            xparagraph: paragraph object to wrap.
	            doc_id: identifier of the owning document.
	            id_: identifier of the paragraph within that document.
	        """
	        self.xparagraph = xparagraph
	        # The id is the decimal concatenation "2" + doc_id + id_, read back as an int.
	        self.id_ = int("2" + str(doc_id) + str(id_))
	        self.level = self.get_level_from_name(self.xparagraph.style.name)
	        self.is_structure = self.level < INFINITE
	        self.text = self.xparagraph.text
	        self.type, self.parsed_text = self.parse_text()

	    @property
	    def structure(self) -> dict:
	        """Return a one-entry dict, keyed by ``str(id_)``, describing this paragraph.

	        The entry is a leaf node ('isFolder' False, empty 'children') titled
	        with the paragraph text (presumably consumed by a tree-view UI --
	        confirm against the caller).
	        """
	        key = str(self.id_)
	        return {
	            key: {
	                'index': key,
	                'canMove': True,
	                'isFolder': False,
	                'children': [],
	                'title': self.text,
	                'canRename': True,
	                'data': {},
	                'level': self.level,
	            }
	        }

	    @property
	    def blank(self) -> bool:
	        """Return True when the text contains no ASCII letter.

	        Such a paragraph carries no textual signal and may be ignored.
	        Note that digits, punctuation and non-ASCII letters alone still
	        count as blank.
	        """
	        return set(self.text).isdisjoint(string.ascii_letters)

	    @staticmethod
	    def get_level_from_name(style_name: str) -> int:
	        """Return the heading level encoded in ``style_name``.

	        A style whose name contains '.Titre' and ends with a digit yields
	        that digit; every other style yields ``INFINITE``.
	        """
	        if '.Titre' not in style_name:
	            return INFINITE
	        try:
	            # Only the last character is considered, so levels are 0-9.
	            return int(style_name[-1])
	        except ValueError:
	            # Heading style without a numeric suffix: not a structural level.
	            return INFINITE

	    def parse_text(self) -> "tuple[str, str]":
	        """Classify the paragraph and return ``(type, parsed_text)``.

	        Structural paragraphs return ``('structure', text)``. Otherwise the
	        first marker found ("?? " for a task, "++ " for a comment, checked
	        in that order) decides the type, and the parsed text is everything
	        after the first occurrence of that marker. With no marker the
	        result is ``('normal', text)``.
	        """
	        if self.is_structure:
	            return 'structure', self.text

	        markers = {"?? ": "task", "++ ": "comment"}
	        for marker, kind in markers.items():
	            # NOTE(review): the marker is matched anywhere in the text, not only
	            # as a prefix; kept as-is so existing classification is unchanged.
	            # partition keeps the whole remainder even if the marker repeats.
	            _, found, remainder = self.text.partition(marker)
	            if found:
	                return kind, remainder

	        return "normal", self.text

	    def set_text(self, text: str) -> "Paragraph":
	        """Replace the text of both this wrapper and the wrapped paragraph.

	        Returns ``self`` so calls can be chained.

	        NOTE(review): ``type`` and ``parsed_text`` are not recomputed here;
	        confirm whether callers rely on that before changing it.
	        """
	        self.text = text
	        self.xparagraph.text = text
	        return self

	    def contains_image(self) -> bool:
	        """Return True when the XML of any run contains a ``pic:pic`` element."""
	        return any("pic:pic" in run.element.xml for run in self.xparagraph.runs)

	    def center_paragraph(self):
	        """Center the wrapped paragraph, but only if it contains an image."""
	        if self.contains_image():
	            self.xparagraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

	    def justify_paragraph(self):
	        """Justify the wrapped paragraph, but only if its style is "Normal"."""
	        if self.xparagraph.style.name == "Normal":
	            self.xparagraph.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY

	    # NOTE: disabled implementation, kept for reference only (never executed).
	    # def insert_paragraphs(self,images,template_doc):
	    #     empty_paragraph = Paragraph(template_doc.xdoc.add_paragraph(""),template_doc.id_,template_doc.container.paragraphs[-1].id_+1)
	    #     template_doc.add_paragraph(empty_paragraph)
	    #     template_xp = template_doc.xdoc.paragraphs[-1]
	    #     for run in self.xparagraph.runs:
	    #         new_run = template_xp.add_run(run.text)
	    #         if "pic:pic" in run.element.xml:
	    #             xml = run.element.xml
	    #             print(xml)
	    #             #check if there is the same image multiple times in the document
	    #             image_name = xml.split("pic:pic")[1].split('name="')[1].split('"')[0]
	    #             image_name = re.sub('[\s+]', '', image_name)
	    #             image_to_put = image_name.lower() + '.png'
	    #             #loop over all the cx and cy occurences and stop when both strings in between are numbers
	    #             width,height = get_positions(xml)
	    #             index_to_use = images.index("word/media/" + image_to_put)
	    #             new_run.add_picture(images[index_to_use], width=width, height=height)
	    #             # os.remove(images[0])
	    #     # return images

	    def get_styles_in_paragraph(self) -> list:
	        """Return the paragraph style name followed by the run style names.

	        Runs whose style is "Default Paragraph Font" are skipped; the
	        remaining names are listed in run order and may repeat.
	        """
	        run_styles = [
	            run.style.name
	            for run in self.xparagraph.runs
	            if run.style.name != "Default Paragraph Font"
	        ]
	        return [self.xparagraph.style.name] + run_styles