Spaces:

Bogdan555
/

grantforge-api

Running

grantforge-api / backend /utils /export_documents.py

GrantForge Bot

Deploy to Hugging Face

afd56bc about 12 hours ago

14.7 kB

	import os
	import markdown
	import datetime
	from bs4 import BeautifulSoup


	def _style_exists(doc, style_name):
	    """Return True when ``doc`` defines a style called ``style_name``."""
	    try:
	        doc.styles[style_name]
	    except KeyError:
	        return False
	    return True


	def _add_inline_runs(paragraph, node, skip_tags=()):
	    """Append the direct children of ``node`` to ``paragraph`` as runs.

	    Plain text becomes a plain run, ``<strong>/<b>`` a bold run and
	    ``<em>/<i>`` an italic run; any other tag contributes its text
	    unformatted. Children whose tag name is in ``skip_tags`` are ignored.
	    """
	    for child in node.children:
	        if child.name is None:
	            paragraph.add_run(child.string)
	        elif child.name in skip_tags:
	            continue
	        elif child.name in ("strong", "b"):
	            paragraph.add_run(child.text).bold = True
	        elif child.name in ("em", "i"):
	            paragraph.add_run(child.text).italic = True
	        else:
	            paragraph.add_run(child.text)


	def export_to_docx(
	    content: str,
	    output_path: str,
	    template: str = "standard",
	    project_title: str = "Wniosek o Dofinansowanie",
	    company_name: str = "Brak nazwy",
	    version: str = "1.0",
	    date_str: str = "",
	    extra_context: dict = None,
	):
	    """
	    Export a Markdown application to a Microsoft Word (DOCX) file.

	    A ``template_<name>.docx`` file from ``../templates`` is rendered with
	    ``docxtpl`` (Jinja2 tags), saved to ``output_path``, then reopened with
	    python-docx so the Markdown body can be appended as native headings,
	    paragraphs, lists and tables.

	    Args:
	        content: Markdown source. A leading ``# `` title line is dropped.
	        output_path: Destination path of the DOCX file.
	        template: "standard", "official" or "modern"; anything else falls
	            back to "standard". "official" justifies body paragraphs.
	        project_title: Rendered into the ``tytul_projektu`` template tag.
	        company_name: Rendered into the ``nazwa_firmy`` template tag.
	        version: Rendered into the ``wersja`` template tag.
	        date_str: Rendered into the ``data_generowania`` template tag.
	        extra_context: Extra template variables; they override the defaults.

	    Returns:
	        True on success. False when the template file is missing or any
	        error occurs (the traceback is printed, nothing is raised).
	    """
	    if extra_context is None:
	        extra_context = {}

	    try:
	        from docxtpl import DocxTemplate

	        template_name = (
	            template if template in ["standard", "official", "modern"] else "standard"
	        )
	        template_path = os.path.join(
	            os.path.dirname(__file__),
	            "..",
	            "templates",
	            f"template_{template_name}.docx",
	        )

	        if not os.path.exists(template_path):
	            print(f"Brak pliku {template_path}, upewnij się, że wygenerowano szablony!")
	            return False

	        tpl = DocxTemplate(template_path)

	        # Drop a leading H1 so the title is not repeated.
	        if content.startswith("# "):
	            content = "\n".join(content.split("\n")[1:])

	        # 'tresc_wniosku' is rendered empty on purpose: the body is appended
	        # below with python-docx instead of being injected as plain text.
	        render_context = {
	            "tytul_projektu": project_title,
	            "nazwa_firmy": company_name,
	            "data_generowania": date_str,
	            "wersja": version,
	            "tresc_wniosku": "",
	        }
	        # Caller-supplied values win over the defaults above.
	        render_context.update(extra_context)

	        tpl.render(render_context)
	        tpl.save(output_path)

	        # Second pass: reopen the rendered file with python-docx.
	        import docx

	        doc = docx.Document(output_path)

	        # Remove the trailing paragraph left empty by the blank 'tresc_wniosku'.
	        paragraphs = doc.paragraphs
	        if paragraphs and paragraphs[-1].text.strip() == "":
	            last = paragraphs[-1]
	            last._element.getparent().remove(last._element)

	        # The "tables" extension is required, otherwise Markdown tables are
	        # emitted as ordinary paragraphs and the table branch never runs.
	        html = markdown.markdown(content, extensions=["tables"])
	        soup = BeautifulSoup(html, "html.parser")

	        for element in soup:
	            if element.name in ["h1", "h2", "h3", "h4", "h5", "h6"]:
	                level = int(element.name[1])
	                heading_style = f"Heading {level}"
	                # Check the style first: add_paragraph() appends the paragraph
	                # before resolving the style, so a failed lookup would leave a
	                # stray duplicate behind.
	                if _style_exists(doc, heading_style):
	                    doc.add_paragraph(element.text, style=heading_style)
	                else:
	                    doc.add_paragraph().add_run(element.text).bold = True
	            elif element.name == "p":
	                p = doc.add_paragraph()
	                if template == "official":
	                    p.paragraph_format.alignment = 3  # Justify
	                _add_inline_runs(p, element)
	            elif element.name in ["ul", "ol"]:
	                # find_all is recursive, so nested items are flattened into
	                # their own paragraphs.
	                for li in element.find_all("li"):
	                    ordered = li.parent is not None and li.parent.name == "ol"
	                    style_name = "List Number" if ordered else "List Bullet"
	                    if _style_exists(doc, style_name):
	                        p = doc.add_paragraph(style=style_name)
	                    else:
	                        # Template has no list styles: emulate the marker.
	                        p = doc.add_paragraph()
	                        if ordered:
	                            position = len(li.find_previous_siblings("li")) + 1
	                            p.add_run(f"{position}. ")
	                        else:
	                            p.add_run("• ")
	                    # Nested lists get their own paragraphs (see above), so
	                    # skip them here to avoid duplicating their text.
	                    _add_inline_runs(p, li, skip_tags=("ul", "ol"))
	            elif element.name == "table":
	                rows = element.find_all("tr")
	                if rows:
	                    cols = max(len(row.find_all(["th", "td"])) for row in rows)
	                    table = doc.add_table(rows=0, cols=cols)
	                    try:
	                        table.style = "Light Shading Accent 1"
	                    except KeyError:
	                        table.style = "Table Grid"

	                    for idx_row, tr in enumerate(rows):
	                        row = table.add_row()
	                        cells = tr.find_all(["th", "td"])
	                        for idx, cell in enumerate(cells):
	                            if idx < cols:
	                                p = row.cells[idx].paragraphs[0]
	                                p.text = cell.text.strip()
	                                # Header cells (<th> or first row) are bold.
	                                if cell.name == "th" or idx_row == 0:
	                                    if p.runs:
	                                        p.runs[0].bold = True

	        doc.save(output_path)
	        return True
	    except Exception:
	        import traceback

	        print(f"Błąd eksportu do DOCX: {traceback.format_exc()}")
	        return False


	def get_pdf_css(template: str) -> str:
	    """Return the CSS stylesheet used for PDF export of ``template``.

	    Recognised names are "official", "modern" and "enterprise"; any other
	    value yields the "standard" stylesheet.
	    """
	    official_css = """
	@page { size: A4; margin: 2.5cm; }
	body { font-family: "DejaVu Sans", "Arial", serif; font-size: 11pt; line-height: 1.5; text-align: justify; color: #000; }
	h1, h2, h3 { color: #000; page-break-after: avoid; font-family: "DejaVu Sans", "Arial", serif; }
	h1 { border-bottom: 2px solid #000; padding-bottom: 5px; text-transform: uppercase; text-align: center; font-size: 16pt; margin-top: 2em; }
	h2 { font-size: 14pt; margin-top: 1.5em; }
	h3 { font-size: 12pt; margin-top: 1.2em; font-style: italic; }
	table { width: 100%; border-collapse: collapse; margin: 1em 0; page-break-inside: avoid; }
	th, td { border: 1px solid #000; padding: 8px; text-align: left; }
	p { margin-bottom: 1em; orphans: 3; widows: 3; }
	a { color: #000; text-decoration: none; }
	.toc { page-break-after: always; }
	.toc ul { list-style-type: none; padding-left: 1.5em; }
	.toc > ul { padding-left: 0; }
	.toc a { text-decoration: none; color: #000; }
	"""
	    modern_css = """
	@page { size: A4; margin: 2.5cm; }
	body { font-family: "DejaVu Sans", "Arial", sans-serif; font-size: 11pt; line-height: 1.6; color: #1e293b; background: #fff; }
	h1, h2, h3 { page-break-after: avoid; color: #0f172a; font-family: "DejaVu Sans", "Arial", sans-serif; }
	h1 { border-bottom: 2px solid #3b82f6; padding-bottom: 0.5em; font-size: 24pt; margin-top: 1em; }
	h2 { border-bottom: 1px solid #e2e8f0; padding-bottom: 0.3em; font-size: 18pt; margin-top: 1.5em; color: #2563eb; }
	h3 { font-size: 14pt; margin-top: 1.2em; color: #334155; }
	p { margin-bottom: 1em; text-align: justify; orphans: 3; widows: 3; }
	table { width: 100%; border-collapse: collapse; margin: 1.5em 0; background: #f8fafc; }
	th, td { border: 1px solid #e2e8f0; padding: 12px; text-align: left; }
	th { background-color: #f1f5f9; color: #334155; font-weight: bold; }
	ul, ol { margin-bottom: 1em; padding-left: 2em; }
	li { margin-bottom: 0.5em; }
	.toc { page-break-after: always; padding: 2em; background: #f8fafc; border-radius: 8px; }
	.toc ul { list-style-type: none; padding-left: 1.5em; }
	.toc > ul { padding-left: 0; }
	.toc a { text-decoration: none; color: #4f46e5; border-bottom: 1px dotted #cbd5e1; display: block; padding-bottom: 5px; margin-bottom: 5px; }
	"""
	    enterprise_css = """
	@page { size: A4; margin: 2.5cm; }
	body { font-family: "DejaVu Sans", "Arial", sans-serif; font-size: 11pt; line-height: 1.6; color: #1f2937; background: #fff; }
	h1, h2, h3 { page-break-after: avoid; color: #1e3a8a; font-family: "DejaVu Sans", "Arial", sans-serif; }
	h1 { border-bottom: 2px solid #10b981; padding-bottom: 0.5em; font-size: 24pt; margin-top: 1em; }
	h2 { border-bottom: 1px solid #e5e7eb; padding-bottom: 0.3em; font-size: 18pt; margin-top: 1.5em; color: #1e40af; }
	h3 { font-size: 14pt; margin-top: 1.2em; color: #374151; }
	p { margin-bottom: 1em; text-align: justify; orphans: 3; widows: 3; }
	table { width: 100%; border-collapse: collapse; margin: 1.5em 0; background: #ffffff; }
	th, td { border: 1px solid #d1d5db; padding: 12px; text-align: left; }
	th { background-color: #f3f4f6; color: #1f2937; font-weight: bold; }
	ul, ol { margin-bottom: 1em; padding-left: 2em; }
	li { margin-bottom: 0.5em; }
	.toc { page-break-after: always; padding: 2em; background: #f9fafb; border-radius: 8px; border-left: 4px solid #10b981; }
	.toc ul { list-style-type: none; padding-left: 1.5em; }
	.toc > ul { padding-left: 0; }
	.toc a { text-decoration: none; color: #1e3a8a; border-bottom: 1px dotted #9ca3af; display: block; padding-bottom: 5px; margin-bottom: 5px; }
	"""
	    standard_css = """
	@page { size: A4; margin: 2cm; }
	body { font-family: "DejaVu Sans", "Arial", sans-serif; font-size: 11pt; line-height: 1.6; color: #333; }
	h1, h2, h3 { page-break-after: avoid; }
	h1 { border-bottom: 2px solid #3498db; padding-bottom: 10px; color: #2c3e50; }
	h2 { color: #2980b9; margin-top: 1.5em; }
	table { width: 100%; border-collapse: collapse; margin: 1em 0; }
	th, td { border: 1px solid #bdc3c7; padding: 8px; text-align: left; }
	th { background-color: #ecf0f1; }
	p { margin-bottom: 1em; text-align: justify; orphans: 3; widows: 3; }
	.toc { page-break-after: always; margin-top: 2em; }
	.toc ul { list-style-type: none; padding-left: 1.5em; }
	.toc > ul { padding-left: 0; }
	.toc a { text-decoration: none; color: #2980b9; border-bottom: 1px dotted #bdc3c7; display: block; padding-bottom: 5px; margin-bottom: 5px; }
	"""

	    # Scan the named stylesheets in order; unknown names fall through to
	    # the standard one.
	    named_stylesheets = (
	        ("official", official_css),
	        ("modern", modern_css),
	        ("enterprise", enterprise_css),
	    )
	    for style_name, stylesheet in named_stylesheets:
	        if template == style_name:
	            return stylesheet
	    return standard_css


	def _ensure_pdf_font():
	    """Return the path of the local Roboto font, downloading it if missing.

	    Best effort: returns "" when the font cannot be provided.
	    """
	    import shutil
	    import urllib.request

	    font_dir = os.path.abspath(
	        os.path.join(os.path.dirname(__file__), "..", "assets")
	    )
	    font_path = os.path.join(font_dir, "Roboto-Regular.ttf")
	    if os.path.exists(font_path):
	        return font_path

	    print("Czcionka Roboto nie istnieje, pobieram...")
	    font_url = "https://github.com/google/fonts/raw/main/ofl/roboto/Roboto-Regular.ttf"
	    # Download to a temporary name and rename on success, so an interrupted
	    # transfer is never mistaken for a valid font on the next call.
	    partial_path = font_path + ".part"
	    try:
	        os.makedirs(font_dir, exist_ok=True)
	        with urllib.request.urlopen(font_url, timeout=30) as response, open(
	            partial_path, "wb"
	        ) as partial_file:
	            shutil.copyfileobj(response, partial_file)
	        os.replace(partial_path, font_path)
	    except Exception as e:
	        print(f"Nie udało się pobrać czcionki: {e}")
	        try:
	            os.remove(partial_path)
	        except OSError:
	            pass
	        return ""
	    return font_path


	def export_to_pdf(
	    content: str,
	    output_path: str,
	    template: str = "standard",
	    project_title: str = "Wniosek o Dofinansowanie",
	    company_name: str = "Brak nazwy",
	    version: str = "1.0",
	    date_str: str = "",
	):
	    """
	    Export a Markdown application to a PDF file using xhtml2pdf (pisa).

	    The document consists of a cover page (title, company, date, version),
	    a forced page break, a table of contents and the rendered Markdown body.

	    Args:
	        content: Markdown source. A leading ``# `` title line is dropped so
	            it does not duplicate the cover page.
	        output_path: Destination path of the PDF file.
	        template: Stylesheet name passed to ``get_pdf_css``; "enterprise"
	            additionally adds a logo line to the cover page.
	        project_title: Cover-page title.
	        company_name: Cover-page company name.
	        version: Version shown in the cover-page footer.
	        date_str: Creation date; defaults to today as ``DD.MM.YYYY``.

	    Returns:
	        True on success, False when rendering fails (the traceback is
	        printed).

	    Raises:
	        Exception: when xhtml2pdf is not installed.
	    """
	    try:
	        from xhtml2pdf import pisa
	    except ImportError as err:
	        print("Nie mozna pobrac xhtml2pdf.")
	        raise Exception(
	            "Należy zainstalować xhtml2pdf (pip install xhtml2pdf)."
	        ) from err

	    try:
	        from html import escape

	        # Drop a leading H1 so the title is not repeated after the cover page.
	        if content.startswith("# "):
	            content = "\n".join(content.split("\n")[1:])

	        md_content = f"[TOC]\n\n{content}"
	        html_body = markdown.markdown(
	            md_content,
	            extensions=["tables", "fenced_code", "toc"],
	            extension_configs={'toc': {'title': 'Spis Treści'}}
	        )

	        font_path = _ensure_pdf_font()

	        # NOTE(review): the "Roboto" face registered here is not referenced
	        # by any font-family rule returned by get_pdf_css - confirm whether
	        # those rules should list "Roboto" first.
	        font_face = ""
	        if font_path:
	            font_face = f"""
	@font-face {{
	font-family: "Roboto";
	src: url("file://{font_path}");
	}}
	"""
	        css_style = font_face + get_pdf_css(template)

	        if not date_str:
	            date_str = datetime.datetime.now().strftime("%d.%m.%Y")

	        logo_html = ""
	        if template == "enterprise":
	            logo_html = '<div style="color: #10b981; font-size: 24pt; font-weight: bold; margin-bottom: 20px;">♦ GrantForge</div>'

	        # Caller-supplied text goes into markup, so escape it: a name such
	        # as "A & B <Sp.>" must not be parsed as HTML.
	        safe_title = escape(str(project_title))
	        safe_company = escape(str(company_name))
	        safe_date = escape(str(date_str))
	        safe_version = escape(str(version))

	        # Note: xhtml2pdf does not fully support flexbox, so we use standard block centering.
	        html_content = f"""
	<!DOCTYPE html>
	<html>
	<head>
	<meta charset="utf-8">
	<style>
	{css_style}
	.cover-page {{
	text-align: center;
	padding-top: 250px;
	}}
	</style>
	</head>
	<body>
	<div class="cover-page">
	{logo_html}
	<h1 style="border: none; font-size: 32pt; margin-bottom: 20px;">{safe_title}</h1>
	<p style="font-size: 16pt; color: #2c3e50; font-weight: bold;">{safe_company}</p>
	<div style="margin-top: 150px;">
	<p style="font-size: 11pt; color: #95a5a6; font-weight: bold; margin-bottom: 5px;">Wygenerowano z użyciem systemu wsparcia DotacjeAI</p>
	<p style="font-size: 10pt; color: #95a5a6;">Dokument utworzony: {safe_date} | Wersja: {safe_version}</p>
	</div>
	</div>
	<!-- Force a new page after the cover page -->
	<pdf:nextpage />
	{html_body}
	</body>
	</html>
	"""

	        with open(output_path, "wb") as pdf_file:
	            pisa_status = pisa.CreatePDF(html_content.encode("utf-8"), dest=pdf_file, encoding='utf-8')
	            if pisa_status.err:
	                print(f"Błąd pisa: {pisa_status.err}")
	                return False

	        return True
	    except Exception:
	        import traceback

	        print(f"Błąd eksportu do PDF: {traceback.format_exc()}")
	        return False