grantforge-api / backend /utils /export_documents.py
GrantForge Bot
Deploy to Hugging Face
afd56bc
import os
import markdown
import datetime
from bs4 import BeautifulSoup
def export_to_docx(
    content: str,
    output_path: str,
    template: str = "standard",
    project_title: str = "Wniosek o Dofinansowanie",
    company_name: str = "Brak nazwy",
    version: str = "1.0",
    date_str: str = "",
    extra_context: dict = None,
):
    """Export a Markdown grant application to a Microsoft Word (DOCX) file.

    The export runs in two passes:

    1. A ``template_<name>.docx`` file from the sibling ``templates``
       directory is rendered with ``docxtpl`` (Jinja2 placeholders such as
       ``{{ tytul_projektu }}``) and saved to ``output_path``.
    2. The saved file is reopened with ``python-docx`` and the Markdown body
       is appended as native Word headings, paragraphs, lists and tables.

    Args:
        content: Application body in Markdown. A leading ``# `` title line is
            dropped so it does not duplicate the title rendered by the
            template.
        output_path: Destination path of the generated ``.docx`` file.
        template: ``"standard"``, ``"official"`` or ``"modern"``; any other
            value falls back to ``"standard"``.
        project_title: Value for the ``tytul_projektu`` placeholder.
        company_name: Value for the ``nazwa_firmy`` placeholder.
        version: Value for the ``wersja`` placeholder.
        date_str: Value for the ``data_generowania`` placeholder; defaults to
            today's date (``DD.MM.YYYY``), matching ``export_to_pdf``.
        extra_context: Additional placeholder values; they override the
            built-in ones on key collision.

    Returns:
        ``True`` when the document was written. ``False`` when the template
        file is missing or any step raised (the traceback is printed).
    """
    if extra_context is None:
        extra_context = {}
    try:
        from docxtpl import DocxTemplate

        template_name = (
            template if template in ("standard", "official", "modern") else "standard"
        )
        template_path = os.path.join(
            os.path.dirname(__file__),
            "..",
            "templates",
            f"template_{template_name}.docx",
        )
        if not os.path.exists(template_path):
            print(f"Brak pliku {template_path}, upewnij się, że wygenerowano szablony!")
            return False

        tpl = DocxTemplate(template_path)

        # Drop a leading H1 so the title is not repeated below the cover.
        if content.startswith("# "):
            content = "\n".join(content.split("\n")[1:])

        # Same default as export_to_pdf so both formats show a generation date.
        if not date_str:
            date_str = datetime.datetime.now().strftime("%d.%m.%Y")

        # 'tresc_wniosku' is rendered empty on purpose: the body is appended
        # below with python-docx instead of being injected as plain text.
        render_context = {
            "tytul_projektu": project_title,
            "nazwa_firmy": company_name,
            "data_generowania": date_str,
            "wersja": version,
            "tresc_wniosku": "",
        }
        render_context.update(extra_context)
        tpl.render(render_context)
        tpl.save(output_path)

        # Second pass: reopen the rendered file with python-docx.
        import docx
        from docx.enum.text import WD_ALIGN_PARAGRAPH

        doc = docx.Document(output_path)

        # Remove the trailing empty paragraph left by the blank
        # 'tresc_wniosku' placeholder.
        paragraphs = doc.paragraphs
        if paragraphs and paragraphs[-1].text.strip() == "":
            placeholder = paragraphs[-1]
            placeholder._element.getparent().remove(placeholder._element)

        # The "tables" extension is required for Markdown pipe tables to be
        # emitted as <table>; without it the table branch below never runs.
        html = markdown.markdown(content, extensions=["tables"])
        soup = BeautifulSoup(html, "html.parser")

        for element in soup:
            tag = element.name
            if tag is None:
                # Bare text node (whitespace between block elements).
                continue
            if tag in ("h1", "h2", "h3", "h4", "h5", "h6"):
                _docx_add_heading(doc, element.text, int(tag[1]))
            elif tag == "p":
                paragraph = doc.add_paragraph()
                if template_name == "official":
                    paragraph.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
                _docx_add_inline_runs(paragraph, element)
            elif tag in ("ul", "ol"):
                _docx_add_list(doc, element)
            elif tag == "table":
                _docx_add_table(doc, element)
            else:
                # Keep the text of blocks without a dedicated handler
                # (blockquote, pre, ...) instead of silently dropping it.
                leftover = element.get_text().strip()
                if leftover:
                    doc.add_paragraph(leftover)

        doc.save(output_path)
        return True
    except Exception:
        import traceback

        print(f"Błąd eksportu do DOCX: {traceback.format_exc()}")
        return False


def _docx_has_style(doc, style_name):
    """Return True when *doc* defines a style named *style_name*."""
    try:
        doc.styles[style_name]
    except KeyError:
        return False
    return True


def _docx_add_inline_runs(paragraph, node):
    """Append the children of *node* to *paragraph*, keeping bold and italic."""
    for child in node.children:
        if child.name is None:
            # Plain text node.
            paragraph.add_run(child.string)
        elif child.name in ("strong", "b"):
            paragraph.add_run(child.text).bold = True
        elif child.name in ("em", "i"):
            paragraph.add_run(child.text).italic = True
        else:
            paragraph.add_run(child.text)


def _docx_add_heading(doc, text, level):
    """Append a heading, falling back to a bold paragraph without the style."""
    style_name = f"Heading {level}"
    # The style is checked up front: add_paragraph() appends the paragraph
    # before resolving the style, so a failed call would leave a duplicate,
    # and add_heading() looks up the very same style name.
    if _docx_has_style(doc, style_name):
        doc.add_paragraph(text, style=style_name)
    else:
        doc.add_paragraph().add_run(text).bold = True


def _docx_add_list(doc, list_tag):
    """Append every <li> of *list_tag* as a bulleted or numbered paragraph."""
    ordered = list_tag.name == "ol"
    style_name = "List Number" if ordered else "List Bullet"
    styled = _docx_has_style(doc, style_name)
    for number, item in enumerate(list_tag.find_all("li"), start=1):
        if styled:
            paragraph = doc.add_paragraph(style=style_name)
        else:
            # Template lacks the list style: emit the marker as literal text.
            paragraph = doc.add_paragraph()
            paragraph.add_run(f"{number}. " if ordered else "• ")
        _docx_add_inline_runs(paragraph, item)


def _docx_add_table(doc, table_tag):
    """Append *table_tag* as a Word table with a bold header row."""
    rows = table_tag.find_all("tr")
    if not rows:
        return
    col_count = max(len(tr.find_all(["th", "td"])) for tr in rows)
    if not col_count:
        return
    table = doc.add_table(rows=0, cols=col_count)
    # Prefer the shaded style; fall back to a plain grid, else keep default.
    for style_name in ("Light Shading Accent 1", "Table Grid"):
        if _docx_has_style(doc, style_name):
            table.style = style_name
            break
    for row_index, tr in enumerate(rows):
        docx_cells = table.add_row().cells
        for docx_cell, cell in zip(docx_cells, tr.find_all(["th", "td"])):
            paragraph = docx_cell.paragraphs[0]
            paragraph.text = cell.text.strip()
            # Header cells (<th> or anything in the first row) are bold.
            if (cell.name == "th" or row_index == 0) and paragraph.runs:
                paragraph.runs[0].bold = True
_PDF_CSS_OFFICIAL = """
@page { size: A4; margin: 2.5cm; }
body { font-family: "DejaVu Sans", "Arial", serif; font-size: 11pt; line-height: 1.5; text-align: justify; color: #000; }
h1, h2, h3 { color: #000; page-break-after: avoid; font-family: "DejaVu Sans", "Arial", serif; }
h1 { border-bottom: 2px solid #000; padding-bottom: 5px; text-transform: uppercase; text-align: center; font-size: 16pt; margin-top: 2em; }
h2 { font-size: 14pt; margin-top: 1.5em; }
h3 { font-size: 12pt; margin-top: 1.2em; font-style: italic; }
table { width: 100%; border-collapse: collapse; margin: 1em 0; page-break-inside: avoid; }
th, td { border: 1px solid #000; padding: 8px; text-align: left; }
p { margin-bottom: 1em; orphans: 3; widows: 3; }
a { color: #000; text-decoration: none; }
.toc { page-break-after: always; }
.toc ul { list-style-type: none; padding-left: 1.5em; }
.toc > ul { padding-left: 0; }
.toc a { text-decoration: none; color: #000; }
"""

_PDF_CSS_MODERN = """
@page { size: A4; margin: 2.5cm; }
body { font-family: "DejaVu Sans", "Arial", sans-serif; font-size: 11pt; line-height: 1.6; color: #1e293b; background: #fff; }
h1, h2, h3 { page-break-after: avoid; color: #0f172a; font-family: "DejaVu Sans", "Arial", sans-serif; }
h1 { border-bottom: 2px solid #3b82f6; padding-bottom: 0.5em; font-size: 24pt; margin-top: 1em; }
h2 { border-bottom: 1px solid #e2e8f0; padding-bottom: 0.3em; font-size: 18pt; margin-top: 1.5em; color: #2563eb; }
h3 { font-size: 14pt; margin-top: 1.2em; color: #334155; }
p { margin-bottom: 1em; text-align: justify; orphans: 3; widows: 3; }
table { width: 100%; border-collapse: collapse; margin: 1.5em 0; background: #f8fafc; }
th, td { border: 1px solid #e2e8f0; padding: 12px; text-align: left; }
th { background-color: #f1f5f9; color: #334155; font-weight: bold; }
ul, ol { margin-bottom: 1em; padding-left: 2em; }
li { margin-bottom: 0.5em; }
.toc { page-break-after: always; padding: 2em; background: #f8fafc; border-radius: 8px; }
.toc ul { list-style-type: none; padding-left: 1.5em; }
.toc > ul { padding-left: 0; }
.toc a { text-decoration: none; color: #4f46e5; border-bottom: 1px dotted #cbd5e1; display: block; padding-bottom: 5px; margin-bottom: 5px; }
"""

_PDF_CSS_ENTERPRISE = """
@page { size: A4; margin: 2.5cm; }
body { font-family: "DejaVu Sans", "Arial", sans-serif; font-size: 11pt; line-height: 1.6; color: #1f2937; background: #fff; }
h1, h2, h3 { page-break-after: avoid; color: #1e3a8a; font-family: "DejaVu Sans", "Arial", sans-serif; }
h1 { border-bottom: 2px solid #10b981; padding-bottom: 0.5em; font-size: 24pt; margin-top: 1em; }
h2 { border-bottom: 1px solid #e5e7eb; padding-bottom: 0.3em; font-size: 18pt; margin-top: 1.5em; color: #1e40af; }
h3 { font-size: 14pt; margin-top: 1.2em; color: #374151; }
p { margin-bottom: 1em; text-align: justify; orphans: 3; widows: 3; }
table { width: 100%; border-collapse: collapse; margin: 1.5em 0; background: #ffffff; }
th, td { border: 1px solid #d1d5db; padding: 12px; text-align: left; }
th { background-color: #f3f4f6; color: #1f2937; font-weight: bold; }
ul, ol { margin-bottom: 1em; padding-left: 2em; }
li { margin-bottom: 0.5em; }
.toc { page-break-after: always; padding: 2em; background: #f9fafb; border-radius: 8px; border-left: 4px solid #10b981; }
.toc ul { list-style-type: none; padding-left: 1.5em; }
.toc > ul { padding-left: 0; }
.toc a { text-decoration: none; color: #1e3a8a; border-bottom: 1px dotted #9ca3af; display: block; padding-bottom: 5px; margin-bottom: 5px; }
"""

_PDF_CSS_STANDARD = """
@page { size: A4; margin: 2cm; }
body { font-family: "DejaVu Sans", "Arial", sans-serif; font-size: 11pt; line-height: 1.6; color: #333; }
h1, h2, h3 { page-break-after: avoid; }
h1 { border-bottom: 2px solid #3498db; padding-bottom: 10px; color: #2c3e50; }
h2 { color: #2980b9; margin-top: 1.5em; }
table { width: 100%; border-collapse: collapse; margin: 1em 0; }
th, td { border: 1px solid #bdc3c7; padding: 8px; text-align: left; }
th { background-color: #ecf0f1; }
p { margin-bottom: 1em; text-align: justify; orphans: 3; widows: 3; }
.toc { page-break-after: always; margin-top: 2em; }
.toc ul { list-style-type: none; padding-left: 1.5em; }
.toc > ul { padding-left: 0; }
.toc a { text-decoration: none; color: #2980b9; border-bottom: 1px dotted #bdc3c7; display: block; padding-bottom: 5px; margin-bottom: 5px; }
"""

# Named (non-default) stylesheets, checked in order by get_pdf_css().
_PDF_CSS_VARIANTS = (
    ("official", _PDF_CSS_OFFICIAL),
    ("modern", _PDF_CSS_MODERN),
    ("enterprise", _PDF_CSS_ENTERPRISE),
)


def get_pdf_css(template: str) -> str:
    """Return the CSS stylesheet used for the given PDF template.

    Recognised names are ``"official"``, ``"modern"`` and ``"enterprise"``;
    every other value (including ``"standard"``) yields the standard
    stylesheet.
    """
    for variant_name, stylesheet in _PDF_CSS_VARIANTS:
        if template == variant_name:
            return stylesheet
    return _PDF_CSS_STANDARD
def export_to_pdf(
    content: str,
    output_path: str,
    template: str = "standard",
    project_title: str = "Wniosek o Dofinansowanie",
    company_name: str = "Brak nazwy",
    version: str = "1.0",
    date_str: str = "",
):
    """Export a Markdown grant application to PDF using xhtml2pdf (pisa).

    The Markdown body is converted to HTML with a generated table of
    contents, wrapped in a cover page carrying the project metadata, styled
    with the stylesheet from ``get_pdf_css(template)`` and rendered to
    ``output_path``.

    Args:
        content: Application body in Markdown. A leading ``# `` title line is
            dropped so it does not duplicate the cover-page title.
        output_path: Destination path of the generated PDF file.
        template: Stylesheet name passed to ``get_pdf_css``; ``"enterprise"``
            additionally adds a logo line to the cover page.
        project_title: Title shown on the cover page.
        company_name: Company name shown on the cover page.
        version: Document version shown on the cover page.
        date_str: Creation date shown on the cover page; defaults to today's
            date formatted as ``DD.MM.YYYY``.

    Returns:
        ``True`` when the PDF was rendered without errors, ``False`` when
        rendering reported an error or any step raised (the traceback is
        printed).

    Raises:
        Exception: When ``xhtml2pdf`` is not installed.
    """
    try:
        from xhtml2pdf import pisa
    except ImportError as err:
        print("Nie mozna pobrac xhtml2pdf.")
        raise Exception(
            "Należy zainstalować xhtml2pdf (pip install xhtml2pdf)."
        ) from err
    try:
        from html import escape

        # Drop a leading H1 so the title is not repeated after the cover page.
        if content.startswith("# "):
            content = "\n".join(content.split("\n")[1:])
        md_content = f"[TOC]\n\n{content}"
        html_body = markdown.markdown(
            md_content,
            extensions=["tables", "fenced_code", "toc"],
            extension_configs={'toc': {'title': 'Spis Treści'}}
        )

        font_path = _pdf_ensure_roboto_font()
        font_face = ""
        font_override = ""
        if font_path:
            font_face = f"""
            @font-face {{
                font-family: "Roboto";
                src: url("file://{font_path}");
            }}
            """
            # The template stylesheets never name "Roboto", so without this
            # rule the registered font would go unused. It is appended after
            # the template CSS so it takes precedence.
            font_override = """
            body, h1, h2, h3, h4, h5, h6, p, li, th, td {
                font-family: "Roboto", "DejaVu Sans", "Arial", sans-serif;
            }
            """
        css_style = font_face + get_pdf_css(template) + font_override

        if not date_str:
            date_str = datetime.datetime.now().strftime("%d.%m.%Y")

        # Metadata is caller-supplied plain text: escape it so characters such
        # as '&' or '<' cannot break or inject markup into the cover page.
        safe_title = escape(str(project_title))
        safe_company = escape(str(company_name))
        safe_version = escape(str(version))
        safe_date = escape(str(date_str))

        logo_html = ""
        if template == "enterprise":
            logo_html = '<div style="color: #10b981; font-size: 24pt; font-weight: bold; margin-bottom: 20px;">♦ GrantForge</div>'

        # Note: xhtml2pdf does not fully support flexbox, so the cover page
        # uses standard block centering.
        html_content = f"""
        <!DOCTYPE html>
        <html>
        <head>
        <meta charset="utf-8">
        <style>
        {css_style}
        .cover-page {{
        text-align: center;
        padding-top: 250px;
        }}
        </style>
        </head>
        <body>
        <div class="cover-page">
        {logo_html}
        <h1 style="border: none; font-size: 32pt; margin-bottom: 20px;">{safe_title}</h1>
        <p style="font-size: 16pt; color: #2c3e50; font-weight: bold;">{safe_company}</p>
        <div style="margin-top: 150px;">
        <p style="font-size: 11pt; color: #95a5a6; font-weight: bold; margin-bottom: 5px;">Wygenerowano z użyciem systemu wsparcia DotacjeAI</p>
        <p style="font-size: 10pt; color: #95a5a6;">Dokument utworzony: {safe_date} | Wersja: {safe_version}</p>
        </div>
        </div>
        <!-- Force a page break after the cover page -->
        <pdf:nextpage />
        {html_body}
        </body>
        </html>
        """
        with open(output_path, "wb") as pdf_file:
            pisa_status = pisa.CreatePDF(
                html_content.encode("utf-8"), dest=pdf_file, encoding='utf-8'
            )
        if pisa_status.err:
            print(f"Błąd pisa: {pisa_status.err}")
            return False
        return True
    except Exception:
        import traceback

        print(f"Błąd eksportu do PDF: {traceback.format_exc()}")
        return False


def _pdf_ensure_roboto_font():
    """Return the path of the cached Roboto TTF, downloading it if missing.

    The font is optional: on any failure a message is printed and an empty
    string is returned so the export can continue without it.
    """
    import shutil
    import urllib.request

    font_dir = os.path.join(os.path.dirname(__file__), "..", "assets")
    font_path = os.path.join(font_dir, "Roboto-Regular.ttf")
    if os.path.exists(font_path):
        return font_path

    print("Czcionka Roboto nie istnieje, pobieram...")
    font_url = "https://github.com/google/fonts/raw/main/ofl/roboto/Roboto-Regular.ttf"
    # Download to a temporary name and rename afterwards, so an interrupted
    # transfer never leaves a truncated file that later runs would trust.
    partial_path = font_path + ".part"
    try:
        os.makedirs(font_dir, exist_ok=True)
        # Explicit timeout: an unreachable host must not hang the export.
        with urllib.request.urlopen(font_url, timeout=30) as response, open(
            partial_path, "wb"
        ) as partial_file:
            shutil.copyfileobj(response, partial_file)
        os.replace(partial_path, font_path)
    except Exception as e:
        print(f"Nie udało się pobrać czcionki: {e}")
        try:
            os.remove(partial_path)
        except OSError:
            pass
        return ""
    return font_path