# Source: Hugging Face Space "Bogdan555/grantforge-api" (status: Running)
# File size: 14,685 bytes
# Revision: afd56bc

import os
import markdown
import datetime
from bs4 import BeautifulSoup


def _docx_style_exists(doc, style_name):
    """Return True when the python-docx document defines ``style_name``."""
    try:
        doc.styles[style_name]
    except KeyError:
        return False
    return True


def _add_inline_runs(paragraph, node, skip_tags=()):
    """Copy the inline children of an HTML node into ``paragraph`` as runs.

    Text nodes become plain runs, ``<strong>``/``<b>`` become bold runs and
    ``<em>``/``<i>`` italic runs. Any other tag is flattened to its text,
    except tags listed in ``skip_tags``, which are ignored entirely.
    """
    for child in node.children:
        if child.name is None:
            paragraph.add_run(child.string)
        elif child.name in skip_tags:
            continue
        elif child.name in ("strong", "b"):
            paragraph.add_run(child.text).bold = True
        elif child.name in ("em", "i"):
            paragraph.add_run(child.text).italic = True
        else:
            paragraph.add_run(child.text)


def _append_list_items(doc, list_element):
    """Append every ``<li>`` under ``list_element`` as its own paragraph."""
    # Per-list counters, used only for the manual "1. " prefix fallback.
    counters = {}
    for li in list_element.find_all("li"):
        parent = li.parent
        ordered = parent is not None and parent.name == "ol"
        position = counters.get(id(parent), 0) + 1
        counters[id(parent)] = position

        style_name = "List Number" if ordered else "List Bullet"
        # Check the style first: asking add_paragraph() for a missing style
        # would raise only after the paragraph has already been appended.
        if _docx_style_exists(doc, style_name):
            paragraph = doc.add_paragraph(style=style_name)
        else:
            paragraph = doc.add_paragraph()
            paragraph.add_run(f"{position}. " if ordered else "• ")

        # find_all() above is recursive, so nested items get their own
        # paragraph; skip nested lists here to avoid emitting them twice.
        _add_inline_runs(paragraph, li, skip_tags=("ul", "ol"))


def _append_table(doc, table_element):
    """Append an HTML ``<table>`` as a DOCX table with a bold header row."""
    rows = table_element.find_all("tr")
    if not rows:
        return
    cols = max(len(row.find_all(["th", "td"])) for row in rows)
    if cols == 0:
        return

    table = doc.add_table(rows=0, cols=cols)
    # Use the first table style the template actually provides; if neither
    # exists the table keeps the document default instead of failing.
    for candidate in ("Light Shading Accent 1", "Table Grid"):
        if _docx_style_exists(doc, candidate):
            table.style = candidate
            break

    for row_idx, tr in enumerate(rows):
        row_cells = table.add_row().cells
        for col_idx, cell in enumerate(tr.find_all(["th", "td"])):
            paragraph = row_cells[col_idx].paragraphs[0]
            paragraph.text = cell.text.strip()
            # Header cells (<th> or anything in the first row) are bold.
            if (cell.name == "th" or row_idx == 0) and paragraph.runs:
                paragraph.runs[0].bold = True


def export_to_docx(
    content: str,
    output_path: str,
    template: str = "standard",
    project_title: str = "Wniosek o Dofinansowanie",
    company_name: str = "Brak nazwy",
    version: str = "1.0",
    date_str: str = "",
    extra_context: dict = None,
):
    """Export the generated application to a Microsoft Word (DOCX) file.

    A prepared ``template_<name>.docx`` is rendered with ``docxtpl`` (so the
    template may contain Jinja2 tags such as ``{{ beneficjent.krs }}``), then
    reopened with python-docx and the Markdown ``content`` is appended as
    native headings, paragraphs, lists and tables.

    Args:
        content: Application body in Markdown. A leading ``# `` title line is
            dropped so it does not duplicate the template's title.
        output_path: Destination path of the DOCX file.
        template: "standard", "official" or "modern"; anything else falls
            back to "standard".
        project_title: Value for the ``tytul_projektu`` template variable.
        company_name: Value for the ``nazwa_firmy`` template variable.
        version: Value for the ``wersja`` template variable.
        date_str: Value for ``data_generowania``; defaults to today's date
            (``dd.mm.YYYY``) when empty, matching ``export_to_pdf``.
        extra_context: Extra template variables; they override the defaults.

    Returns:
        True on success, False when the template is missing or any error
        occurs (the traceback is printed).
    """
    if extra_context is None:
        extra_context = {}

    try:
        from docxtpl import DocxTemplate

        template_name = (
            template if template in ("standard", "official", "modern") else "standard"
        )
        template_path = os.path.join(
            os.path.dirname(__file__),
            "..",
            "templates",
            f"template_{template_name}.docx",
        )

        if not os.path.exists(template_path):
            print(f"Brak pliku {template_path}, upewnij się, że wygenerowano szablony!")
            return False

        tpl = DocxTemplate(template_path)

        # Drop a leading H1 so the title is not duplicated in the document.
        if content.startswith("# "):
            content = "\n".join(content.split("\n")[1:])

        if not date_str:
            date_str = datetime.datetime.now().strftime("%d.%m.%Y")

        # 'tresc_wniosku' is rendered empty on purpose: the body is appended
        # below with python-docx instead of being injected as plain text.
        render_context = {
            "tytul_projektu": project_title,
            "nazwa_firmy": company_name,
            "data_generowania": date_str,
            "wersja": version,
            "tresc_wniosku": "",
        }
        # Values supplied by the caller take precedence over the defaults.
        render_context.update(extra_context)

        tpl.render(render_context)
        tpl.save(output_path)

        # Second pass: reopen the rendered file with python-docx.
        import docx

        doc = docx.Document(output_path)

        # Remove the trailing paragraph left by the blank 'tresc_wniosku' tag.
        if len(doc.paragraphs) > 0 and doc.paragraphs[-1].text.strip() == "":
            placeholder = doc.paragraphs[-1]
            placeholder._element.getparent().remove(placeholder._element)

        # Markdown -> HTML -> DOCX. The "tables" extension is required,
        # otherwise pipe tables arrive as plain <p> text and the table
        # branch below is never reached.
        html = markdown.markdown(content, extensions=["tables"])
        soup = BeautifulSoup(html, "html.parser")

        for element in soup:
            tag = element.name
            if tag is None:
                # Whitespace between top-level blocks.
                continue
            if tag in ("h1", "h2", "h3", "h4", "h5", "h6"):
                style_name = f"Heading {int(tag[1])}"
                if _docx_style_exists(doc, style_name):
                    doc.add_paragraph(element.text, style=style_name)
                else:
                    # doc.add_heading() needs the same "Heading N" style, so
                    # fall back to a plain bold paragraph instead.
                    doc.add_paragraph().add_run(element.text).bold = True
            elif tag == "p":
                paragraph = doc.add_paragraph()
                if template_name == "official":
                    paragraph.paragraph_format.alignment = 3  # Justify
                _add_inline_runs(paragraph, element)
            elif tag in ("ul", "ol"):
                _append_list_items(doc, element)
            elif tag == "table":
                _append_table(doc, element)
            else:
                # Other block elements (blockquote, pre, ...) are kept as
                # plain text rather than being dropped silently.
                text = element.get_text().strip()
                if text:
                    doc.add_paragraph(text)

        doc.save(output_path)
        return True
    except Exception:
        import traceback

        print(f"Błąd eksportu do DOCX: {traceback.format_exc()}")
        return False


def get_pdf_css(template: str) -> str:
    """Return the PDF stylesheet for the requested template.

    Recognised names are "official", "modern" and "enterprise"; any other
    value yields the "standard" stylesheet.
    """
    official_css = """
        @page { size: A4; margin: 2.5cm; }
        body { font-family: "DejaVu Sans", "Arial", serif; font-size: 11pt; line-height: 1.5; text-align: justify; color: #000; }
        h1, h2, h3 { color: #000; page-break-after: avoid; font-family: "DejaVu Sans", "Arial", serif; }
        h1 { border-bottom: 2px solid #000; padding-bottom: 5px; text-transform: uppercase; text-align: center; font-size: 16pt; margin-top: 2em; }
        h2 { font-size: 14pt; margin-top: 1.5em; }
        h3 { font-size: 12pt; margin-top: 1.2em; font-style: italic; }
        table { width: 100%; border-collapse: collapse; margin: 1em 0; page-break-inside: avoid; }
        th, td { border: 1px solid #000; padding: 8px; text-align: left; }
        p { margin-bottom: 1em; orphans: 3; widows: 3; }
        a { color: #000; text-decoration: none; }
        .toc { page-break-after: always; }
        .toc ul { list-style-type: none; padding-left: 1.5em; }
        .toc > ul { padding-left: 0; }
        .toc a { text-decoration: none; color: #000; }
        """
    modern_css = """
        @page { size: A4; margin: 2.5cm; }
        body { font-family: "DejaVu Sans", "Arial", sans-serif; font-size: 11pt; line-height: 1.6; color: #1e293b; background: #fff; }
        h1, h2, h3 { page-break-after: avoid; color: #0f172a; font-family: "DejaVu Sans", "Arial", sans-serif; }
        h1 { border-bottom: 2px solid #3b82f6; padding-bottom: 0.5em; font-size: 24pt; margin-top: 1em; }
        h2 { border-bottom: 1px solid #e2e8f0; padding-bottom: 0.3em; font-size: 18pt; margin-top: 1.5em; color: #2563eb; }
        h3 { font-size: 14pt; margin-top: 1.2em; color: #334155; }
        p { margin-bottom: 1em; text-align: justify; orphans: 3; widows: 3; }
        table { width: 100%; border-collapse: collapse; margin: 1.5em 0; background: #f8fafc; }
        th, td { border: 1px solid #e2e8f0; padding: 12px; text-align: left; }
        th { background-color: #f1f5f9; color: #334155; font-weight: bold; }
        ul, ol { margin-bottom: 1em; padding-left: 2em; }
        li { margin-bottom: 0.5em; }
        .toc { page-break-after: always; padding: 2em; background: #f8fafc; border-radius: 8px; }
        .toc ul { list-style-type: none; padding-left: 1.5em; }
        .toc > ul { padding-left: 0; }
        .toc a { text-decoration: none; color: #4f46e5; border-bottom: 1px dotted #cbd5e1; display: block; padding-bottom: 5px; margin-bottom: 5px; }
        """
    enterprise_css = """
        @page { size: A4; margin: 2.5cm; }
        body { font-family: "DejaVu Sans", "Arial", sans-serif; font-size: 11pt; line-height: 1.6; color: #1f2937; background: #fff; }
        h1, h2, h3 { page-break-after: avoid; color: #1e3a8a; font-family: "DejaVu Sans", "Arial", sans-serif; }
        h1 { border-bottom: 2px solid #10b981; padding-bottom: 0.5em; font-size: 24pt; margin-top: 1em; }
        h2 { border-bottom: 1px solid #e5e7eb; padding-bottom: 0.3em; font-size: 18pt; margin-top: 1.5em; color: #1e40af; }
        h3 { font-size: 14pt; margin-top: 1.2em; color: #374151; }
        p { margin-bottom: 1em; text-align: justify; orphans: 3; widows: 3; }
        table { width: 100%; border-collapse: collapse; margin: 1.5em 0; background: #ffffff; }
        th, td { border: 1px solid #d1d5db; padding: 12px; text-align: left; }
        th { background-color: #f3f4f6; color: #1f2937; font-weight: bold; }
        ul, ol { margin-bottom: 1em; padding-left: 2em; }
        li { margin-bottom: 0.5em; }
        .toc { page-break-after: always; padding: 2em; background: #f9fafb; border-radius: 8px; border-left: 4px solid #10b981; }
        .toc ul { list-style-type: none; padding-left: 1.5em; }
        .toc > ul { padding-left: 0; }
        .toc a { text-decoration: none; color: #1e3a8a; border-bottom: 1px dotted #9ca3af; display: block; padding-bottom: 5px; margin-bottom: 5px; }
        """
    standard_css = """
        @page { size: A4; margin: 2cm; }
        body { font-family: "DejaVu Sans", "Arial", sans-serif; font-size: 11pt; line-height: 1.6; color: #333; }
        h1, h2, h3 { page-break-after: avoid; }
        h1 { border-bottom: 2px solid #3498db; padding-bottom: 10px; color: #2c3e50; }
        h2 { color: #2980b9; margin-top: 1.5em; }
        table { width: 100%; border-collapse: collapse; margin: 1em 0; }
        th, td { border: 1px solid #bdc3c7; padding: 8px; text-align: left; }
        th { background-color: #ecf0f1; }
        p { margin-bottom: 1em; text-align: justify; orphans: 3; widows: 3; }
        .toc { page-break-after: always; margin-top: 2em; }
        .toc ul { list-style-type: none; padding-left: 1.5em; }
        .toc > ul { padding-left: 0; }
        .toc a { text-decoration: none; color: #2980b9; border-bottom: 1px dotted #bdc3c7; display: block; padding-bottom: 5px; margin-bottom: 5px; }
        """

    # Checked in order with ==, exactly like an if/elif chain would.
    named_sheets = (
        ("official", official_css),
        ("modern", modern_css),
        ("enterprise", enterprise_css),
    )
    for sheet_name, sheet in named_sheets:
        if template == sheet_name:
            return sheet
    # Everything else, including "standard" itself, gets the default sheet.
    return standard_css


def export_to_pdf(
    content: str,
    output_path: str,
    template: str = "standard",
    project_title: str = "Wniosek o Dofinansowanie",
    company_name: str = "Brak nazwy",
    version: str = "1.0",
    date_str: str = "",
):
    """Export the application to PDF using xhtml2pdf (pisa).

    The Markdown ``content`` is converted to HTML (with tables, fenced code
    and a generated table of contents), wrapped in a cover page and rendered
    to ``output_path``.

    Args:
        content: Application body in Markdown. A leading ``# `` title line is
            dropped so it does not duplicate the cover page.
        output_path: Destination path of the PDF file.
        template: Stylesheet name passed to ``get_pdf_css``; "enterprise"
            additionally adds a logo line to the cover page.
        project_title: Title printed on the cover page.
        company_name: Company name printed on the cover page.
        version: Version label printed on the cover page.
        date_str: Creation date label; defaults to today (``dd.mm.YYYY``).

    Returns:
        True on success, False when rendering fails (the error is printed).

    Raises:
        Exception: when xhtml2pdf is not installed.
    """
    try:
        from xhtml2pdf import pisa
    except ImportError as exc:
        print("Nie mozna pobrac xhtml2pdf.")
        raise Exception(
            "Należy zainstalować xhtml2pdf (pip install xhtml2pdf)."
        ) from exc

    try:
        import urllib.request
        from html import escape

        # Drop a leading H1 so the title is not duplicated on the cover page.
        if content.startswith("# "):
            content = "\n".join(content.split("\n")[1:])

        md_content = f"[TOC]\n\n{content}"
        html_body = markdown.markdown(
            md_content,
            extensions=["tables", "fenced_code", "toc"],
            extension_configs={"toc": {"title": "Spis Treści"}},
        )

        # Absolute path: it is embedded in a file:// URL below, which must
        # not depend on the current working directory.
        font_dir = os.path.abspath(
            os.path.join(os.path.dirname(__file__), "..", "assets")
        )
        os.makedirs(font_dir, exist_ok=True)
        font_path = os.path.join(font_dir, "Roboto-Regular.ttf")

        if not os.path.exists(font_path):
            print("Czcionka Roboto nie istnieje, pobieram...")
            font_url = "https://github.com/google/fonts/raw/main/ofl/roboto/Roboto-Regular.ttf"
            try:
                urllib.request.urlretrieve(font_url, font_path)
            except Exception as e:
                print(f"Nie udało się pobrać czcionki: {e}")
                # Remove a partially written file so the next export retries
                # the download instead of loading a truncated font.
                try:
                    if os.path.exists(font_path):
                        os.remove(font_path)
                except OSError as cleanup_error:
                    print(f"Nie udało się pobrać czcionki: {cleanup_error}")
                font_path = ""

        font_face = ""
        font_override = ""
        if font_path:
            font_face = f"""
            @font-face {{
                font-family: "Roboto";
                src: url("file://{font_path}");
            }}
            """
            # The template stylesheets never mention "Roboto", so without
            # this rule the registered font would not be applied anywhere.
            font_override = """
            body, h1, h2, h3, h4, h5, h6, p, li, th, td, a, div, span {
                font-family: "Roboto";
            }
            """
        css_style = font_face + get_pdf_css(template) + font_override

        if not date_str:
            date_str = datetime.datetime.now().strftime("%d.%m.%Y")

        logo_html = ""
        if template == "enterprise":
            logo_html = '<div style="color: #10b981; font-size: 24pt; font-weight: bold; margin-bottom: 20px;">♦ GrantForge</div>'

        # Caller-supplied text is escaped so characters such as "&" or "<"
        # in a title or company name cannot break or inject markup.
        safe_title = escape(str(project_title))
        safe_company = escape(str(company_name))
        safe_date = escape(str(date_str))
        safe_version = escape(str(version))

        # Note: xhtml2pdf does not fully support flexbox, so we use standard block centering.
        html_content = f"""
        <!DOCTYPE html>
        <html>
        <head>
            <meta charset="utf-8">
            <style>
                {css_style}
                .cover-page {{
                    text-align: center;
                    padding-top: 250px;
                }}
            </style>
        </head>
        <body>
            <div class="cover-page">
                {logo_html}
                <h1 style="border: none; font-size: 32pt; margin-bottom: 20px;">{safe_title}</h1>
                <p style="font-size: 16pt; color: #2c3e50; font-weight: bold;">{safe_company}</p>
                <div style="margin-top: 150px;">
                    <p style="font-size: 11pt; color: #95a5a6; font-weight: bold; margin-bottom: 5px;">Wygenerowano z użyciem systemu wsparcia DotacjeAI</p>
                    <p style="font-size: 10pt; color: #95a5a6;">Dokument utworzony: {safe_date} | Wersja: {safe_version}</p>
                </div>
            </div>
            <!-- Force a page break after the cover page -->
            <pdf:nextpage />
            {html_body}
        </body>
        </html>
        """

        with open(output_path, "wb") as pdf_file:
            pisa_status = pisa.CreatePDF(
                html_content.encode("utf-8"), dest=pdf_file, encoding="utf-8"
            )
            if pisa_status.err:
                print(f"Błąd pisa: {pisa_status.err}")
                return False

        return True
    except Exception:
        import traceback

        print(f"Błąd eksportu do PDF: {traceback.format_exc()}")
        return False