Spaces:
Sleeping
Sleeping
Changed to weasyprint
Browse files
app.py
CHANGED
|
@@ -13,13 +13,13 @@ from typing import Dict, List, Any, Tuple, Optional, Pattern, Callable
|
|
| 13 |
from dataclasses import dataclass
|
| 14 |
from functools import wraps
|
| 15 |
from abc import ABC, abstractmethod
|
| 16 |
-
import pdfkit # Import pdfkit for PDF generation
|
| 17 |
import tempfile # For creating temporary files
|
| 18 |
|
| 19 |
# Third-party imports
|
| 20 |
import gradio as gr
|
| 21 |
from docx import Document
|
| 22 |
from colorama import init, Fore, Style
|
|
|
|
| 23 |
|
| 24 |
# Constants
|
| 25 |
DEFAULT_PORT = 7860
|
|
@@ -354,6 +354,11 @@ class DocumentCheckerConfig:
|
|
| 354 |
description="Ignore 'title 14, Code of Federal Regulations (14 CFR)'",
|
| 355 |
is_error=False
|
| 356 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 357 |
PatternConfig(
|
| 358 |
pattern=r'\bUSC\b',
|
| 359 |
description="USC should be U.S.C.", # Per GPO Style Manual
|
|
@@ -2783,19 +2788,15 @@ def create_interface():
|
|
| 2783 |
# Function to generate PDF and provide it for download
|
| 2784 |
def generate_pdf(html_content):
|
| 2785 |
try:
|
| 2786 |
-
# Specify the path to wkhtmltopdf
|
| 2787 |
-
path_wkhtmltopdf = '/usr/bin/wkhtmltopdf'
|
| 2788 |
-
config = pdfkit.configuration(wkhtmltopdf=path_wkhtmltopdf)
|
| 2789 |
-
|
| 2790 |
# Use a temporary file to store the PDF
|
| 2791 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_pdf:
|
| 2792 |
-
# Convert HTML to PDF
|
| 2793 |
-
|
| 2794 |
|
| 2795 |
# Return the path to the PDF file
|
| 2796 |
return gr.update(value=tmp_pdf.name, visible=True)
|
| 2797 |
except Exception as e:
|
| 2798 |
-
logging.error(f"Error generating PDF: {str(e)}")
|
| 2799 |
return gr.update(value=None, visible=False)
|
| 2800 |
|
| 2801 |
# When the download button is clicked, generate the PDF
|
|
|
|
| 13 |
from dataclasses import dataclass
|
| 14 |
from functools import wraps
|
| 15 |
from abc import ABC, abstractmethod
|
|
|
|
| 16 |
import tempfile # For creating temporary files
|
| 17 |
|
| 18 |
# Third-party imports
|
| 19 |
import gradio as gr
|
| 20 |
from docx import Document
|
| 21 |
from colorama import init, Fore, Style
|
| 22 |
+
from weasyprint import HTML
|
| 23 |
|
| 24 |
# Constants
|
| 25 |
DEFAULT_PORT = 7860
|
|
|
|
| 354 |
description="Ignore 'title 14, Code of Federal Regulations (14 CFR)'",
|
| 355 |
is_error=False
|
| 356 |
),
|
| 357 |
+
PatternConfig(
|
| 358 |
+
pattern=r'\bAD Compliance Team \(AD CRT\)\b',
|
| 359 |
+
description="Ignore 'AD Compliance Team (AD CRT)'",
|
| 360 |
+
is_error=False
|
| 361 |
+
),
|
| 362 |
PatternConfig(
|
| 363 |
pattern=r'\bUSC\b',
|
| 364 |
description="USC should be U.S.C.", # Per GPO Style Manual
|
|
|
|
| 2788 |
# Function to generate PDF and provide it for download
|
| 2789 |
def generate_pdf(html_content):
    """Render *html_content* to a temporary PDF and expose it for download.

    Args:
        html_content: HTML markup (string) to convert with WeasyPrint.

    Returns:
        A ``gr.update`` carrying the path of the generated PDF with
        ``visible=True`` on success, or ``value=None`` with
        ``visible=False`` if anything goes wrong.
    """
    # Local import so this block does not depend on the file's import header.
    import os

    pdf_path = None
    try:
        # delete=False: the file has to outlive this function so the
        # download component can serve it afterwards.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_pdf:
            pdf_path = tmp_pdf.name
            # Write through the already-open handle instead of reopening the
            # file by name; reopening an open NamedTemporaryFile by path is
            # not possible on every platform (notably Windows).
            HTML(string=html_content, base_url='.').write_pdf(tmp_pdf)

        # Return the path to the (now flushed and closed) PDF file
        return gr.update(value=pdf_path, visible=True)
    except Exception as e:
        logging.error(f"Error generating PDF with WeasyPrint: {str(e)}")
        # The temp file is not auto-deleted, so remove the partial/empty
        # file ourselves rather than leaking one per failed attempt.
        if pdf_path is not None:
            try:
                os.remove(pdf_path)
            except OSError:
                # Best-effort cleanup; the failure is already logged above.
                pass
        return gr.update(value=None, visible=False)
|
| 2801 |
|
| 2802 |
# When the download button is clicked, generate the PDF
|