Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,278 +8,291 @@ import fitz # PyMuPDF
|
|
| 8 |
from PIL import Image
|
| 9 |
import io
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, BaseDocTemplate, Frame, PageTemplate, Image as ReportLabImage
|
| 12 |
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
| 13 |
-
from reportlab.lib.pagesizes import letter
|
| 14 |
from reportlab.lib.units import inch
|
| 15 |
from reportlab.lib import colors
|
| 16 |
from reportlab.pdfbase import pdfmetrics
|
| 17 |
from reportlab.pdfbase.ttfonts import TTFont
|
| 18 |
|
| 19 |
-
# --- Configuration ---
|
| 20 |
CWD = Path.cwd()
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
-
#
|
| 28 |
-
|
| 29 |
-
|
| 30 |
|
| 31 |
# --- Font & Emoji Handling ---
|
| 32 |
EMOJI_FONT_PATH = None
|
| 33 |
EMOJI_IMAGE_CACHE = {}
|
| 34 |
|
| 35 |
def setup_fonts():
|
| 36 |
-
"""Finds
|
| 37 |
global EMOJI_FONT_PATH
|
| 38 |
-
|
| 39 |
|
| 40 |
-
# Locate the essential NotoColorEmoji font
|
| 41 |
noto_emoji_path = FONT_DIR / "NotoColorEmoji-Regular.ttf"
|
| 42 |
if noto_emoji_path.exists():
|
| 43 |
EMOJI_FONT_PATH = str(noto_emoji_path)
|
| 44 |
print(f"Found emoji font: {EMOJI_FONT_PATH}")
|
| 45 |
else:
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
def render_emoji_as_image(emoji_char, size_pt):
|
| 59 |
-
"""
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
"""
|
| 63 |
-
if not EMOJI_FONT_PATH:
|
| 64 |
-
print("Cannot render emoji: Emoji font path not set.")
|
| 65 |
-
return None
|
| 66 |
|
| 67 |
-
# Use a cache to avoid re-rendering the same emoji multiple times
|
| 68 |
-
if (emoji_char, size_pt) in EMOJI_IMAGE_CACHE:
|
| 69 |
-
return EMOJI_IMAGE_CACHE[(emoji_char, size_pt)]
|
| 70 |
-
|
| 71 |
try:
|
| 72 |
-
# Use PyMuPDF (fitz) to draw the emoji onto a temporary, transparent canvas
|
| 73 |
rect = fitz.Rect(0, 0, size_pt * 1.5, size_pt * 1.5)
|
| 74 |
doc = fitz.open()
|
| 75 |
page = doc.new_page(width=rect.width, height=rect.height)
|
| 76 |
-
|
| 77 |
-
# Load the color emoji font
|
| 78 |
page.insert_font(fontname="emoji", fontfile=EMOJI_FONT_PATH)
|
| 79 |
-
|
| 80 |
-
# Insert the emoji character. The vertical alignment may need tweaking.
|
| 81 |
page.insert_text(fitz.Point(0, size_pt * 1.1), emoji_char, fontname="emoji", fontsize=size_pt)
|
| 82 |
-
|
| 83 |
-
# Get a high-resolution PNG of the emoji with a transparent background
|
| 84 |
pix = page.get_pixmap(alpha=True, dpi=300)
|
| 85 |
doc.close()
|
| 86 |
-
|
| 87 |
-
# Save the PNG to an in-memory buffer
|
| 88 |
img_buffer = io.BytesIO(pix.tobytes("png"))
|
| 89 |
img_buffer.seek(0)
|
| 90 |
-
|
| 91 |
-
# Add the buffer to the cache and return it
|
| 92 |
EMOJI_IMAGE_CACHE[(emoji_char, size_pt)] = img_buffer
|
| 93 |
return img_buffer
|
| 94 |
except Exception as e:
|
| 95 |
print(f"ERROR: Could not render emoji '{emoji_char}': {e}")
|
| 96 |
return None
|
| 97 |
|
| 98 |
-
# ---
|
| 99 |
-
def
|
| 100 |
-
"""
|
| 101 |
-
The main function to convert a single Markdown file into a PDF.
|
| 102 |
-
It reads the text, processes it line by line, and replaces emojis with images.
|
| 103 |
-
"""
|
| 104 |
-
print(f"--- Starting PDF conversion for: {md_filepath.name} ---")
|
| 105 |
-
|
| 106 |
-
# Define styles for the PDF document
|
| 107 |
styles = getSampleStyleSheet()
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
style_h2 = ParagraphStyle('H2', fontName=body_font, fontSize=18, leading=22, spaceAfter=10)
|
| 113 |
-
|
| 114 |
-
# Regex to find all emojis in a string
|
| 115 |
emoji_pattern = re.compile(f"([{re.escape(''.join(map(chr, range(0x1f600, 0x1f650))))}"
|
| 116 |
f"{re.escape(''.join(map(chr, range(0x1f300, 0x1f5ff))))}"
|
| 117 |
f"{re.escape(''.join(map(chr, range(0x1f900, 0x1f9ff))))}"
|
| 118 |
f"{re.escape(''.join(map(chr, range(0x2600, 0x26ff))))}]+)")
|
| 119 |
|
| 120 |
def text_to_flowables(text, style):
|
| 121 |
-
"""
|
| 122 |
-
Splits a line of text into a list of Paragraphs and Images.
|
| 123 |
-
This allows text and emoji-images to flow together on the same line.
|
| 124 |
-
"""
|
| 125 |
parts = emoji_pattern.split(text)
|
| 126 |
flowables = []
|
| 127 |
for part in parts:
|
| 128 |
if not part: continue
|
| 129 |
-
|
| 130 |
if emoji_pattern.match(part):
|
| 131 |
-
# This part is an emoji or a sequence of them
|
| 132 |
for emoji_char in part:
|
| 133 |
img_buffer = render_emoji_as_image(emoji_char, style.fontSize)
|
| 134 |
if img_buffer:
|
| 135 |
-
# Create an Image object, slightly larger than the text for better spacing
|
| 136 |
img = ReportLabImage(img_buffer, height=style.fontSize * 1.2, width=style.fontSize * 1.2)
|
| 137 |
flowables.append(img)
|
| 138 |
else:
|
| 139 |
-
|
| 140 |
-
flowables.append(Paragraph(
|
| 141 |
-
|
| 142 |
-
# Use a Table to keep all parts on the same line. This is a common ReportLab technique.
|
| 143 |
-
if flowables:
|
| 144 |
-
return Table([flowables], colWidths=[None] * len(flowables), style=[('VALIGN', (0,0), (-1,-1), 'MIDDLE')])
|
| 145 |
-
return None
|
| 146 |
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
# Simple Markdown parsing
|
| 161 |
-
if stripped_line.startswith('# '):
|
| 162 |
-
flowable = text_to_flowables(stripped_line[2:], style_h1)
|
| 163 |
-
elif stripped_line.startswith('## '):
|
| 164 |
-
flowable = text_to_flowables(stripped_line[2:], style_h2)
|
| 165 |
-
elif stripped_line:
|
| 166 |
-
flowable = text_to_flowables(stripped_line, style_body)
|
| 167 |
else:
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
|
|
|
|
|
|
| 177 |
|
| 178 |
-
|
| 179 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
try:
|
| 181 |
-
doc.
|
| 182 |
-
|
| 183 |
-
|
|
|
|
|
|
|
|
|
|
| 184 |
except Exception as e:
|
| 185 |
-
print(f"
|
| 186 |
return None
|
| 187 |
|
| 188 |
-
# ---
|
| 189 |
-
def
|
| 190 |
-
""
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
# Clear caches and temp directories for a clean run
|
| 197 |
EMOJI_IMAGE_CACHE.clear()
|
| 198 |
-
shutil.rmtree(TEMP_DIR, ignore_errors=True); TEMP_DIR.mkdir(exist_ok=True)
|
| 199 |
-
|
| 200 |
-
log_messages = []
|
| 201 |
-
generated_pdf_paths = []
|
| 202 |
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
print("\n" + "="*60)
|
| 233 |
-
print(e)
|
| 234 |
-
print("The application cannot start without this font file.")
|
| 235 |
-
print("Please add 'NotoColorEmoji-Regular.ttf' and 'DejaVuSans.ttf' to your project directory.")
|
| 236 |
-
print("="*60)
|
| 237 |
-
exit() # Stop the script
|
| 238 |
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
log_output = gr.Textbox(label="Alchemist's Log", lines=8, interactive=False)
|
| 253 |
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
pdf_gallery = gr.Gallery(
|
| 258 |
-
label="Generated PDFs",
|
| 259 |
-
show_label=False,
|
| 260 |
-
elem_id="gallery",
|
| 261 |
-
columns=3,
|
| 262 |
-
object_fit="contain",
|
| 263 |
-
height="auto"
|
| 264 |
-
)
|
| 265 |
-
# This button allows manual refreshing of the gallery
|
| 266 |
-
refresh_button = gr.Button("🔄 Refresh Library")
|
| 267 |
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 274 |
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 279 |
)
|
| 280 |
-
|
| 281 |
-
# Load the gallery with existing PDFs when the app starts
|
| 282 |
-
demo.load(refresh_gallery, None, pdf_gallery)
|
| 283 |
|
| 284 |
-
|
| 285 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
from PIL import Image
|
| 9 |
import io
|
| 10 |
|
| 11 |
+
# Imports for new formats
|
| 12 |
+
from docx import Document
|
| 13 |
+
import openpyxl
|
| 14 |
+
|
| 15 |
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, BaseDocTemplate, Frame, PageTemplate, Image as ReportLabImage
|
| 16 |
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
| 17 |
+
from reportlab.lib.pagesizes import letter, A4, legal, landscape
|
| 18 |
from reportlab.lib.units import inch
|
| 19 |
from reportlab.lib import colors
|
| 20 |
from reportlab.pdfbase import pdfmetrics
|
| 21 |
from reportlab.pdfbase.ttfonts import TTFont
|
| 22 |
|
| 23 |
+
# --- Configuration & Setup ---
# Every working path is derived from the directory the process was started in.
CWD = Path.cwd()
FONT_DIR = CWD
OUTPUT_DIR = CWD / "generated_outputs"
PREVIEW_DIR = CWD / "previews"

# PDF page layouts: display label -> {"size": <page size from reportlab.lib.pagesizes>}.
LAYOUTS = dict([
    ("A4 Portrait", dict(size=A4)),
    ("A4 Landscape", dict(size=landscape(A4))),
    ("Letter Portrait", dict(size=letter)),
    ("Letter Landscape", dict(size=landscape(letter))),
])

# Create the output and preview directories up front if they are missing.
OUTPUT_DIR.mkdir(exist_ok=True)
PREVIEW_DIR.mkdir(exist_ok=True)

# --- Font & Emoji Handling ---
# Filled in by setup_fonts(); stays None until the emoji font has been located.
EMOJI_FONT_PATH = None
# Rendered emoji images keyed by (emoji_char, size_pt).
EMOJI_IMAGE_CACHE = {}
|
| 42 |
|
| 43 |
def setup_fonts():
    """Locate the emoji font and register every .ttf file found in FONT_DIR.

    Sets the module-level EMOJI_FONT_PATH as a side effect.

    Returns:
        A sorted list of registered font names usable for text (the emoji
        font is excluded), or ['Helvetica'] when none could be registered.

    Raises:
        FileNotFoundError: if 'NotoColorEmoji-Regular.ttf' is not in FONT_DIR.
    """
    global EMOJI_FONT_PATH

    emoji_file = FONT_DIR / "NotoColorEmoji-Regular.ttf"
    if not emoji_file.exists():
        raise FileNotFoundError("CRITICAL: 'NotoColorEmoji-Regular.ttf' not found. This file is required for emoji support.")
    EMOJI_FONT_PATH = str(emoji_file)
    print(f"Found emoji font: {EMOJI_FONT_PATH}")

    usable = []
    for ttf in FONT_DIR.glob("*.ttf"):
        stem = ttf.stem
        try:
            pdfmetrics.registerFont(TTFont(stem, str(ttf)))
            # The same face is mapped to all four family slots.
            pdfmetrics.registerFontFamily(stem, normal=stem, bold=stem, italic=stem, boldItalic=stem)
        except Exception as exc:
            # A font that fails to register is reported and skipped.
            print(f"Could not register font {ttf.name}: {exc}")
            continue
        if "notocoloremoji" not in stem.lower():
            usable.append(stem)

    return sorted(usable) if usable else ['Helvetica']
|
| 68 |
|
| 69 |
def render_emoji_as_image(emoji_char, size_pt):
    """Render one emoji character to an in-memory PNG.

    The emoji is drawn with PyMuPDF onto a temporary page using the font at
    EMOJI_FONT_PATH and rasterised at 300 dpi with an alpha channel.

    Args:
        emoji_char: The character to draw.
        size_pt: Font size in points; the page is a square 1.5x this size.

    Returns:
        A fresh io.BytesIO positioned at 0 holding the PNG data, or None when
        no emoji font is configured or rendering fails.
    """
    if not EMOJI_FONT_PATH:
        return None

    cache_key = (emoji_char, size_pt)
    cached_png = EMOJI_IMAGE_CACHE.get(cache_key)
    if cached_png is not None:
        # The cache stores raw bytes; every caller gets its own stream so
        # images built from the same emoji never share a read position.
        return io.BytesIO(cached_png)

    try:
        doc = fitz.open()
        try:
            side = size_pt * 1.5
            page = doc.new_page(width=side, height=side)
            page.insert_font(fontname="emoji", fontfile=EMOJI_FONT_PATH)
            # Baseline sits slightly below size_pt so the glyph is inside the page.
            page.insert_text(fitz.Point(0, size_pt * 1.1), emoji_char, fontname="emoji", fontsize=size_pt)
            png_bytes = page.get_pixmap(alpha=True, dpi=300).tobytes("png")
        finally:
            # Close the temporary document even when drawing fails.
            doc.close()
    except Exception as e:
        print(f"ERROR: Could not render emoji '{emoji_char}': {e}")
        return None

    EMOJI_IMAGE_CACHE[cache_key] = png_bytes
    return io.BytesIO(png_bytes)
|
| 89 |
|
| 90 |
+
# --- Document Generation Engines ---
|
| 91 |
+
def markdown_to_story(markdown_text: str, font_name: str):
    """Convert Markdown text into a list of ReportLab flowables (a "story").

    Supported syntax, evaluated per line after stripping whitespace:
    '# ' (H1, preceded by a page break except for the first one), '## ' (H2),
    '- ' / '* ' (indented list item), '**bold**' inside any text, and blank
    lines (vertical spacer). Emoji characters are replaced by inline images
    produced by render_emoji_as_image.

    Args:
        markdown_text: The Markdown source.
        font_name: Name of the registered font used for all text styles.

    Returns:
        A list of flowables, one per input line.
    """
    # Local import keeps this block self-contained.
    from xml.sax.saxutils import escape

    style_body = ParagraphStyle('BodyText', fontName=font_name, spaceAfter=6, fontSize=11, leading=14)
    style_h1 = ParagraphStyle('h1', fontName=font_name, fontSize=24, leading=28, spaceAfter=12, textColor=colors.darkblue)
    style_h2 = ParagraphStyle('h2', fontName=font_name, fontSize=18, leading=22, spaceAfter=10, textColor=colors.darkslateblue)
    # ParagraphStyle needs a name as its first argument; build the list style
    # once instead of once per list line.
    style_bullet = ParagraphStyle('BulletText', parent=style_body, leftIndent=20, bulletIndent=10)

    # Inclusive code point ranges: Emoticons, Misc Symbols and Pictographs,
    # Supplemental Symbols and Pictographs, Miscellaneous Symbols.
    emoji_pattern = re.compile(
        "(["
        "\U0001F600-\U0001F64F"
        "\U0001F300-\U0001F5FF"
        "\U0001F900-\U0001F9FF"
        "\u2600-\u26FF"
        "]+)"
    )
    bold_pattern = re.compile(r'\*\*(.*?)\*\*')

    def text_to_flowables(text, style):
        """Turn one line into a single-row Table mixing text and emoji images."""
        flowables = []
        for part in emoji_pattern.split(text):
            if not part:
                continue
            if emoji_pattern.match(part):
                side = style.fontSize * 1.2
                for emoji_char in part:
                    img_buffer = render_emoji_as_image(emoji_char, style.fontSize)
                    if img_buffer:
                        flowables.append(ReportLabImage(img_buffer, height=side, width=side))
            else:
                # Escape &, < and > first so literal text cannot break the
                # Paragraph markup parser, then apply the bold markup.
                formatted_part = bold_pattern.sub(r'<b>\1</b>', escape(part))
                flowables.append(Paragraph(formatted_part, style))
        if not flowables:
            return Spacer(0, 0)
        return Table([flowables], colWidths=[None] * len(flowables), style=[('VALIGN', (0, 0), (-1, -1), 'MIDDLE')])

    story = []
    first_heading = True
    for line in markdown_text.split('\n'):
        stripped = line.strip()
        if stripped.startswith("# "):
            # Every H1 after the first starts on a new page.
            if not first_heading:
                story.append(PageBreak())
            story.append(text_to_flowables(stripped[2:], style_h1))
            first_heading = False
        elif stripped.startswith("## "):
            story.append(text_to_flowables(stripped[3:], style_h2))
        elif stripped.startswith(("- ", "* ")):
            story.append(text_to_flowables(stripped[2:], style_bullet))
        elif stripped:
            story.append(text_to_flowables(stripped, style_body))
        else:
            story.append(Spacer(1, 0.2*inch))
    return story
|
| 134 |
+
|
| 135 |
+
def create_pdf(md_content, font_name, pagesize, num_columns):
    """Generate a PDF from Markdown content.

    Args:
        md_content: Markdown source passed to markdown_to_story.
        font_name: Registered font name used for the text.
        pagesize: Page size passed to the ReportLab document template.
        num_columns: Number of text columns; values above 1 use a
            multi-frame page template with 0.5 inch margins.

    Returns:
        An io.BytesIO containing the PDF, rewound to position 0.
    """
    md_buffer = io.BytesIO()
    story = markdown_to_story(md_content, font_name)

    # Coerce to int so a numeric value such as 2.0 still works with range().
    num_columns = max(1, int(num_columns))
    if num_columns > 1:
        margin = 0.5 * inch
        gutter = 0.2 * inch
        doc = BaseDocTemplate(md_buffer, pagesize=pagesize, leftMargin=margin, rightMargin=margin, topMargin=margin, bottomMargin=margin)
        # n columns are separated by n-1 gutters; split what is left evenly
        # so the frames span exactly doc.width.
        frame_width = (doc.width - (num_columns - 1) * gutter) / num_columns
        frames = [
            Frame(doc.leftMargin + i * (frame_width + gutter), doc.bottomMargin, frame_width, doc.height)
            for i in range(num_columns)
        ]
        doc.addPageTemplates([PageTemplate(id='MultiCol', frames=frames)])
    else:
        doc = SimpleDocTemplate(md_buffer, pagesize=pagesize)

    doc.build(story)
    # Rewind so callers may use read() as well as getvalue().
    md_buffer.seek(0)
    return md_buffer
|
| 148 |
+
|
| 149 |
+
def create_docx(md_content):
    """Build a DOCX document from Markdown content.

    Handles '# ' and '## ' headings, '- ' / '* ' list items (style
    'List Bullet') and '**bold**' spans inside ordinary paragraphs.
    Blank lines are skipped.

    Returns:
        The populated Document object; saving it is left to the caller.
    """
    document = Document()
    for raw_line in md_content.split('\n'):
        text = raw_line.strip()
        if not text:
            continue
        if text.startswith('# '):
            document.add_heading(text[2:], level=1)
            continue
        if text.startswith('## '):
            document.add_heading(text[3:], level=2)
            continue
        if text.startswith(('- ', '* ')):
            document.add_paragraph(text[2:], style='List Bullet')
            continue

        # Ordinary paragraph: split keeps the **...** delimiters as their own
        # chunks so each chunk becomes one run.
        paragraph = document.add_paragraph()
        for chunk in re.split(r'(\*\*.*?\*\*)', text):
            is_bold = chunk.startswith('**') and chunk.endswith('**')
            if is_bold:
                run = paragraph.add_run(chunk[2:-2])
                run.bold = True
            else:
                paragraph.add_run(chunk)
    return document
|
| 166 |
|
| 167 |
+
def create_xlsx(md_content):
    """Build an XLSX workbook with one column per H1 section.

    The content is split at every line starting with '# '. For each section
    the first line (the heading text) goes into row 1 and each following line
    into the next row of the same column, all stripped of surrounding
    whitespace.

    Returns:
        The populated openpyxl Workbook; saving it is left to the caller.
    """
    book = openpyxl.Workbook()
    worksheet = book.active

    # Prefixing a newline lets a heading on the very first line match too.
    chunks = re.split(r'\n# ', '\n' + md_content)
    if chunks[0].strip() == "":
        del chunks[0]

    column = 0
    for chunk in chunks:
        column += 1
        for row, text in enumerate(chunk.split('\n'), start=1):
            worksheet.cell(row=row, column=column, value=text.strip())
    return book
|
| 180 |
+
|
| 181 |
+
def create_pdf_preview(pdf_path: Path):
    """Render the first page of a PDF to a PNG in PREVIEW_DIR.

    Args:
        pdf_path: Path of the PDF to preview; the PNG reuses its stem.

    Returns:
        The preview file path as a string, or None if rendering failed.
    """
    preview_path = PREVIEW_DIR / f"{pdf_path.stem}.png"
    try:
        doc = fitz.open(pdf_path)
        try:
            pix = doc.load_page(0).get_pixmap(dpi=150)
            pix.save(str(preview_path))
        finally:
            # Close the document even when page loading or saving fails.
            doc.close()
        return str(preview_path)
    except Exception as e:
        print(f"Preview failed for {pdf_path.name}: {e}")
        return None
|
| 193 |
|
| 194 |
+
# --- Main API Function ---
|
| 195 |
+
def generate_outputs_api(files, output_formats, layouts, fonts, num_columns, progress=gr.Progress(track_tqdm=True)):
    """Combine the uploaded Markdown files and generate the selected formats.

    Args:
        files: Uploaded files; each item is either an object with a `.name`
            path attribute or a plain path string.
        output_formats: Any of "PDF", "DOCX", "XLSX".
        layouts: LAYOUTS keys; one PDF is produced per layout/font pair.
        fonts: Font names to use for PDF text.
        num_columns: Number of text columns for PDFs.
        progress: Gradio progress tracker.

    Returns:
        A tuple (preview image paths, log message, generated file paths).

    Raises:
        gr.Error: if no files or formats were given, or no file was readable.
    """
    if not files:
        raise gr.Error("Please upload at least one Markdown (.md) file.")
    if not output_formats:
        raise gr.Error("Please select at least one output format.")

    # Start from empty directories. rmtree is best-effort, so mkdir must
    # tolerate a directory that could not be removed.
    for directory in (OUTPUT_DIR, PREVIEW_DIR):
        shutil.rmtree(directory, ignore_errors=True)
        directory.mkdir(parents=True, exist_ok=True)
    EMOJI_IMAGE_CACHE.clear()

    # Read every file; a file that does not open with a heading gets an
    # empty H1 marker so each file still begins its own H1 section.
    md_contents = []
    for upload in files:
        source = Path(getattr(upload, "name", upload))
        try:
            text = source.read_text(encoding='utf-8')
        except Exception as e:
            print(f"Error reading {source}: {e}")
            continue
        if not text.lstrip().startswith("#"):
            text = "# " + text
        md_contents.append(text)
    if not md_contents:
        raise gr.Error("None of the uploaded files could be read.")
    md_content = "\n\n".join(md_contents)

    generated_files = []
    for format_choice in progress.tqdm(output_formats, desc="Generating Formats"):
        time_str = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')

        try:
            if format_choice == "PDF":
                if not layouts or not fonts:
                    gr.Warning("PDF was selected but no layout or font is chosen; skipping PDF.")
                for layout_name in layouts:
                    for font_name in fonts:
                        pagesize = LAYOUTS.get(layout_name, {}).get("size", letter)
                        pdf_buffer = create_pdf(md_content, font_name, pagesize, num_columns)
                        filename = f"Document_{layout_name.replace(' ','-')}_{font_name}_{time_str}.pdf"
                        output_path = OUTPUT_DIR / filename
                        output_path.write_bytes(pdf_buffer.getvalue())
                        generated_files.append(output_path)

            elif format_choice == "DOCX":
                docx_doc = create_docx(md_content)
                output_path = OUTPUT_DIR / f"Document_{time_str}.docx"
                docx_doc.save(output_path)
                generated_files.append(output_path)

            elif format_choice == "XLSX":
                xlsx_book = create_xlsx(md_content)
                output_path = OUTPUT_DIR / f"Outline_{time_str}.xlsx"
                xlsx_book.save(output_path)
                generated_files.append(output_path)
        except Exception as e:
            # One failing format must not prevent the remaining ones.
            print(f"Failed to generate {format_choice}: {e}")
            gr.Warning(f"Failed to generate {format_choice}. See console for details.")

    # Only PDFs get a preview image; failed previews (None) are dropped.
    pdf_paths = [path for path in generated_files if path.suffix == '.pdf']
    gallery_previews = [preview for preview in map(create_pdf_preview, pdf_paths) if preview]
    log_message = f"Generated {len(generated_files)} files." if generated_files else "Generation failed. Check logs."

    return gallery_previews, log_message, [str(p) for p in generated_files]
|
| 249 |
|
| 250 |
+
# --- Gradio UI Definition ---
# setup_fonts() raises FileNotFoundError when the emoji font is missing. In
# that case the UI is still built, but with an empty font list so the module
# can be imported without a fatal error.
try:
    AVAILABLE_FONTS = setup_fonts()
except FileNotFoundError as e:
    print(e)
    AVAILABLE_FONTS = []

with gr.Blocks(theme=gr.themes.Soft(), title="Multi-Format Document Generator") as demo:
    gr.Markdown("# 📄 Multi-Format Document Generator (PDF, DOCX, XLSX)")
    gr.Markdown("Upload one or more Markdown files (`.md`). The tool will combine them and generate documents in your chosen formats. Emojis in PDFs are fully supported! 🥳")

    if not AVAILABLE_FONTS:
        # Shown as page content: a gr.Warning raised while the layout is being
        # built is not tied to any user event.
        gr.Markdown("⚠️ **The application is in a degraded state.** Required font files are missing. Please check the console log and add the necessary files.")

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### ⚙️ Generation Settings")
            uploaded_files = gr.File(label="Upload Markdown Files", file_count="multiple", file_types=[".md"])
            output_formats = gr.CheckboxGroup(choices=["PDF", "DOCX", "XLSX"], label="Select Output Formats", value=["PDF"])

            with gr.Accordion("PDF Customization", open=True):
                selected_layouts = gr.CheckboxGroup(choices=list(LAYOUTS.keys()), label="Base Page Layout", value=["Letter Portrait"])
                selected_fonts = gr.CheckboxGroup(choices=AVAILABLE_FONTS, label="Text Font", value=[AVAILABLE_FONTS[0]] if AVAILABLE_FONTS else [])
                num_columns_slider = gr.Slider(label="Text Columns (for PDF)", minimum=1, maximum=4, step=1, value=1)

            # The button is disabled when no fonts are available, because no
            # handler is attached in that state.
            generate_btn = gr.Button("🚀 Generate Documents", variant="primary", interactive=bool(AVAILABLE_FONTS))

        with gr.Column(scale=2):
            gr.Markdown("### 🖼️ Output Files")
            gallery_output = gr.Gallery(label="PDF Previews", show_label=False, elem_id="gallery", columns=3, height="auto", object_fit="contain")
            log_output = gr.Markdown(label="Generation Log", value="Ready...")
            downloadable_files_output = gr.Files(label="Download Generated Files")

    if AVAILABLE_FONTS:
        generate_btn.click(
            fn=generate_outputs_api,
            inputs=[uploaded_files, output_formats, selected_layouts, selected_fonts, num_columns_slider],
            outputs=[gallery_output, log_output, downloadable_files_output]
        )

if __name__ == "__main__":
    if AVAILABLE_FONTS:
        demo.launch()
    else:
        print("\n" + "="*60)
        print("Application launch aborted due to missing font files.")
        print("Please ensure 'NotoColorEmoji-Regular.ttf' is in the project directory.")
        print("="*60)
|