raahinaez's picture
Update app.py
96bd1c9 verified
import os
from pathlib import Path

import gradio as gr
import markdown
from weasyprint import HTML
# -------------------- PDF Generator --------------------
def _split_marked_pages(text):
    """Split *text* on ``=== PAGE <label> ===`` markers.

    Returns a list of ``(label, body)`` string tuples, one per well-formed
    page section. Anything before the first marker is ignored, and sections
    without a closing ``===`` are skipped. Returns an empty list when *text*
    contains no ``=== PAGE `` marker at all.
    """
    marker = "=== PAGE "
    first = text.find(marker)
    if first == -1:
        return []

    pages = []
    for chunk in text[first:].split(marker):
        chunk = chunk.strip()
        if not chunk or "===" not in chunk:
            continue
        # The first "===" closes the marker: "<label> ===\n<body>".
        label, body = chunk.split("===", 1)
        pages.append((label.strip(), body.strip()))
    return pages


def text_to_searchable_pdf(text, output_path):
    """Render Markdown *text* to a searchable PDF at *output_path*.

    The text may be divided into pages with ``=== PAGE <label> ===`` markers;
    each marked section is converted from Markdown to HTML, given a
    ``Page <label>`` heading and placed on its own PDF page. Text preceding
    the first marker is dropped.

    If no usable page marker is found, the whole text is rendered as a single
    section without a page heading instead of producing an empty document.

    Args:
        text: Markdown source, optionally containing page markers. ``None``
            is treated as an empty string.
        output_path: Destination path passed to WeasyPrint's ``write_pdf``.

    Returns:
        *output_path*, unchanged.
    """
    text = text or ""

    sections = []
    for label, body in _split_marked_pages(text):
        sections.append(
            '<div class="page">\n'
            f"<h2>Page {label}</h2>\n"
            f"{markdown.markdown(body)}\n"
            "</div>\n"
        )

    # Fallback: plain Markdown without page markers is still rendered rather
    # than silently discarded.
    if not sections and text.strip():
        sections.append(
            '<div class="page">\n'
            f"{markdown.markdown(text.strip())}\n"
            "</div>\n"
        )

    # Build absolute, percent-encoded file: URIs for the bundled fonts so the
    # @font-face rules stay valid regardless of the working directory or of
    # special characters in the install path.
    # NOTE(review): no Devanagari-specific font is registered here; confirm
    # Hindi text renders with these fonts or a system fallback.
    base_dir = Path(__file__).resolve().parent
    font_regular_uri = (base_dir / "NotoSans-Regular.ttf").as_uri()
    font_gurmukhi_uri = (base_dir / "NotoSansGurmukhi-Regular.ttf").as_uri()

    # Page breaks are requested only between consecutive ".page" sections, so
    # other <div> elements coming from the Markdown do not force a break.
    full_html = f"""
<html>
<head>
<meta charset="UTF-8">
<style>
@font-face {{
    font-family: 'NotoSans';
    src: url('{font_regular_uri}');
}}
@font-face {{
    font-family: 'NotoSansGurmukhi';
    src: url('{font_gurmukhi_uri}');
}}
body {{
    font-family: 'NotoSans', 'NotoSansGurmukhi', sans-serif;
    font-size: 12pt;
    line-height: 1.5;
}}
h1 {{ font-size: 18pt; text-align: center; margin-top: 20px; }}
h2 {{ font-size: 16pt; margin-top: 15px; }}
p {{ margin: 5px 0; }}
li {{ margin: 3px 0; }}
.page + .page {{ page-break-before: always; }}
</style>
</head>
<body>
{''.join(sections)}
</body>
</html>
"""

    # Generate PDF
    HTML(string=full_html).write_pdf(output_path)
    return output_path
# -------------------- Gradio Wrapper --------------------
def generate_pdf(text):
    """Render *text* to a PDF and return the resulting file path.

    Used as the click handler for the Gradio button; the PDF is always
    written to ``output_multi_lang.pdf``, a path relative to the current
    working directory.
    """
    pdf_path = text_to_searchable_pdf(text, "output_multi_lang.pdf")
    return pdf_path
# -------------------- Gradio UI --------------------
with gr.Blocks() as demo:
    # Header banners; the ids match the #title / #sub rules in the app CSS.
    for banner_markup in (
        "<div id='title'>🌍 Multi-Language PDF Generator</div>",
        "<div id='sub'>Create beautiful Markdown-formatted searchable PDFs</div>",
    ):
        gr.HTML(banner_markup)

    # NOTE(review): the original indentation was lost; the input, button and
    # download widgets are assumed to all live inside the group — confirm.
    with gr.Group():
        text_input = gr.Textbox(
            label="Enter Markdown Text",
            placeholder="Use **bold**, # headings, lists, Unicode (Hindi/Punjabi)…",
            lines=14,
        )
        generate_btn = gr.Button("🚀 Generate PDF", variant="primary")
        pdf_output = gr.File(label="Download your PDF ➡️")

    # Clicking the button sends the textbox content to generate_pdf and
    # exposes the returned file path through the File component.
    generate_btn.click(
        fn=generate_pdf,
        inputs=text_input,
        outputs=pdf_output,
    )
# -------------------- Launch --------------------
# Visual theme: Soft base with blue/purple accents and medium corner radius.
app_theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="purple",
    radius_size=gr.themes.sizes.radius_md,
)

# Custom CSS for the gradient title, the subtitle and the page width.
app_css = """
#title {
text-align: center;
font-size: 36px;
font-weight: 800;
padding: 20px;
background: linear-gradient(90deg, #4e8df5, #9d5cf7);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
}
#sub {
text-align: center;
font-size: 17px;
margin-top: -10px;
color: #6a6a6a;
}
.gradio-container {
max-width: 820px !important;
margin: auto;
}
"""

# Start the app with the theme and stylesheet defined above.
demo.launch(theme=app_theme, css=app_css)