# Pdf-gen / app.py
# rahul7star's picture
# Update app.py
# 8c1f4ba verified
import os
from io import BytesIO, StringIO
from urllib.parse import urlparse
from xml.sax.saxutils import escape

import gradio as gr
import pandas as pd
import requests
from reportlab.lib.enums import TA_CENTER
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import ParagraphStyle
from reportlab.lib.units import inch
from reportlab.lib.utils import ImageReader
from reportlab.pdfgen import canvas
from reportlab.platypus import Paragraph
# =====================================================
# BUILD HF FILE BASE URL
# =====================================================
def build_hf_base_url(csv_url):
    """Return the repository-root ``resolve`` URL for a Hugging Face file URL.

    A ``/blob/`` viewer link is first rewritten to its ``/resolve/`` download
    form. The revision (branch, tag or commit) that follows ``/resolve/`` is
    preserved, so files referenced relative to the repository root are fetched
    from the same revision as the CSV instead of always from ``main``.

    Args:
        csv_url: URL of a file inside a Hugging Face repository, in either
            ``/blob/<revision>/...`` or ``/resolve/<revision>/...`` form.

    Returns:
        ``<repo-url>/resolve/<revision>/`` with a trailing slash. When the URL
        contains no ``/resolve/`` segment, or no revision after it, the
        revision falls back to ``"main"``.
    """
    resolve_url = csv_url.replace("/blob/", "/resolve/")
    repo_url, _, remainder = resolve_url.partition("/resolve/")
    # The first path segment after /resolve/ names the revision; drop any
    # query string glued to it (e.g. "main?download=true").
    revision = remainder.split("/", 1)[0].split("?", 1)[0] or "main"
    return f"{repo_url}/resolve/{revision}/"
# =====================================================
# LOAD CSV WITH AUTH
# =====================================================
def _hf_auth_headers(url):
    """Return request headers for *url*, adding ``HF_TOKEN`` only when safe.

    The URL ultimately comes from a user-editable text box, so the token is
    attached only for HTTPS requests to ``huggingface.co`` (or a subdomain).
    Any other destination gets no ``Authorization`` header, which keeps the
    secret from being sent to arbitrary hosts.
    """
    token = os.getenv("HF_TOKEN")
    if not token:
        return {}
    parsed = urlparse(url)
    host = (parsed.hostname or "").lower()
    is_hf_host = host == "huggingface.co" or host.endswith(".huggingface.co")
    if parsed.scheme == "https" and is_hf_host:
        return {"Authorization": f"Bearer {token}"}
    return {}


def load_csv_with_auth(csv_url, timeout=30):
    """Download a CSV file and parse it into a DataFrame.

    A ``/blob/`` viewer link is rewritten to its ``/resolve/`` download form
    before the request is made.

    Args:
        csv_url: URL of the CSV file.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the caller indefinitely.

    Returns:
        The parsed ``pandas.DataFrame``.

    Raises:
        RuntimeError: With the message ``"HTTP <status>"`` when the response
            status is not 200.
        requests.RequestException: On connection failures or timeouts.
    """
    csv_url = csv_url.replace("/blob/", "/resolve/")
    response = requests.get(
        csv_url,
        headers=_hf_auth_headers(csv_url),
        timeout=timeout,
    )
    if response.status_code != 200:
        raise RuntimeError(f"HTTP {response.status_code}")
    # Parse the raw bytes rather than ``response.text``: for a ``text/*``
    # response without a declared charset, requests decodes as ISO-8859-1,
    # which corrupts UTF-8 content. pandas decodes the bytes as UTF-8.
    return pd.read_csv(BytesIO(response.content))
# =====================================================
# PDF GENERATOR
# =====================================================
# Seconds to wait for each image download before treating it as failed.
_IMAGE_TIMEOUT = 30


def _image_request_headers(url):
    """Return headers for an image download; the token only goes to Hugging Face.

    Image URLs come from CSV content, so ``HF_TOKEN`` is attached only for
    HTTPS requests to ``huggingface.co`` (or a subdomain).
    """
    token = os.getenv("HF_TOKEN")
    if not token:
        return {}
    try:
        parsed = urlparse(url)
        host = (parsed.hostname or "").lower()
    except ValueError:
        # Malformed URL: send no credentials; the request itself will fail.
        return {}
    is_hf_host = host == "huggingface.co" or host.endswith(".huggingface.co")
    if parsed.scheme == "https" and is_hf_host:
        return {"Authorization": f"Bearer {token}"}
    return {}


def _load_image(image_url):
    """Download and decode one image; return an ``ImageReader`` or ``None``."""
    try:
        response = requests.get(
            image_url,
            headers=_image_request_headers(image_url),
            timeout=_IMAGE_TIMEOUT,
        )
        # An error page (404/401 HTML body) must not be handed to the decoder.
        response.raise_for_status()
        reader = ImageReader(BytesIO(response.content))
        img_w, img_h = reader.getSize()
    except Exception:
        # Best-effort boundary: network errors and undecodable bytes both mean
        # "this row has no usable image"; the caller skips and counts the row.
        return None
    if img_w <= 0 or img_h <= 0:
        return None
    return reader


def _draw_cover_image(c, img_reader, page_w, page_h):
    """Draw the image centred and scaled so it covers the whole page."""
    img_w, img_h = img_reader.getSize()
    # "Cover" scaling: scale along the axis that leaves no uncovered margin;
    # the overflow on the other axis is cropped by the page edge.
    if img_w / img_h > page_w / page_h:
        scale = page_h / img_h
    else:
        scale = page_w / img_w
    draw_w = img_w * scale
    draw_h = img_h * scale
    c.drawImage(
        img_reader,
        (page_w - draw_w) / 2,
        (page_h - draw_h) / 2,
        width=draw_w,
        height=draw_h,
        mask='auto',
    )


def _story_paragraph(raw_text, style):
    """Build the story ``Paragraph`` for a CSV cell, or ``None`` if it is empty."""
    text = "" if pd.isna(raw_text) else str(raw_text)
    if not text.strip():
        return None
    try:
        return Paragraph(text.replace("\n", "<br/>"), style)
    except ValueError:
        # The cell contains characters that are not valid inline markup
        # (e.g. a stray "<"); fall back to rendering it as literal text.
        return Paragraph(escape(text).replace("\n", "<br/>"), style)


def generate_pdf_from_csv(csv_url):
    """Render a storybook PDF from a CSV with ``image`` and ``text`` columns.

    Each row becomes an A4 page: the image fills the page and the text is
    centred inside a band that covers the lower 30% of the page height. Text
    that does not fit in the band continues on extra pages that repeat the
    same illustration, so the white text always has a background. Rows whose
    image is missing, cannot be downloaded or cannot be decoded are skipped
    and counted in the status message.

    Args:
        csv_url: URL of the CSV file. Image paths that do not start with
            ``http`` are resolved against the CSV's repository base URL.

    Returns:
        A ``(status_message, pdf_path)`` tuple; ``pdf_path`` is ``None`` when
        no PDF was produced.
    """
    pdf_path = "storybook_test.pdf"

    # Load and validate the CSV.
    try:
        df = load_csv_with_auth(csv_url)
    except Exception as e:
        return f"❌ Failed loading CSV: {e}", None

    if "image" not in df.columns or "text" not in df.columns:
        return "❌ CSV must contain columns: image, text", None

    # Relative image paths are resolved against the dataset base path.
    base_url = build_hf_base_url(csv_url)
    width, height = A4

    # Large, bold, centred white text for readability over the artwork.
    text_style = ParagraphStyle(
        name="StoryText",
        fontName="Helvetica-Bold",
        fontSize=20,
        leading=26,
        alignment=TA_CENTER,
        textColor="white",
    )

    # Text band geometry is the same for every page, so compute it once.
    text_width = width - 2 * inch
    text_area_height = height * 0.30
    text_y = inch * 0.8

    c = canvas.Canvas(pdf_path, pagesize=A4)
    pages = 0
    skipped = 0

    for image_ref, raw_text in zip(df["image"], df["text"]):
        # An empty cell is read as NaN (a float), which has no .startswith().
        if pd.isna(image_ref) or not str(image_ref).strip():
            skipped += 1
            continue

        image_url = str(image_ref).strip()
        if not image_url.startswith("http"):
            image_url = base_url + image_url.lstrip("/")

        img_reader = _load_image(image_url)
        if img_reader is None:
            skipped += 1
            continue

        _draw_cover_image(c, img_reader, width, height)

        remaining = _story_paragraph(raw_text, text_style)
        while remaining is not None:
            # wrap() reports the full height the text needs at this width.
            _, needed = remaining.wrap(text_width, text_area_height)
            pieces = []
            if needed > text_area_height:
                pieces = remaining.split(text_width, text_area_height)
            if len(pieces) < 2:
                # Either the text fits (centre it in the band) or it cannot
                # be split any further (anchor it at the bottom of the band).
                offset = max(text_area_height - needed, 0) / 2
                remaining.drawOn(c, inch, text_y + offset)
                break

            # Draw the part that fits, then continue on a new page that
            # repeats the illustration behind the rest of the text.
            head, remaining = pieces[0], pieces[1]
            _, head_height = head.wrap(text_width, text_area_height)
            offset = max(text_area_height - head_height, 0) / 2
            head.drawOn(c, inch, text_y + offset)
            c.showPage()
            pages += 1
            _draw_cover_image(c, img_reader, width, height)

        c.showPage()
        pages += 1

    if pages == 0:
        return "❌ No pages could be generated (no rows with a usable image)", None

    c.save()
    status = "✅ Professional Storybook PDF Generated!"
    if skipped:
        status += f" ({skipped} row(s) skipped: image missing or unreadable)"
    return status, pdf_path
# =====================================================
# GRADIO UI
# =====================================================
# Single-page UI: a CSV URL text box, a button that runs
# generate_pdf_from_csv, and two outputs (status text and the PDF file).
with gr.Blocks(title="CSV → Storybook PDF Tester") as demo:
    gr.Markdown("## 📕 Storybook PDF Generator (HF Dataset CSV)")
    csv_input = gr.Textbox(
        label="CSV URL",
        value="https://huggingface.co/datasets/rahul7star/zimg-story-book/blob/main/Fo/dataset.csv",
    )
    btn = gr.Button("Generate PDF")
    status = gr.Textbox(label="Status")
    output_pdf = gr.File(label="Download PDF")
    # The handler returns (status_message, pdf_path), matching the two outputs.
    btn.click(
        generate_pdf_from_csv,
        inputs=csv_input,
        outputs=[status, output_pdf],
    )

demo.launch()