Spaces:

rahul7star
/

Pdf-gen

Paused

App Files Files Community

Pdf-gen / app.py

rahul7star

Update app.py

8c1f4ba verified 4 months ago

raw

history blame contribute delete

5.81 kB

	import os
	import requests
	import pandas as pd
	from io import BytesIO, StringIO
	import gradio as gr

	from reportlab.pdfgen import canvas
	from reportlab.lib.pagesizes import A4
	from reportlab.lib.units import inch
	from reportlab.platypus import Paragraph
	from reportlab.lib.styles import ParagraphStyle
	from reportlab.lib.enums import TA_CENTER
	from reportlab.lib.utils import ImageReader


	# =====================================================
	# BUILD HF FILE BASE URL
	# =====================================================

	def build_hf_base_url(csv_url):
	    """
	    Convert a CSV URL into the dataset's base "resolve" path.

	    A ``/blob/`` URL is first rewritten to its ``/resolve/`` form. The result
	    is everything before ``/resolve/`` plus ``/resolve/<revision>/``, where
	    ``<revision>`` is the first path segment after ``/resolve/``. Any
	    sub-directory the CSV lives in is dropped, so relative paths appended to
	    the result are resolved against the repository root.

	    When the URL contains no ``/resolve/`` (or ``/blob/``) segment, or the
	    revision segment is empty, the revision falls back to ``main``.
	    """
	    resolved = csv_url.replace("/blob/", "/resolve/")
	    repo_root, marker, tail = resolved.partition("/resolve/")

	    # Keep the revision the CSV was addressed with instead of assuming
	    # "main", so sibling files are fetched from the same branch/tag/commit.
	    revision = tail.split("/", 1)[0] if marker else ""

	    return f"{repo_root}/resolve/{revision or 'main'}/"


	# =====================================================
	# LOAD CSV WITH AUTH
	# =====================================================

	def load_csv_with_auth(csv_url):
	    """
	    Download a CSV over HTTP and parse it into a pandas DataFrame.

	    A ``/blob/`` URL is rewritten to its ``/resolve/`` form before the
	    request. If the ``HF_TOKEN`` environment variable is set, it is sent as
	    a ``Bearer`` token in the ``Authorization`` header.

	    Raises:
	        Exception: with the message ``HTTP <status>`` when the response
	            status is not 200.
	        requests.RequestException: on connection failures or when the
	            server does not respond within the timeout.
	    """
	    csv_url = csv_url.replace("/blob/", "/resolve/")

	    token = os.getenv("HF_TOKEN")
	    headers = {"Authorization": f"Bearer {token}"} if token else {}

	    # Without a timeout, requests waits indefinitely on a stalled
	    # connection, which would hang the calling UI callback.
	    r = requests.get(csv_url, headers=headers, timeout=30)

	    if r.status_code != 200:
	        raise Exception(f"HTTP {r.status_code}")

	    return pd.read_csv(StringIO(r.text))


	# =====================================================
	# PDF GENERATOR
	# =====================================================

	def _load_image_reader(image_url, headers):
	    """Fetch and decode one image; return (reader, width, height) or None on failure."""
	    try:
	        resp = requests.get(image_url, headers=headers, timeout=30)
	        resp.raise_for_status()
	        reader = ImageReader(BytesIO(resp.content))
	        img_w, img_h = reader.getSize()
	    except Exception:
	        # Best-effort: a missing, unauthorized, or undecodable image skips
	        # this one page instead of aborting the whole book.
	        return None

	    if img_w <= 0 or img_h <= 0:
	        return None
	    return reader, img_w, img_h


	def _draw_cover_image(c, reader, img_w, img_h, page_w, page_h):
	    """Draw the image centered and scaled to cover the whole page (edges may be cropped)."""
	    # The larger of the two ratios is the one that leaves no uncovered
	    # margin on either axis.
	    scale = max(page_w / img_w, page_h / img_h)
	    draw_w = img_w * scale
	    draw_h = img_h * scale

	    c.drawImage(
	        reader,
	        (page_w - draw_w) / 2,
	        (page_h - draw_h) / 2,
	        width=draw_w,
	        height=draw_h,
	        mask='auto',
	    )


	def _draw_story_text(c, story_text, style, page_w, page_h):
	    """Draw the text in the bottom text area, spilling the overflow onto continuation pages."""
	    text_w = page_w - 2 * inch
	    area_h = page_h * 0.30
	    base_y = inch * 0.8

	    paragraph = Paragraph(story_text, style)
	    _, needed_h = paragraph.wrap(text_w, area_h)

	    pending = []
	    if needed_h > area_h:
	        # Split at the line that no longer fits, so the text is not drawn
	        # in full on both the image page and the continuation page.
	        pieces = paragraph.split(text_w, area_h)
	        if len(pieces) > 1:
	            paragraph, pending = pieces[0], list(pieces[1:])
	            _, needed_h = paragraph.wrap(text_w, area_h)

	    # Center vertically inside the text area; if the text could not be
	    # split and is still too tall, anchor it at the bottom of the area.
	    paragraph.drawOn(c, inch, base_y + max(area_h - needed_h, 0) / 2)

	    avail_h = page_h - 2 * inch
	    while pending:
	        c.showPage()

	        # Continuation pages have no image behind them; the text style is
	        # white, so paint a dark background to keep the text legible.
	        c.setFillColorRGB(0, 0, 0)
	        c.rect(0, 0, page_w, page_h, stroke=0, fill=1)

	        piece = pending.pop(0)
	        _, piece_h = piece.wrap(text_w, avail_h)
	        if piece_h > avail_h:
	            parts = piece.split(text_w, avail_h)
	            if len(parts) > 1:
	                piece = parts[0]
	                pending = list(parts[1:]) + pending
	                _, piece_h = piece.wrap(text_w, avail_h)

	        # Start one inch below the top edge of the page.
	        piece.drawOn(c, inch, page_h - inch - piece_h)


	def generate_pdf_from_csv(csv_url):
	    """
	    Build a storybook PDF from a CSV with ``image`` and ``text`` columns.

	    Each CSV row becomes one A4 page: the image is scaled to cover the page
	    and the text is drawn in white over the bottom 30% of it. Image values
	    that do not start with ``http`` are resolved against the base URL
	    derived from ``csv_url``. Rows whose image is missing or cannot be
	    downloaded or decoded are skipped. Text that does not fit in the text
	    area continues on extra pages.

	    Returns:
	        A ``(status_message, pdf_path)`` tuple; ``pdf_path`` is ``None``
	        when the CSV could not be loaded or lacks the required columns.
	    """
	    pdf_path = "storybook_test.pdf"

	    # -------------------------------------------------
	    # LOAD CSV
	    # -------------------------------------------------
	    try:
	        df = load_csv_with_auth(csv_url)
	    except Exception as e:
	        return f"❌ Failed loading CSV: {e}", None

	    if "image" not in df.columns or "text" not in df.columns:
	        return "❌ CSV must contain columns: image, text", None

	    # Relative image paths are resolved against this base.
	    base_url = build_hf_base_url(csv_url)

	    width, height = A4

	    # -------------------------------------------------
	    # TEXT STYLE (storybook readable)
	    # -------------------------------------------------
	    text_style = ParagraphStyle(
	        name="StoryText",
	        fontName="Helvetica-Bold",
	        fontSize=20,
	        leading=26,
	        alignment=TA_CENTER,
	        textColor="white",
	    )

	    c = canvas.Canvas(pdf_path, pagesize=A4)

	    token = os.getenv("HF_TOKEN")
	    headers = {"Authorization": f"Bearer {token}"} if token else {}

	    # =================================================
	    # STORY PAGE LOOP
	    # =================================================
	    for _, row in df.iterrows():

	        # An empty CSV cell is read as NaN (a float), which has no
	        # .startswith; skip rows without a usable image reference.
	        image_url = row["image"]
	        if not isinstance(image_url, str) or not image_url.strip():
	            continue
	        if not image_url.startswith("http"):
	            image_url = base_url + image_url

	        loaded = _load_image_reader(image_url, headers)
	        if loaded is None:
	            continue
	        img_reader, img_w, img_h = loaded

	        _draw_cover_image(c, img_reader, img_w, img_h, width, height)

	        # NOTE(review): the text is passed to Paragraph as markup, so a
	        # literal "<" or "&" in the CSV is presumably parsed as markup
	        # rather than drawn; confirm whether the CSV is meant to carry tags.
	        raw_text = row["text"]
	        story_text = "" if pd.isna(raw_text) else str(raw_text).replace("\n", "<br/>")
	        if story_text.strip():
	            _draw_story_text(c, story_text, text_style, width, height)

	        c.showPage()

	    c.save()

	    return "✅ Professional Storybook PDF Generated!", pdf_path

	# =====================================================
	# GRADIO UI
	# =====================================================

	# Page layout: one URL input, a trigger button, and two outputs (status
	# text plus the generated PDF as a downloadable file).
	with gr.Blocks(title="CSV → Storybook PDF Tester") as demo:

	    gr.Markdown("## 📕 Storybook PDF Generator (HF Dataset CSV)")

	    csv_input = gr.Textbox(
	        label="CSV URL",
	        value="https://huggingface.co/datasets/rahul7star/zimg-story-book/blob/main/Fo/dataset.csv",
	    )

	    btn = gr.Button("Generate PDF")

	    status = gr.Textbox(label="Status")
	    output_pdf = gr.File(label="Download PDF")

	    # generate_pdf_from_csv returns (status message, pdf path), matching
	    # the order of the two output components.
	    btn.click(
	        generate_pdf_from_csv,
	        inputs=csv_input,
	        outputs=[status, output_pdf],
	    )

	# Start the server only when run as a script, so importing this module
	# (for reuse or tooling) does not launch the app as a side effect.
	if __name__ == "__main__":
	    demo.launch()