"""Gradio app that turns a Hugging Face dataset CSV into a storybook PDF.

The CSV must provide an ``image`` column (absolute URL or path relative to
the dataset repository root) and a ``text`` column.  Each row becomes one A4
page: the image fills the page and the text is drawn in a band near the
bottom.  Text that does not fit in the band continues on extra pages.
"""

import logging
import os
from io import BytesIO, StringIO
from typing import Dict, Optional, Tuple
from xml.sax.saxutils import escape

import gradio as gr
import pandas as pd
import requests
from reportlab.lib.enums import TA_CENTER
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import ParagraphStyle
from reportlab.lib.units import inch
from reportlab.lib.utils import ImageReader
from reportlab.pdfgen import canvas
from reportlab.platypus import Paragraph

logger = logging.getLogger(__name__)

# Output file written in the current working directory.
PDF_PATH = "storybook_test.pdf"
# Seconds to wait for any single HTTP request before giving up.
REQUEST_TIMEOUT = 30
# Fraction of the page height reserved for the story text band.
TEXT_AREA_FRACTION = 0.30
# Distance of the text band from the bottom edge of the page.
TEXT_BOTTOM_MARGIN = inch * 0.8


# =====================================================
# HTTP HELPERS
# =====================================================
def _auth_headers() -> Dict[str, str]:
    """Return request headers carrying ``HF_TOKEN`` as a bearer token, if set."""
    token = os.getenv("HF_TOKEN")
    return {"Authorization": f"Bearer {token}"} if token else {}


# =====================================================
# BUILD HF FILE BASE URL
# =====================================================
def build_hf_base_url(csv_url: str) -> str:
    """Convert a CSV URL into the dataset's base ``resolve`` path.

    ``/blob/`` is rewritten to ``/resolve/`` and everything after the
    revision segment is dropped, so the result ends in
    ``/resolve/<revision>/``.  When the URL carries no revision segment the
    function falls back to ``main``.
    """
    resolve_url = csv_url.replace("/blob/", "/resolve/")
    repo_root, found, tail = resolve_url.partition("/resolve/")
    # Keep the revision of the CSV so images come from the same snapshot.
    revision = tail.split("/", 1)[0] if found else ""
    return f"{repo_root}/resolve/{revision or 'main'}/"


# =====================================================
# LOAD CSV WITH AUTH
# =====================================================
def load_csv_with_auth(csv_url: str) -> pd.DataFrame:
    """Download ``csv_url`` (sending ``HF_TOKEN`` if set) and parse it.

    Raises:
        RuntimeError: the server answered with a status other than 200.
        requests.RequestException: the request itself failed or timed out.
    """
    csv_url = csv_url.replace("/blob/", "/resolve/")
    response = requests.get(
        csv_url, headers=_auth_headers(), timeout=REQUEST_TIMEOUT
    )
    if response.status_code != 200:
        raise RuntimeError(f"HTTP {response.status_code}")
    return pd.read_csv(StringIO(response.text))


# =====================================================
# PAGE HELPERS
# =====================================================
def _resolve_image_url(image_ref, base_url: str) -> Optional[str]:
    """Return an absolute image URL, or ``None`` when the cell is unusable."""
    # Empty CSV cells are read by pandas as NaN (a float), not as a string.
    if not isinstance(image_ref, str) or not image_ref.strip():
        return None
    image_ref = image_ref.strip()
    if image_ref.startswith(("http://", "https://")):
        return image_ref
    # base_url already ends with "/", so avoid producing a double slash.
    return base_url + image_ref.lstrip("/")


def _fetch_image(image_url: str, headers: Dict[str, str]) -> Optional[ImageReader]:
    """Download and decode one image; return ``None`` (and log) on failure."""
    try:
        response = requests.get(
            image_url, headers=headers, timeout=REQUEST_TIMEOUT
        )
        response.raise_for_status()
    except requests.RequestException as exc:
        logger.warning("Skipping %s: download failed (%s)", image_url, exc)
        return None

    try:
        reader = ImageReader(BytesIO(response.content))
        img_w, img_h = reader.getSize()
    except Exception as exc:
        # Decoding errors surface as several unrelated exception types, and
        # one bad image must not abort the whole book.
        logger.warning("Skipping %s: not a readable image (%s)", image_url, exc)
        return None

    if img_w <= 0 or img_h <= 0:
        logger.warning("Skipping %s: image has no area", image_url)
        return None
    return reader


def _draw_cover_image(c, img_reader, page_w: float, page_h: float) -> None:
    """Draw the image centred and scaled so it covers the whole page."""
    img_w, img_h = img_reader.getSize()
    # Scale along the tighter dimension so no white border remains; the
    # excess in the other dimension is cropped by the page edge.
    if img_w / img_h > page_w / page_h:
        scale = page_h / img_h
    else:
        scale = page_w / img_w
    draw_w = img_w * scale
    draw_h = img_h * scale
    c.drawImage(
        img_reader,
        (page_w - draw_w) / 2,
        (page_h - draw_h) / 2,
        width=draw_w,
        height=draw_h,
        mask='auto',
    )


def _build_paragraph(story_text: str, style: ParagraphStyle) -> Paragraph:
    """Build the story paragraph, turning newlines into ``<br/>`` tags."""
    try:
        return Paragraph(story_text.replace("\n", "<br/>"), style)
    except ValueError:
        # The text was rejected as paragraph markup (e.g. a stray "<");
        # retry with the special characters escaped so it renders literally.
        return Paragraph(escape(story_text).replace("\n", "<br/>"), style)


def _draw_story_text(c, img_reader, paragraph, page_w: float, page_h: float) -> None:
    """Draw the paragraph in the bottom band, continuing on extra pages.

    Each continuation page repeats the illustration so the white text stays
    readable.  The caller is responsible for the final ``showPage()``.
    """
    text_area_height = page_h * TEXT_AREA_FRACTION
    text_width = page_w - 2 * inch

    pending = paragraph
    while pending is not None:
        # Measure before drawing.
        _, needed = pending.wrap(text_width, text_area_height)
        pieces = []
        if needed > text_area_height:
            pieces = pending.split(text_width, text_area_height)

        if len(pieces) == 2:
            current, pending = pieces
            _, needed = current.wrap(text_width, text_area_height)
        else:
            # Fits, or cannot be split any further: draw what we have.
            current, pending = pending, None

        # Centre vertically in the band; overflow is anchored to the bottom.
        offset = max((text_area_height - needed) / 2, 0)
        current.drawOn(c, inch, TEXT_BOTTOM_MARGIN + offset)

        if pending is not None:
            c.showPage()
            _draw_cover_image(c, img_reader, page_w, page_h)


# =====================================================
# PDF GENERATOR
# =====================================================
def generate_pdf_from_csv(csv_url: str) -> Tuple[str, Optional[str]]:
    """Build the storybook PDF described by the CSV at ``csv_url``.

    Returns:
        ``(status_message, pdf_path)``; ``pdf_path`` is ``None`` when the
        CSV could not be loaded, lacks the required columns, or no page
        could be rendered.
    """
    try:
        df = load_csv_with_auth(csv_url)
    except Exception as e:
        # UI boundary: report any loading problem to the user, never crash.
        return f"❌ Failed loading CSV: {e}", None

    if "image" not in df.columns or "text" not in df.columns:
        return "❌ CSV must contain columns: image, text", None

    base_url = build_hf_base_url(csv_url)
    width, height = A4

    # Large, bold, centred white text drawn on top of the illustration.
    text_style = ParagraphStyle(
        name="StoryText",
        fontName="Helvetica-Bold",
        fontSize=20,
        leading=26,
        alignment=TA_CENTER,
        textColor="white",
    )

    c = canvas.Canvas(PDF_PATH, pagesize=A4)
    headers = _auth_headers()
    pages_drawn = 0

    for _, row in df.iterrows():
        image_url = _resolve_image_url(row["image"], base_url)
        if image_url is None:
            logger.warning("Skipping row without an image reference")
            continue

        img_reader = _fetch_image(image_url, headers)
        if img_reader is None:
            continue

        _draw_cover_image(c, img_reader, width, height)

        raw_text = row["text"]
        # An empty cell is NaN; str() would print the literal "nan".
        story_text = "" if pd.isna(raw_text) else str(raw_text)
        if story_text.strip():
            paragraph = _build_paragraph(story_text, text_style)
            _draw_story_text(c, img_reader, paragraph, width, height)

        c.showPage()
        pages_drawn += 1

    if pages_drawn == 0:
        return "❌ No pages could be rendered (no image could be loaded)", None

    c.save()
    return "✅ Professional Storybook PDF Generated!", PDF_PATH


# =====================================================
# GRADIO UI
# =====================================================
with gr.Blocks(title="CSV → Storybook PDF Tester") as demo:
    gr.Markdown("## 📕 Storybook PDF Generator (HF Dataset CSV)")

    csv_input = gr.Textbox(
        label="CSV URL",
        value="https://huggingface.co/datasets/rahul7star/zimg-story-book/blob/main/Fo/dataset.csv"
    )

    btn = gr.Button("Generate PDF")
    status = gr.Textbox(label="Status")
    output_pdf = gr.File(label="Download PDF")

    btn.click(
        generate_pdf_from_csv,
        inputs=csv_input,
        outputs=[status, output_pdf]
    )

# Start the server only when run as a script, so importing this module
# (e.g. to reuse generate_pdf_from_csv) has no side effects.
if __name__ == "__main__":
    demo.launch()