Spaces:
Paused
Paused
| import os | |
| import requests | |
| import pandas as pd | |
| from io import BytesIO, StringIO | |
| import gradio as gr | |
| from reportlab.pdfgen import canvas | |
| from reportlab.lib.pagesizes import A4 | |
| from reportlab.lib.units import inch | |
| from reportlab.platypus import Paragraph | |
| from reportlab.lib.styles import ParagraphStyle | |
| from reportlab.lib.enums import TA_CENTER | |
| from reportlab.lib.utils import ImageReader | |
| # ===================================================== | |
| # BUILD HF FILE BASE URL | |
| # ===================================================== | |
def build_hf_base_url(csv_url):
    """
    Return the ``<repo>/resolve/<revision>/`` prefix of the repo hosting *csv_url*.

    Relative image paths listed in the CSV are appended to this prefix, so
    the result always ends with a slash.

    Args:
        csv_url: File URL in either ``/blob/`` (web page) or ``/resolve/``
            (raw download) form.

    Returns:
        Everything before the first ``/resolve/`` marker, followed by
        ``/resolve/<revision>/`` where ``<revision>`` is the first path
        segment after the marker. When the URL carries no marker or no
        revision segment, ``main`` is used.
    """
    resolve_url = csv_url.replace("/blob/", "/resolve/")
    repo_url, marker, tail = resolve_url.partition("/resolve/")

    # Keep the revision the CSV was addressed with so that relative image
    # paths are resolved against the same revision instead of always "main".
    revision = tail.split("/", 1)[0] if marker else ""

    return f"{repo_url}/resolve/{revision or 'main'}/"
| # ===================================================== | |
| # LOAD CSV WITH AUTH | |
| # ===================================================== | |
def load_csv_with_auth(csv_url):
    """
    Download a CSV file and parse it into a pandas DataFrame.

    ``/blob/`` URLs are rewritten to their ``/resolve/`` form before the
    request is made. When the ``HF_TOKEN`` environment variable is set it is
    sent as a Bearer token, but only to HTTPS Hugging Face hosts.

    Args:
        csv_url: URL of the CSV file.

    Returns:
        The parsed CSV as a ``pandas.DataFrame``.

    Raises:
        RuntimeError: The server answered with a status other than 200; the
            message is ``"HTTP <status>"``.
        requests.RequestException: The request failed or timed out.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urlparse

    csv_url = csv_url.replace("/blob/", "/resolve/")

    headers = {}
    token = os.getenv("HF_TOKEN")
    if token:
        target = urlparse(csv_url)
        host = (target.hostname or "").lower()
        is_hf_host = host in ("huggingface.co", "hf.co") or host.endswith(
            (".huggingface.co", ".hf.co")
        )
        # The URL is user-supplied: never hand the secret token to an
        # arbitrary host or send it over plain HTTP.
        if target.scheme == "https" and is_hf_host:
            headers["Authorization"] = f"Bearer {token}"

    # Without a timeout a stalled server would block the request forever.
    response = requests.get(csv_url, headers=headers, timeout=30)
    if response.status_code != 200:
        raise RuntimeError(f"HTTP {response.status_code}")

    # When the server declares no charset, requests falls back to ISO-8859-1
    # for text/* bodies, which garbles non-ASCII story text. Assume UTF-8 in
    # that case and honour an explicitly declared charset otherwise.
    if "charset" not in response.headers.get("Content-Type", "").lower():
        response.encoding = "utf-8"

    return pd.read_csv(StringIO(response.text))
| # ===================================================== | |
| # PDF GENERATOR | |
| # ===================================================== | |
def _hf_auth_headers(url):
    """Return request headers for *url*; HF_TOKEN is added only for HTTPS Hugging Face hosts."""
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urlparse

    token = os.getenv("HF_TOKEN")
    if not token:
        return {}
    try:
        target = urlparse(url)
        host = (target.hostname or "").lower()
    except ValueError:
        # Malformed URL: send no credentials; the request itself will fail.
        return {}
    is_hf_host = host in ("huggingface.co", "hf.co") or host.endswith(
        (".huggingface.co", ".hf.co")
    )
    if target.scheme == "https" and is_hf_host:
        return {"Authorization": f"Bearer {token}"}
    return {}


def _load_story_image(image_url):
    """Download *image_url* and return an ImageReader, or None if it cannot be fetched or decoded."""
    try:
        response = requests.get(
            image_url,
            headers=_hf_auth_headers(image_url),
            timeout=30,
        )
        if response.status_code != 200:
            return None
        reader = ImageReader(BytesIO(response.content))
        reader.getSize()  # force decoding now so a bad image is detected here
    except Exception:
        # Deliberately broad: page generation is best-effort, and one
        # unreachable or corrupt image must not abort the whole book.
        return None
    return reader


def _draw_cover_image(pdf, image, page_w, page_h):
    """Draw *image* centred and scaled so it covers the whole page (overflow is cropped)."""
    img_w, img_h = image.getSize()
    # The larger of the two ratios guarantees both page dimensions are covered.
    scale = max(page_w / img_w, page_h / img_h)
    draw_w = img_w * scale
    draw_h = img_h * scale
    pdf.drawImage(
        image,
        (page_w - draw_w) / 2,
        (page_h - draw_h) / 2,
        width=draw_w,
        height=draw_h,
        mask="auto",
    )


def _draw_story_text(pdf, markup, style, image, page_w, page_h):
    """Draw *markup* in the bottom text area, continuing on extra pages when it does not fit."""
    area_height = page_h * 0.30
    area_bottom = inch * 0.8
    text_width = page_w - 2 * inch

    pending = Paragraph(markup, style)
    while True:
        _, needed = pending.wrap(text_width, area_height)
        if needed <= area_height:
            # Fits: centre it vertically inside the text area.
            pending.drawOn(pdf, inch, area_bottom + (area_height - needed) / 2)
            return

        pieces = pending.split(text_width, area_height)
        if len(pieces) < 2:
            # Could not be split; draw it as-is from the bottom of the area.
            # Re-wrap first because a failed split may discard layout state.
            pending.wrap(text_width, area_height)
            pending.drawOn(pdf, inch, area_bottom)
            return

        head, pending = pieces[0], pieces[1]
        _, head_height = head.wrap(text_width, area_height)
        head.drawOn(pdf, inch, area_bottom + (area_height - head_height) / 2)

        # Continuation page: repeat the illustration so the white text
        # remains readable instead of landing on a blank white page.
        pdf.showPage()
        _draw_cover_image(pdf, image, page_w, page_h)


def generate_pdf_from_csv(csv_url):
    """
    Build a storybook PDF from a CSV that lists page images and page texts.

    Each CSV row becomes one page: the image is scaled to cover the A4 page
    and the text is drawn over its lower part. Text that does not fit
    continues on additional pages that reuse the same image. Rows whose
    image is missing, unreachable or undecodable are skipped.

    Args:
        csv_url: URL of a CSV file with ``image`` and ``text`` columns.
            Image values not starting with ``http`` are resolved against
            the base URL returned by ``build_hf_base_url``.

    Returns:
        A ``(status_message, pdf_path)`` tuple. ``pdf_path`` is ``None``
        when the CSV cannot be loaded, lacks the required columns, or no
        page could be rendered.
    """
    pdf_path = "storybook_test.pdf"

    try:
        df = load_csv_with_auth(csv_url)
    except Exception as e:
        return f"β Failed loading CSV: {e}", None

    if "image" not in df.columns or "text" not in df.columns:
        return "β CSV must contain columns: image, text", None

    base_url = build_hf_base_url(csv_url)
    width, height = A4

    # Large bold white text, centred.
    text_style = ParagraphStyle(
        name="StoryText",
        fontName="Helvetica-Bold",
        fontSize=20,
        leading=26,
        alignment=TA_CENTER,
        textColor="white",
    )

    c = canvas.Canvas(pdf_path, pagesize=A4)
    rendered_rows = 0

    for _, row in df.iterrows():
        image_ref = row["image"]
        # Empty cells are read as NaN (a float), which has no .startswith().
        if pd.isna(image_ref):
            continue
        image_url = str(image_ref).strip()
        if not image_url:
            continue
        if not image_url.startswith("http"):
            image_url = base_url + image_url

        image = _load_story_image(image_url)
        if image is None:
            continue

        _draw_cover_image(c, image, width, height)

        raw_text = row["text"]
        # NOTE: the text is interpreted as reportlab paragraph markup, so
        # newlines are converted to <br/> tags. A missing cell yields no
        # text rather than the literal string "nan".
        story_text = "" if pd.isna(raw_text) else str(raw_text).replace("\n", "<br/>")
        _draw_story_text(c, story_text, text_style, image, width, height)

        c.showPage()
        rendered_rows += 1

    if rendered_rows == 0:
        # Nothing was drawn: report it instead of claiming success.
        return "β No page could be rendered: no image could be loaded", None

    c.save()
    return "β Professional Storybook PDF Generated!", pdf_path
| # ===================================================== | |
| # GRADIO UI | |
| # ===================================================== | |
# Single-screen UI: a CSV URL box, a trigger button, a status line and the
# generated PDF offered as a download.
with gr.Blocks(title="CSV β Storybook PDF Tester") as demo:
    gr.Markdown("## π Storybook PDF Generator (HF Dataset CSV)")

    # Pre-filled with a sample dataset CSV so the app can be tried at once.
    csv_input = gr.Textbox(
        value="https://huggingface.co/datasets/rahul7star/zimg-story-book/blob/main/Fo/dataset.csv",
        label="CSV URL",
    )
    btn = gr.Button("Generate PDF")

    status = gr.Textbox(label="Status")
    output_pdf = gr.File(label="Download PDF")

    # The handler returns (status message, pdf path), matching the outputs.
    btn.click(
        fn=generate_pdf_from_csv,
        inputs=[csv_input],
        outputs=[status, output_pdf],
    )

demo.launch()