# raahinaez's picture
# Update app.py
# e56a034 verified
import json
import os
import gradio as gr
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from reportlab.lib import colors
from io import BytesIO # For saving the PDF to memory
# Page dimensions unpacked from reportlab's `letter` page size.
# generate_pdf subtracts bbox y-values from PAGE_HEIGHT when placing content.
PAGE_WIDTH, PAGE_HEIGHT = letter
def load_json_from_file(file):
    """Parse an uploaded JSON file and return the decoded Python data.

    Accepted shapes for ``file``:

    * a filesystem path (``str`` / ``os.PathLike``), which is what a
      path-typed upload component hands to its callback;
    * an object whose ``name`` attribute is a filesystem path (for example a
      temporary-file wrapper);
    * raw ``bytes`` / ``bytearray`` holding the JSON document;
    * an object whose ``data`` attribute holds those raw bytes (the shape the
      previous implementation assumed).

    Text is decoded as UTF-8; a leading byte-order mark is tolerated.

    Raises:
        ValueError: if ``file`` is ``None`` or the content is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
        TypeError: if ``file`` is none of the supported shapes.
        OSError: if the path cannot be opened.
    """
    if file is None:
        raise ValueError("No JSON file was provided")

    # Raw payload passed directly.
    if isinstance(file, (bytes, bytearray)):
        return json.loads(bytes(file).decode("utf-8-sig"))

    # Legacy shape: raw bytes exposed on a ``data`` attribute.
    payload = getattr(file, "data", None)
    if isinstance(payload, (bytes, bytearray)):
        return json.loads(bytes(payload).decode("utf-8-sig"))

    # Path-like input, or a wrapper that carries its path in ``name``.
    if isinstance(file, (str, os.PathLike)):
        path = file
    else:
        path = getattr(file, "name", None)
    if not isinstance(path, (str, os.PathLike)):
        raise TypeError(
            f"Unsupported upload object of type {type(file).__name__!r}"
        )

    # "utf-8-sig" reads plain UTF-8 and also strips a BOM if one is present.
    with open(path, encoding="utf-8-sig") as handle:
        return json.load(handle)
def generate_pdf(data):
    """Render OCR text lines into an in-memory, letter-sized PDF.

    ``data`` is a mapping whose values are lists of dicts. Each dict may carry
    a ``"text_lines"`` list whose entries provide ``"text"`` and a four-value
    ``"bbox"`` (``x1, y1, x2, y2``). For every entry with a bbox, the box is
    outlined in light grey and the text is drawn inside it with a font size
    derived from the box height (clamped to 6-18). Text containing ``<b>`` is
    drawn in Helvetica-Bold with the ``<b>``/``</b>`` tags removed.

    A page break is emitted after each item once at least one line has been
    drawn. If nothing drawable is found, the PDF contains a single page
    reading "No OCR text found".

    Returns:
        BytesIO: buffer holding the finished PDF, positioned at offset 0.
    """
    buffer = BytesIO()
    c = canvas.Canvas(buffer, pagesize=letter)
    page_created = False

    # The mapping keys are not used for rendering, so iterate the values only.
    for ad_data in data.values():
        for item in ad_data:
            for line in item.get("text_lines", []):
                bbox = line.get("bbox")
                if not bbox:
                    continue
                x1, y1, x2, y2 = bbox

                # Mark the page as started only once a line is actually
                # drawable; entries without a bbox must not suppress the
                # "No OCR text found" fallback below.
                page_created = True

                # A null "text" value is treated as an empty string.
                text = line.get("text") or ""

                box_height = y2 - y1
                pdf_x = x1
                # bbox y-values are subtracted from the page height
                # (presumably because they are measured from the top of the
                # page while PDF y grows upward - confirm against the OCR tool).
                pdf_y = PAGE_HEIGHT - y2
                font_size = max(6, min(18, box_height * 0.8))

                # Debug outline of the bounding box.
                c.setStrokeColor(colors.lightgrey)
                c.rect(pdf_x, pdf_y, x2 - x1, box_height, fill=0)

                if "<b>" in text:
                    c.setFont("Helvetica-Bold", font_size)
                    text = text.replace("<b>", "").replace("</b>", "")
                else:
                    c.setFont("Helvetica", font_size)

                c.setFillColor(colors.black)
                c.drawString(pdf_x, pdf_y + (box_height - font_size), text)

            if page_created:
                c.showPage()

    # Guarantee at least one page so the saved PDF is never empty.
    if not page_created:
        c.setFont("Helvetica", 12)
        c.drawString(50, 750, "No OCR text found")
        c.showPage()

    c.save()
    buffer.seek(0)  # Rewind so callers can read the PDF from the start.
    return buffer
# ---------------- GRADIO INTERFACE ----------------
def process_json_and_generate_pdf(json_file):
    """Gradio callback: convert an uploaded OCR JSON file into a PDF file.

    The upload is parsed with ``load_json_from_file`` and rendered with
    ``generate_pdf``. The resulting PDF bytes are written to a named
    temporary ``.pdf`` file, because a file output component is given a
    filesystem path to serve rather than an in-memory buffer.

    Returns:
        str: path of the generated PDF. The file is created with
        ``delete=False`` so it still exists after this function returns;
        this function does not remove it.
    """
    import tempfile  # Local import: only this callback needs it.

    json_data = load_json_from_file(json_file)
    pdf_buffer = generate_pdf(json_data)

    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as pdf_file:
        pdf_file.write(pdf_buffer.getvalue())
    return pdf_file.name
# Wire the callback into a one-input, one-output Gradio UI, then start it.
iface = gr.Interface(
    title="JSON to PDF Generator",
    description="Upload a JSON file to generate a PDF based on the OCR text data.",
    fn=process_json_and_generate_pdf,
    # Upload widget for the JSON document.
    inputs=gr.File(label="Upload JSON File"),
    # Download widget for the generated PDF.
    outputs=gr.File(label="Download Generated PDF"),
)
iface.launch()