Spaces:
Sleeping
Sleeping
| import json | |
| import os | |
| import gradio as gr | |
| from reportlab.lib.pagesizes import letter | |
| from reportlab.pdfgen import canvas | |
| from reportlab.lib import colors | |
| from io import BytesIO # For saving the PDF to memory | |
| PAGE_WIDTH, PAGE_HEIGHT = letter | |
def load_json_from_file(file):
    """Parse an uploaded JSON file and return the decoded Python object.

    Args:
        file: The value handed over by the upload widget. Supported forms:
            raw ``bytes``; an object exposing the content as bytes in a
            ``data`` attribute; a filesystem path (``str`` or
            ``os.PathLike``, which includes Gradio's ``NamedString``); or an
            object whose ``name`` attribute is the path of the stored upload.

    Returns:
        The object produced by ``json.loads`` (typically a ``dict``).

    Raises:
        TypeError: If ``file`` is none of the supported forms (e.g. ``None``).
        OSError: If the path cannot be opened.
        ValueError: If the content is not valid UTF-8 or not valid JSON
            (``UnicodeDecodeError`` and ``json.JSONDecodeError`` are both
            subclasses of ``ValueError``).
    """
    if isinstance(file, (bytes, bytearray)):
        raw = bytes(file)
    elif isinstance(getattr(file, "data", None), (bytes, bytearray)):
        # Upload objects that carry their content in memory.
        raw = bytes(file.data)
    else:
        # A NamedString is a plain ``str`` holding the temp-file path, so it
        # has no ``.data``; read the content from disk instead.
        if isinstance(file, (str, os.PathLike)):
            path = os.fspath(file)
        else:
            path = getattr(file, "name", None)
        if not isinstance(path, (str, os.PathLike)):
            raise TypeError(
                f"Unsupported upload object of type {type(file).__name__!r}"
            )
        with open(path, "rb") as handle:
            raw = handle.read()

    # "utf-8-sig" decodes plain UTF-8 and also tolerates a leading BOM,
    # which json.loads would otherwise reject.
    return json.loads(raw.decode("utf-8-sig"))
def _bbox_coords(bbox):
    """Return ``bbox`` as four floats ``(x1, y1, x2, y2)``, or ``None`` if unusable."""
    try:
        x1, y1, x2, y2 = (float(value) for value in bbox)
    except (TypeError, ValueError):
        # Missing (None), wrong number of entries, or non-numeric entries.
        return None
    return x1, y1, x2, y2


def generate_pdf(data):
    """Render OCR text lines into an in-memory PDF.

    Args:
        data: Mapping whose values are iterables of items; each item is a
            mapping with an optional ``"text_lines"`` list. Every line is a
            mapping with ``"text"`` and ``"bbox"`` (``[x1, y1, x2, y2]``).
            The bbox is used directly as page coordinates with the y axis
            measured from the top of a letter-size page.

    Returns:
        A ``BytesIO`` positioned at offset 0 that contains the PDF. If no
        line could be drawn, the PDF holds a single page reading
        "No OCR text found".

    Lines whose bbox is missing or malformed are skipped and do not count as
    page content. Text containing ``<b>`` is drawn in bold with the
    ``<b>``/``</b>`` markers removed.
    """
    buffer = BytesIO()  # In-memory target for the PDF bytes
    c = canvas.Canvas(buffer, pagesize=letter)
    page_created = False

    for ad_data in data.values():
        for item in ad_data:
            # ``or []`` also covers an explicit null for "text_lines".
            for line in item.get("text_lines") or []:
                coords = _bbox_coords(line.get("bbox"))
                if coords is None:
                    continue
                x1, y1, x2, y2 = coords

                raw_text = line.get("text")
                text = "" if raw_text is None else str(raw_text)

                # Flip the y axis: bbox y grows downwards, PDF y grows upwards.
                pdf_y = PAGE_HEIGHT - y2
                box_height = y2 - y1
                # Scale the font to the box, clamped to a readable 6-18 pt.
                font_size = max(6, min(18, box_height * 0.8))

                # Debug box outlining the OCR bounding box
                c.setStrokeColor(colors.lightgrey)
                c.rect(x1, pdf_y, x2 - x1, box_height, fill=0)

                if "<b>" in text:
                    c.setFont("Helvetica-Bold", font_size)
                    text = text.replace("<b>", "").replace("</b>", "")
                else:
                    c.setFont("Helvetica", font_size)

                c.setFillColor(colors.black)
                c.drawString(x1, pdf_y + (box_height - font_size), text)
                # Only a line that was actually drawn counts as content.
                page_created = True

            # NOTE(review): page break placement reconstructed as one page per
            # item once the document has content -- confirm against intent.
            if page_created:
                c.showPage()

    # Guarantee at least one page so the result is never an empty document.
    if not page_created:
        c.setFont("Helvetica", 12)
        c.drawString(50, 750, "No OCR text found")
        c.showPage()

    c.save()
    buffer.seek(0)  # Rewind so callers can read the PDF from the start
    return buffer
| # ---------------- GRADIO INTERFACE ---------------- | |
def process_json_and_generate_pdf(json_file):
    """Turn an uploaded OCR JSON file into a downloadable PDF.

    Args:
        json_file: The value supplied by the ``gr.File`` input, or ``None``
            when nothing was uploaded.

    Returns:
        Path of a temporary ``.pdf`` file holding the generated document.
        A path is returned rather than the in-memory buffer because the
        ``gr.File`` output component serves files by path.

    Raises:
        gr.Error: If no file was uploaded or the upload is not valid JSON.
    """
    import tempfile  # Local import: only this function needs it

    if json_file is None:
        raise gr.Error("Please upload a JSON file.")

    # Read the uploaded file content as JSON
    try:
        json_data = load_json_from_file(json_file)
    except ValueError as err:
        # Covers both invalid UTF-8 and invalid JSON.
        raise gr.Error(f"Could not read the uploaded file as JSON: {err}") from err

    # Generate the PDF in memory
    pdf_buffer = generate_pdf(json_data)

    # Persist the PDF; delete=False keeps the file after the handle is closed
    # so it can still be served for download.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
        tmp.write(pdf_buffer.getvalue())
    return tmp.name
# Build the Gradio UI: one JSON upload in, one generated PDF out.
json_upload = gr.File(label="Upload JSON File")
pdf_download = gr.File(label="Download Generated PDF")

iface = gr.Interface(
    fn=process_json_and_generate_pdf,
    inputs=json_upload,
    outputs=pdf_download,
    title="JSON to PDF Generator",
    description="Upload a JSON file to generate a PDF based on the OCR text data.",
)

# Start serving the app.
iface.launch()