File size: 2,765 Bytes
27acee8
fff6aaf
 
 
 
eaca86f
27acee8
fff6aaf
27acee8
3c82c25
 
 
27acee8
 
652e7a9
 
27acee8
 
 
eaca86f
 
3c82c25
eaca86f
 
fff6aaf
27acee8
0507dfa
 
 
 
fff6aaf
 
 
 
27acee8
fff6aaf
 
27acee8
fff6aaf
0507dfa
 
fff6aaf
 
27acee8
0507dfa
 
 
 
 
 
 
 
 
fff6aaf
0507dfa
27acee8
fff6aaf
 
 
 
 
27acee8
fff6aaf
27acee8
 
0507dfa
27acee8
fff6aaf
 
 
 
27acee8
fff6aaf
eaca86f
3c82c25
0507dfa
3c82c25
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import gradio as gr
import json
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from reportlab.lib import colors
import tempfile

PAGE_WIDTH, PAGE_HEIGHT = letter

# -------------------
# PDF generation function
# -------------------
def _iter_text_lines(data):
    """Yield ``(text, (x1, y1, x2, y2))`` for each drawable line in *data*.

    *data* is the parsed JSON object: a mapping whose values are lists of
    items, each item optionally carrying a ``"text_lines"`` list of
    ``{"text": ..., "bbox": [x1, y1, x2, y2]}`` entries.  Entries that do not
    have that shape (wrong container type, missing or malformed ``bbox``)
    are skipped instead of aborting the whole document.
    """
    for ad_data in data.values():
        if not isinstance(ad_data, list):
            continue
        for item in ad_data:
            if not isinstance(item, dict):
                continue
            for line in item.get("text_lines") or []:
                if not isinstance(line, dict):
                    continue
                bbox = line.get("bbox")
                if not bbox:
                    continue
                try:
                    x1, y1, x2, y2 = (float(value) for value in bbox)
                except (TypeError, ValueError):
                    # Not exactly four numeric coordinates: nothing to draw.
                    continue
                text = line.get("text")
                if text is None:
                    text = ""
                elif not isinstance(text, str):
                    text = str(text)
                yield text, (x1, y1, x2, y2)


def generate_pdf(file):
    """Render the OCR text lines of an uploaded JSON file into a PDF.

    Lines are stacked top to bottom in the order they appear in the JSON.
    Each line keeps the x offset, width and height of its bounding box; a
    new page is started whenever the next box would cross the bottom margin.
    Text containing ``<b>`` is drawn in bold with the ``<b>``/``</b>`` tags
    removed.

    Args:
        file: The uploaded file, either an object exposing its path as
            ``.name`` or the path itself as a string.

    Returns:
        A ``(pdf_path, status)`` tuple.  On success ``pdf_path`` is the path
        of a temporary ``.pdf`` file (created with ``delete=False``, so it is
        not removed automatically) and ``status`` is a success message.  When
        the input cannot be read or is not a JSON object, ``pdf_path`` is
        ``None`` and ``status`` starts with ``"Error reading JSON: "``.
    """
    if file is None:
        return None, "Error reading JSON: no file was uploaded"

    # This is the UI boundary: any failure while reading is reported to the
    # user as a status message rather than raised.
    try:
        json_path = file if isinstance(file, str) else file.name
        with open(json_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except Exception as e:
        return None, f"Error reading JSON: {e}"

    if not isinstance(data, dict):
        return None, (
            "Error reading JSON: expected a top-level object, "
            f"got {type(data).__name__}"
        )

    # Only the name is needed; the canvas reopens the path itself on save().
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
        pdf_path = temp_pdf.name

    c = canvas.Canvas(pdf_path, pagesize=letter)
    page_created = False

    top_y = PAGE_HEIGHT - 50  # start from top margin
    bottom_margin = 50
    current_y = top_y  # current vertical position (top edge of next box)

    for text, (x1, y1, x2, y2) in _iter_text_lines(data):
        page_created = True

        # abs()/min() tolerate boxes whose corners are given in reverse
        # order; a negative height would move the cursor upwards.
        box_width = abs(x2 - x1)
        box_height = abs(y2 - y1)
        font_size = max(6, min(18, box_height * 0.8))

        # The cursor advances by the box height, so that (not just the font
        # size) must fit above the bottom margin.  Skip the break on a fresh
        # page: an over-tall box would otherwise emit an empty page first.
        needed = max(box_height, font_size)
        if current_y - needed < bottom_margin and current_y < top_y:
            c.showPage()
            current_y = top_y  # reset for new page

        pdf_x = min(x1, x2)
        pdf_y = current_y - box_height

        # Optional: draw bounding box
        c.setStrokeColor(colors.lightgrey)
        c.rect(pdf_x, pdf_y, box_width, box_height, fill=0)

        if "<b>" in text:
            c.setFont("Helvetica-Bold", font_size)
            text = text.replace("<b>", "").replace("</b>", "")
        else:
            c.setFont("Helvetica", font_size)

        c.setFillColor(colors.black)
        # Baseline sits one font size below the top edge of the box.
        c.drawString(pdf_x, pdf_y + (box_height - font_size), text)

        current_y = pdf_y - 5  # move cursor down with small gap

    if not page_created:
        c.setFont("Helvetica", 12)
        c.drawString(50, 750, "No OCR text found")
        c.showPage()

    c.save()
    return pdf_path, "PDF generated successfully!"


# -------------------
# Gradio Interface
# -------------------
# Build the widgets first so the Interface call below is plain wiring:
# one JSON upload in, a downloadable PDF plus a status line out.
_json_upload = gr.File(file_types=[".json"], label="Upload JSON")
_pdf_download = gr.File(label="Download PDF")
_status_box = gr.Textbox(label="Status")

iface = gr.Interface(
    fn=generate_pdf,
    inputs=_json_upload,
    outputs=[_pdf_download, _status_box],
    title="JSON to PDF Converter",
    description="Upload a `results.json` file and download the generated PDF.",
)

# Start serving the app as soon as this module is executed.
iface.launch()