File size: 2,508 Bytes
a1027da
 
 
 
 
 
 
09ec541
a1027da
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
09ec541
a1027da
09ec541
a1027da
 
 
 
 
 
 
 
09ec541
a1027da
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
09ec541
a1027da
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import streamlit as st
import json
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from reportlab.lib import colors
from io import BytesIO

# Streamlit page config: sets the page title and a centered layout.
st.set_page_config(
    page_title="JSON to PDF Converter",
    layout="centered"
)

# Page dimensions unpacked from reportlab's `letter` page size.
# generate_pdf() subtracts bbox y-values from PAGE_HEIGHT to place them on
# the PDF page.
PAGE_WIDTH, PAGE_HEIGHT = letter

def generate_pdf(data):
    """Render OCR text lines from parsed JSON into a letter-size PDF.

    ``data`` is expected to map each ad-size key to a list of item dicts;
    each item may carry a ``"text_lines"`` list whose entries look like
    ``{"text": ..., "bbox": [x1, y1, x2, y2]}``.

    Every group that yields at least one drawable line gets its own page.
    Groups with nothing drawable are skipped rather than emitted as blank
    pages. If no group yields anything, a single page reading
    "No OCR text found" is produced so the PDF always has one page.

    Bounding-box y-values are subtracted from ``PAGE_HEIGHT``, i.e. bboxes
    are treated as measured from the top of the page while the PDF canvas
    measures from the bottom.

    Args:
        data: Mapping of ad size -> list of item dicts (parsed JSON).

    Returns:
        A ``BytesIO`` rewound to offset 0 that contains the PDF bytes.
    """
    pdf_output = BytesIO()
    c = canvas.Canvas(pdf_output, pagesize=letter)
    any_page_emitted = False

    for ad_data in data.values():
        # Tracked per group so a text-less group never emits a blank page
        # just because an earlier group had content.
        page_has_content = False

        for item in ad_data or []:
            for line in item.get("text_lines") or []:
                bbox = line.get("bbox")
                try:
                    x1, y1, x2, y2 = map(float, bbox)
                except (TypeError, ValueError):
                    # Missing, wrong-length or non-numeric bbox: skip this
                    # line instead of aborting the whole document.
                    continue

                # A JSON null (or non-string) text must not break the
                # "<b>" membership test below.
                text = line.get("text")
                text = "" if text is None else str(text)

                page_has_content = True

                box_height = y2 - y1
                pdf_x = x1
                pdf_y = PAGE_HEIGHT - y2  # bottom edge of the box in PDF space

                # Scale the font to the box, clamped to a readable range.
                font_size = max(6, min(18, box_height * 0.8))

                # Draw bounding box for debugging (optional)
                c.setStrokeColor(colors.lightgrey)
                c.rect(pdf_x, pdf_y, x2 - x1, box_height, fill=0)

                if "<b>" in text:
                    c.setFont("Helvetica-Bold", font_size)
                    text = text.replace("<b>", "").replace("</b>", "")
                else:
                    c.setFont("Helvetica", font_size)

                c.setFillColor(colors.black)
                c.drawString(pdf_x, pdf_y + (box_height - font_size), text)

        if page_has_content:
            c.showPage()
            any_page_emitted = True

    # Guarantee at least one page
    if not any_page_emitted:
        c.setFont("Helvetica", 12)
        c.drawString(50, 750, "No OCR text found")
        c.showPage()

    c.save()
    pdf_output.seek(0)
    return pdf_output

def main():
    """Drive the Streamlit page: accept a JSON upload and offer the PDF.

    Only the title, intro text and uploader are rendered until a file is
    chosen. Any exception raised while parsing the upload or building the
    PDF is reported on the page through ``st.error``.
    """
    st.title("JSON to PDF Converter")
    st.write("Upload a `results.json` file and download the generated PDF.")

    upload = st.file_uploader("Choose a JSON file", type=["json"])
    if upload is None:
        # No file selected yet; there is nothing to convert.
        return

    try:
        payload = json.load(upload)
        st.success("File uploaded successfully!")

        pdf_buffer = generate_pdf(payload)
        st.download_button(
            label="Download PDF",
            data=pdf_buffer,
            file_name="output_fixed.pdf",
            mime="application/pdf",
        )
    except Exception as exc:
        st.error(f"Error processing file: {exc}")

# Script entry point: build the UI only when this file is run directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()