# arasuezofis's picture
# Update app.py
# 09ec541 verified
import streamlit as st
import json
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from reportlab.lib import colors
from io import BytesIO
# Streamlit page configuration (browser tab title and centered layout).
st.set_page_config(page_title="JSON to PDF Converter", layout="centered")

# Page dimensions (width, height) taken from reportlab's `letter` page size.
PAGE_WIDTH, PAGE_HEIGHT = letter
def _draw_text_line(c, line):
    """Draw one OCR text line (debug outline plus text) on canvas *c*.

    Args:
        c: The reportlab canvas to draw on.
        line: A dict with an optional ``"text"`` string and a ``"bbox"``
            of four numbers ``[x1, y1, x2, y2]``.

    Returns:
        True when the line was drawn, False when it has no usable
        ``"bbox"`` and was skipped.

    Raises:
        ValueError: If ``"bbox"`` does not unpack into exactly four values.
    """
    bbox = line.get("bbox")
    if not bbox:
        return False

    x1, y1, x2, y2 = bbox
    box_height = y2 - y1
    # The bbox's larger y (y2) maps to the lower PDF edge, i.e. the y axis is
    # flipped — presumably because bboxes use top-left image coordinates.
    bottom = PAGE_HEIGHT - y2
    # Scale the font to the box height, clamped to the range 6..18.
    font_size = max(6, min(18, box_height * 0.8))

    # Light grey outline of the bounding box (debugging aid).
    c.setStrokeColor(colors.lightgrey)
    c.rect(x1, bottom, x2 - x1, box_height, fill=0)

    # A JSON null for "text" would otherwise break the "<b>" membership test.
    text = line.get("text") or ""
    if "<b>" in text:
        font_name = "Helvetica-Bold"
        text = text.replace("<b>", "").replace("</b>", "")
    else:
        font_name = "Helvetica"

    c.setFont(font_name, font_size)
    c.setFillColor(colors.black)
    c.drawString(x1, bottom + (box_height - font_size), text)
    return True


def generate_pdf(data):
    """Render the OCR text lines in *data* into a letter-sized PDF.

    *data* must be a mapping whose values are iterables of dicts; each dict
    may carry a ``"text_lines"`` list of line dicts with ``"text"`` and
    ``"bbox"`` entries. Presumably each dict is one OCR'd page — confirm
    against whatever produces ``results.json``.

    Every dict that yields at least one drawn line becomes its own PDF page.
    Dicts that draw nothing are skipped, so no blank pages are emitted. If
    nothing is drawn at all, a single page reading "No OCR text found" is
    written so the PDF always has at least one page.

    Args:
        data: The parsed JSON mapping described above.

    Returns:
        A ``BytesIO`` holding the finished PDF, rewound to position 0.

    Raises:
        AttributeError: If *data* or one of its items is not dict-like.
        ValueError: If a ``"bbox"`` does not contain exactly four values.
    """
    pdf_output = BytesIO()
    c = canvas.Canvas(pdf_output, pagesize=letter)
    any_page_emitted = False

    # Only the values are used; the mapping keys are irrelevant for layout.
    for ad_data in data.values():
        for item in ad_data:
            # Tracked per page: a single file-wide flag would stay set after
            # the first drawn line and emit blank pages for later empty items.
            page_has_content = False
            for line in item.get("text_lines", []):
                # Plain loop rather than any(): every line must be drawn,
                # so short-circuiting is not acceptable here.
                if _draw_text_line(c, line):
                    page_has_content = True
            if page_has_content:
                c.showPage()
                any_page_emitted = True

    # Guarantee at least one page.
    if not any_page_emitted:
        c.setFont("Helvetica", 12)
        c.drawString(50, 750, "No OCR text found")
        c.showPage()

    c.save()
    pdf_output.seek(0)
    return pdf_output
def main():
    """Run the Streamlit UI: upload a JSON file, offer the resulting PDF.

    Any exception raised while parsing the upload or building the PDF is
    reported to the user through ``st.error`` instead of propagating.
    """
    st.title("JSON to PDF Converter")
    st.write("Upload a `results.json` file and download the generated PDF.")

    uploaded_file = st.file_uploader("Choose a JSON file", type=["json"])
    if uploaded_file is None:
        # Nothing uploaded yet; there is nothing more to render.
        return

    try:
        parsed = json.load(uploaded_file)
        st.success("File uploaded successfully!")
        pdf_buffer = generate_pdf(parsed)
        st.download_button(
            label="Download PDF",
            data=pdf_buffer,
            file_name="output_fixed.pdf",
            mime="application/pdf",
        )
    except Exception as exc:
        st.error(f"Error processing file: {exc}")
# Start the app only when this file is executed as the main module.
if __name__ == "__main__":
    main()