"""Streamlit app that turns an uploaded JSON file of OCR text lines into a PDF."""

import json
from io import BytesIO

import streamlit as st
from reportlab.lib import colors
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas

# Streamlit page config
st.set_page_config(
    page_title="JSON to PDF Converter",
    layout="centered",
)

PAGE_WIDTH, PAGE_HEIGHT = letter

# Inline markers that flag a text line as bold.
# NOTE(review): these literals were empty strings in the reviewed source, which
# made the bold test always true and the stripping a no-op. They are presumed
# to be HTML-style bold tags lost in transit -- confirm against real OCR output.
BOLD_OPEN_TAG = "<b>"
BOLD_CLOSE_TAG = "</b>"

# The font size is derived from the bounding-box height and clamped to this range.
MIN_FONT_SIZE = 6
MAX_FONT_SIZE = 18
FONT_HEIGHT_RATIO = 0.8


def _draw_text_line(c, line):
    """Draw one text line and its bounding box on the current page.

    Args:
        c: The ReportLab canvas to draw on.
        line: Mapping with an optional ``"text"`` string and a ``"bbox"``
            sequence ``(x1, y1, x2, y2)``.

    Returns:
        True if something was drawn, False if the line was skipped because
        its ``bbox`` is missing or does not hold exactly four values.
    """
    bbox = line.get("bbox")
    if not bbox:
        return False
    try:
        x1, y1, x2, y2 = bbox
    except (TypeError, ValueError):
        # Malformed box: skip this line instead of aborting the whole PDF.
        return False

    # ReportLab's origin is the bottom-left corner; the y values are flipped
    # against the page height (bbox presumably uses a top-left origin -- TODO
    # confirm), so the box's lower edge sits at PAGE_HEIGHT - y2.
    pdf_x = x1
    pdf_y = PAGE_HEIGHT - y2
    box_height = y2 - y1
    font_size = max(MIN_FONT_SIZE, min(MAX_FONT_SIZE, box_height * FONT_HEIGHT_RATIO))

    # Draw bounding box for debugging (optional)
    c.setStrokeColor(colors.lightgrey)
    c.rect(pdf_x, pdf_y, x2 - x1, box_height, fill=0)

    # A null or non-string "text" value must not break the membership test.
    text = str(line.get("text") or "")
    if BOLD_OPEN_TAG in text:
        font_name = "Helvetica-Bold"
        text = text.replace(BOLD_OPEN_TAG, "").replace(BOLD_CLOSE_TAG, "")
    else:
        font_name = "Helvetica"

    c.setFont(font_name, font_size)
    c.setFillColor(colors.black)
    c.drawString(pdf_x, pdf_y + (box_height - font_size), text)
    return True


def generate_pdf(data):
    """Render the text lines described by *data* into a letter-size PDF.

    Each top-level value of *data* is iterated as a sequence of items; each
    item may carry a ``"text_lines"`` list whose entries are drawn at their
    ``"bbox"`` position. Every top-level group that draws at least one line
    gets its own page. If nothing is drawn at all, a single page reading
    "No OCR text found" is produced so the PDF is never empty.

    Args:
        data: Parsed JSON object (a dict) keyed by ad size.

    Returns:
        A ``BytesIO`` positioned at offset 0 that contains the PDF.

    Raises:
        ValueError: If *data* is not a dict.
    """
    if not isinstance(data, dict):
        raise ValueError(
            "Expected a JSON object at the top level, "
            f"got {type(data).__name__}"
        )

    pdf_output = BytesIO()
    c = canvas.Canvas(pdf_output, pagesize=letter)
    page_created = False

    for ad_data in data.values():
        # Tracked per group so that a group with no drawable lines does not
        # emit a blank page after an earlier group already produced one.
        drew_on_page = False
        for item in ad_data:
            for line in item.get("text_lines", []):
                if _draw_text_line(c, line):
                    drew_on_page = True
        if drew_on_page:
            c.showPage()
            page_created = True

    # Guarantee at least one page
    if not page_created:
        c.setFont("Helvetica", 12)
        c.drawString(50, 750, "No OCR text found")
        c.showPage()

    c.save()
    pdf_output.seek(0)
    return pdf_output


def main():
    """Run the upload -> convert -> download page."""
    st.title("JSON to PDF Converter")
    st.write(
        "Upload a `results.json` file and download the generated PDF."
    )

    uploaded_file = st.file_uploader("Choose a JSON file", type=["json"])
    if uploaded_file is None:
        return

    try:
        data = json.load(uploaded_file)
        st.success("File uploaded successfully!")
        pdf_file = generate_pdf(data)
        st.download_button(
            label="Download PDF",
            data=pdf_file,
            file_name="output_fixed.pdf",
            mime="application/pdf",
        )
    except Exception as e:
        # Top-level UI boundary: report any failure to the user rather than
        # letting the page crash.
        st.error(f"Error processing file: {e}")


if __name__ == "__main__":
    main()