arasuezofis committed on
Commit
fff6aaf
·
verified ·
1 Parent(s): b6824e5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -0
app.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import json
3
+ import os
4
+ from reportlab.lib.pagesizes import letter
5
+ from reportlab.pdfgen import canvas
6
+ from reportlab.lib import colors
7
+ from io import BytesIO
8
+
9
# Page dimensions unpacked from reportlab's ``letter`` page size. PAGE_HEIGHT
# is used when drawing to flip bounding-box y coordinates onto the PDF page.
PAGE_WIDTH, PAGE_HEIGHT = letter
10
+
11
def load_json_from_file(file_path: "str | os.PathLike[str]") -> object:
    """Read the UTF-8 encoded JSON file at *file_path* and return the parsed value.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f)
14
+
15
def _parse_bbox(bbox):
    """Return *bbox* as four floats ``(x1, y1, x2, y2)``, or ``None`` if unusable."""
    try:
        x1, y1, x2, y2 = (float(value) for value in bbox)
    except (TypeError, ValueError):
        # Missing bbox, wrong number of entries, or non-numeric entries.
        return None
    return x1, y1, x2, y2


def generate_pdf(data):
    """Render the text lines found in *data* into an in-memory PDF.

    *data* is iterated as a mapping whose values are sequences of items; each
    item may hold a ``"text_lines"`` list of dicts with ``"text"`` and
    ``"bbox"`` (``[x1, y1, x2, y2]``) entries. Each line is drawn inside its
    bounding box, with ``<b>``-tagged text rendered in bold.

    Lines without a usable bbox are skipped. An item only produces a page when
    at least one of its lines was actually drawn, and if nothing at all was
    drawn a single placeholder page reading "No OCR text found" is emitted.

    Args:
        data: Parsed JSON mapping as described above.

    Returns:
        A ``BytesIO`` holding the finished PDF, positioned at offset 0.
    """
    # Keep the PDF in memory so it can be handed straight to a download widget.
    pdf_output = BytesIO()
    c = canvas.Canvas(pdf_output, pagesize=letter)
    page_created = False

    for ad_data in data.values():
        for item in ad_data:
            item_has_content = False

            for line in item.get("text_lines") or []:
                box = _parse_bbox(line.get("bbox"))
                if box is None:
                    continue
                x1, y1, x2, y2 = box

                raw_text = line.get("text")
                text = "" if raw_text is None else str(raw_text)

                # Flip the y axis: the bbox is presumably expressed with a
                # top-left origin, while PDF coordinates grow upwards.
                pdf_x = x1
                pdf_y = PAGE_HEIGHT - y2

                box_height = y2 - y1
                # Scale the font to the box, clamped to a readable range.
                font_size = max(6, min(18, box_height * 0.8))

                # Debug box outlining the line's bounding box.
                c.setStrokeColor(colors.lightgrey)
                c.rect(pdf_x, pdf_y, x2 - x1, box_height, fill=0)

                if "<b>" in text:
                    c.setFont("Helvetica-Bold", font_size)
                    text = text.replace("<b>", "").replace("</b>", "")
                else:
                    c.setFont("Helvetica", font_size)

                c.setFillColor(colors.black)
                c.drawString(pdf_x, pdf_y + (box_height - font_size), text)
                item_has_content = True

            # NOTE(review): one page per item is the reconstructed reading of
            # the original layout -- confirm against the intended output.
            if item_has_content:
                c.showPage()
                page_created = True

    # Guarantee at least one page so the result is always a viewable PDF.
    if not page_created:
        c.setFont("Helvetica", 12)
        c.drawString(50, 750, "No OCR text found")
        c.showPage()

    c.save()

    # Rewind so callers read the PDF from the start.
    pdf_output.seek(0)
    return pdf_output
78
+
79
+ # Streamlit UI to upload JSON and download PDF
80
def main():
    """Run the Streamlit page: upload a JSON file and offer the generated PDF.

    The uploaded file is parsed as JSON, passed to ``generate_pdf``, and the
    resulting PDF is exposed through a download button. Any exception raised
    while parsing or rendering is reported on the page via ``st.error``
    instead of crashing the app.
    """
    st.title("JSON to PDF Converter")

    # Same text as a single literal, independent of source indentation.
    st.write(
        "Upload a `results.json` file, and this app will generate a PDF\n"
        "based on the text and bounding box data in the JSON."
    )

    # File uploader
    uploaded_file = st.file_uploader("Choose a JSON file", type=["json"])
    if uploaded_file is None:
        return

    try:
        data = json.load(uploaded_file)
        st.success("File uploaded successfully!")

        # Generate PDF from the uploaded data
        pdf_file = generate_pdf(data)

        # Provide a download link
        st.download_button(
            label="Download PDF",
            data=pdf_file,
            file_name="output_fixed.pdf",
            mime="application/pdf",
        )
    except Exception as e:
        # Top-level UI boundary: surface the problem to the user.
        st.error(f"Error processing the file: {e}")
110
+
111
# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    main()