arasuezofis committed on
Commit
a1027da
·
verified ·
1 Parent(s): 9fe7db3

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +109 -0
app.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import json
3
+ from reportlab.lib.pagesizes import letter
4
+ from reportlab.pdfgen import canvas
5
+ from reportlab.lib import colors
6
+ from io import BytesIO
7
+
8
# Page-level Streamlit settings. The original author marked this call as
# important for Hugging Face hosting.
# NOTE(review): Streamlit has historically required set_page_config to be
# the first Streamlit command executed -- keep it ahead of any other st.* call.
st.set_page_config(page_title="JSON to PDF Converter", layout="centered")

# Dimensions of a US-letter page in PDF points, unpacked from reportlab's
# `letter` page-size tuple.
PAGE_WIDTH, PAGE_HEIGHT = letter
15
+
16
+
17
def _draw_text_line(c, line):
    """Draw one OCR text line (outline box plus text) on the current page.

    Args:
        c: The reportlab canvas to draw on.
        line: Mapping with an optional ``"text"`` string and a ``"bbox"``
            sequence ``(x1, y1, x2, y2)``.
            NOTE(review): the bbox is assumed to use a top-left origin in
            PDF points, which is why y is flipped below -- confirm against
            the producer of the JSON.

    Returns:
        True if the line was drawn, False if it was skipped because its
        bbox is missing or does not have exactly four entries.
    """
    bbox = line.get("bbox")
    if not bbox or len(bbox) != 4:
        return False

    # A JSON ``null`` text would otherwise break the ``"<b>" in text`` test.
    raw_text = line.get("text")
    text = "" if raw_text is None else str(raw_text)

    x1, y1, x2, y2 = bbox
    box_height = y2 - y1
    # reportlab's origin is the bottom-left corner, so flip the y axis.
    pdf_y = PAGE_HEIGHT - y2
    # Scale the font to the box height, clamped to the 6-18 pt range.
    font_size = max(6, min(18, box_height * 0.8))

    # Draw bounding box (debug)
    c.setStrokeColor(colors.lightgrey)
    c.rect(x1, pdf_y, x2 - x1, box_height, fill=0)

    # A "<b>" tag anywhere in the text renders the whole line in bold; the
    # tags themselves are stripped before drawing.
    if "<b>" in text:
        c.setFont("Helvetica-Bold", font_size)
        text = text.replace("<b>", "").replace("</b>", "")
    else:
        c.setFont("Helvetica", font_size)

    c.setFillColor(colors.black)
    c.drawString(x1, pdf_y + (box_height - font_size), text)
    return True


def generate_pdf(data):
    """Render OCR results into a letter-sized PDF held in memory.

    Args:
        data: Mapping whose values are lists of items; each item is a
            mapping that may carry a ``"text_lines"`` list of line mappings
            (see ``_draw_text_line``). The mapping's keys are not used.

    Returns:
        A ``BytesIO`` positioned at offset 0 containing the finished PDF.
        If no line could be drawn, the PDF contains a single page with the
        message "No OCR text found".
    """
    pdf_output = BytesIO()
    c = canvas.Canvas(pdf_output, pagesize=letter)
    # Becomes True once any line has been drawn and then stays True.
    page_created = False

    for ad_data in data.values():
        for item in ad_data:
            for line in item.get("text_lines", []):
                if _draw_text_line(c, line):
                    page_created = True

            # NOTE(review): the indentation of this page break was lost in
            # the source this was recovered from; one page per item is
            # assumed -- confirm. Because the flag is never reset, items
            # before the first drawn line produce no page, while empty
            # items after it produce a blank page.
            if page_created:
                c.showPage()

    # Guarantee at least one page
    if not page_created:
        c.setFont("Helvetica", 12)
        c.drawString(50, 750, "No OCR text found")
        c.showPage()

    c.save()
    pdf_output.seek(0)
    return pdf_output
76
+
77
+
78
def main():
    """Render the upload page and offer the converted PDF for download.

    Shows a title, a short instruction and a JSON file uploader. Once a
    file is uploaded it is parsed as JSON, passed to ``generate_pdf`` and
    the result is exposed through a download button. Any exception raised
    along the way is reported on the page instead of propagating.
    """
    st.title("JSON to PDF Converter")
    st.write("Upload a `results.json` file and download the generated PDF.")

    upload = st.file_uploader("Choose a JSON file", type=["json"])
    if upload is None:
        # Nothing uploaded yet; there is nothing further to render.
        return

    try:
        parsed = json.load(upload)
        st.success("File uploaded successfully!")

        pdf_buffer = generate_pdf(parsed)

        st.download_button(
            label="Download PDF",
            data=pdf_buffer,
            file_name="output_fixed.pdf",
            mime="application/pdf",
        )
    except Exception as exc:
        # UI boundary: surface any failure to the user as an error message.
        st.error(f"Error processing file: {exc}")
106
+
107
+
108
# Run the app only when this file is executed as the main script.
if __name__ == "__main__":
    main()