kinely committed on
Commit
5ed7b6c
·
verified ·
1 Parent(s): 2ba4645

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -0
app.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app: upload a PDF and get its per-page text back as JSON."""

import json
from pathlib import Path

from PyPDF2 import PdfReader
import streamlit as st

# Streamlit App
st.title("PDF to JSON Converter")

# Upload PDF File
uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file is not None:
    st.write("Processing your file...")

    # Read the PDF file and collect the text of each page (page numbers are 1-based).
    reader = PdfReader(uploaded_file)
    pdf_content = {
        "pages": [
            {"page_number": page_number, "text": page.extract_text()}
            for page_number, page in enumerate(reader.pages, start=1)
        ]
    }

    # Display JSON preview
    st.write("Extracted Content:")
    st.json(pdf_content)

    # Derive the output name from the client-supplied upload name:
    # - with_suffix swaps only the final extension, whatever its case
    #   (str.replace would rewrite every ".pdf" and miss ".PDF");
    # - .name drops any directory components so the write below stays in
    #   the current working directory.
    json_filename = Path(uploaded_file.name).with_suffix(".json").name

    # Serialize once; the same text is written to disk and offered for download.
    json_text = json.dumps(pdf_content, ensure_ascii=False, indent=4)

    # Save JSON file
    with open(json_filename, "w", encoding="utf-8") as json_file:
        json_file.write(json_text)

    st.success("File converted to JSON! Download your file below.")
    st.download_button(
        "Download JSON",
        data=json_text,
        file_name=json_filename,
        mime="application/json",
    )