kinely committed on
Commit
5e5c234
·
verified ·
1 Parent(s): 3d27707

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -0
app.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app that converts an uploaded PDF into per-page JSON text.

The user uploads a PDF, the text of every page is extracted with
pdfplumber, a JSON preview is rendered, and the same JSON is offered as a
download. Nothing is persisted on the server.
"""

import json
from pathlib import Path

import pdfplumber
import streamlit as st

# Streamlit App
st.title("PDF to JSON Converter")

# Upload PDF File
uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file is not None:
    st.write("Processing your file...")

    # Extract text from each page; pages are numbered from 1 in the output.
    with pdfplumber.open(uploaded_file) as pdf:
        pages = [
            {
                "page_number": page_number,
                # Normalise a missing extraction result to "" so that "text"
                # is always a string in the resulting JSON.
                "text": page.extract_text() or "",
            }
            for page_number, page in enumerate(pdf.pages, start=1)
        ]
    pdf_content = {"pages": pages}

    # Display JSON preview
    st.write("Extracted Content:")
    st.json(pdf_content)

    # Derive the download name from the upload's stem rather than using
    # str.replace: this handles upper-case extensions (".PDF"), ignores
    # ".pdf" occurring in the middle of the name, and drops any directory
    # components from the client-supplied name.
    json_filename = f"{Path(uploaded_file.name).stem or 'output'}.json"

    # Serialise once and serve from memory. The client-supplied file name is
    # untrusted, so nothing is written to the server's file system; the
    # download button does not need an on-disk copy.
    json_payload = json.dumps(pdf_content, ensure_ascii=False, indent=4)

    st.success("File converted to JSON! Download your file below.")
    st.download_button(
        "Download JSON",
        data=json_payload,
        file_name=json_filename,
        mime="application/json",
    )