Spaces:
Sleeping
Sleeping
File size: 1,126 Bytes
5e5c234 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
import json
from pathlib import Path

import pdfplumber
import streamlit as st
# Streamlit app: upload a PDF, extract the text of every page, preview the
# result as JSON, and offer that JSON as a download.
st.title("PDF to JSON Converter")

# Upload PDF File
uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file is not None:
    st.write("Processing your file...")

    # Extract text from each page; page numbers in the output are 1-based.
    # The comprehension runs inside the `with` so pages are read while the
    # PDF is still open, and the PDF is closed as soon as extraction is done.
    with pdfplumber.open(uploaded_file) as pdf:
        pdf_content = {
            "pages": [
                {"page_number": page_number, "text": page.extract_text()}
                for page_number, page in enumerate(pdf.pages, start=1)
            ]
        }

    # Display JSON preview
    st.write("Extracted Content:")
    st.json(pdf_content)

    # Serialize once and reuse the same text for the saved file and the
    # download, so both are guaranteed to be identical.
    json_text = json.dumps(pdf_content, ensure_ascii=False, indent=4)

    # Swap only the final extension, whatever its case ("SCAN.PDF" ->
    # "SCAN.json"); a plain str.replace(".pdf", ".json") misses upper-case
    # extensions and rewrites ".pdf" wherever it occurs in the name.
    # `.name` keeps just the last path component so a client-supplied name
    # cannot direct the write outside the working directory.
    json_filename = Path(uploaded_file.name).with_suffix(".json").name

    # Save JSON file
    with open(json_filename, "w", encoding="utf-8") as json_file:
        json_file.write(json_text)

    st.success("File converted to JSON! Download your file below.")
    st.download_button(
        "Download JSON",
        data=json_text,
        file_name=json_filename,
        mime="application/json",
    )
|