Spaces:

kinely
/

complex-pdf-json

Sleeping

complex-pdf-json / app.py

Create app.py

5e5c234 verified about 1 year ago

1.13 kB

	import json
	from pathlib import Path

	import pdfplumber
	import streamlit as st

	# Streamlit app: upload a PDF, extract the text of every page with
	# pdfplumber, preview the result as JSON, and offer it as a download.
	st.title("PDF to JSON Converter")

	# Upload PDF file (the widget yields None until the user picks a file).
	uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

	if uploaded_file is not None:
	    st.write("Processing your file...")

	    # Read the uploaded file with pdfplumber; the context manager closes
	    # the PDF once extraction is done.
	    with pdfplumber.open(uploaded_file) as pdf:
	        pdf_content = {
	            "pages": [
	                {
	                    "page_number": page_number,
	                    # Normalize a missing result to "" so "text" is always
	                    # a string (some pdfplumber versions may return None
	                    # for pages without extractable text -- TODO confirm).
	                    "text": page.extract_text() or "",
	                }
	                for page_number, page in enumerate(pdf.pages, start=1)
	            ]
	        }

	    # Display JSON preview
	    st.write("Extracted Content:")
	    st.json(pdf_content)

	    # Serialize once and reuse the text for both the saved file and the
	    # download, so the two can never diverge.
	    json_text = json.dumps(pdf_content, ensure_ascii=False, indent=4)

	    # Derive the output name by swapping only the final suffix. Unlike
	    # str.replace(".pdf", ".json") this also handles ".PDF" and leaves a
	    # ".pdf" occurring in the middle of the name untouched. Taking .name
	    # drops any directory components so the file lands in the working
	    # directory.
	    json_filename = Path(uploaded_file.name).with_suffix(".json").name

	    # Save JSON file
	    Path(json_filename).write_text(json_text, encoding="utf-8")

	    st.success("File converted to JSON! Download your file below.")
	    st.download_button(
	        "Download JSON",
	        data=json_text,
	        file_name=json_filename,
	        mime="application/json",
	    )