# Spaces: Sleeping  -- page-status residue captured with the source; not program code
import json
from pathlib import Path

import pdfplumber
import streamlit as st
def json_filename_for(pdf_name: str) -> str:
    """Return a safe ``.json`` file name derived from an uploaded PDF's name.

    Only a trailing ``.pdf`` extension is swapped, and the match is
    case-insensitive, so ``REPORT.PDF`` becomes ``REPORT.json`` and
    ``my.pdf.notes.pdf`` becomes ``my.pdf.notes.json``. Directory components
    (``/`` or ``\\`` separated) are dropped because the name comes from the
    client and is later used as a path on the server.

    Args:
        pdf_name: File name reported for the uploaded file.

    Returns:
        A bare file name ending in ``.json``; ``document.json`` when nothing
        usable is left of the input.
    """
    # Normalise Windows separators first so Path.name strips them on any OS.
    base = Path(pdf_name.replace("\\", "/")).name
    if base.lower().endswith(".pdf"):
        base = base[: -len(".pdf")]
    return f"{base or 'document'}.json"


def extract_pages(pdf) -> dict:
    """Collect the text of every page of an opened PDF.

    Args:
        pdf: Object exposing ``pages``, an iterable whose items provide
            ``extract_text()`` (here: the object yielded by
            ``pdfplumber.open``).

    Returns:
        ``{"pages": [{"page_number": 1, "text": "..."}, ...]}`` with page
        numbers starting at 1.
    """
    return {
        "pages": [
            {
                "page_number": number,
                # NOTE(review): some pdfplumber versions appear to return None
                # for pages without extractable text; normalise to "" so the
                # "text" field is always a string.
                "text": page.extract_text() or "",
            }
            for number, page in enumerate(pdf.pages, start=1)
        ]
    }


def main() -> None:
    """Run the Streamlit app: upload a PDF, preview it as JSON, offer a download."""
    st.title("PDF to JSON Converter")

    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
    if uploaded_file is None:
        return

    st.write("Processing your file...")
    with pdfplumber.open(uploaded_file) as pdf:
        pdf_content = extract_pages(pdf)

    # Display JSON preview
    st.write("Extracted Content:")
    st.json(pdf_content)

    # Serialise once; the same payload is written to disk and offered for download.
    json_filename = json_filename_for(uploaded_file.name)
    payload = json.dumps(pdf_content, ensure_ascii=False, indent=4)

    # The server-side copy is kept from the original script; the name has been
    # reduced to a bare file name above, so it lands in the working directory.
    Path(json_filename).write_text(payload, encoding="utf-8")

    st.success("File converted to JSON! Download your file below.")
    st.download_button(
        "Download JSON",
        data=payload,
        file_name=json_filename,
        mime="application/json",
    )


# `streamlit run` executes this file as `__main__`, so the guard still fires
# there while keeping the helpers importable without starting the UI.
if __name__ == "__main__":
    main()