File size: 1,126 Bytes
5e5c234
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import json
from pathlib import Path

import pdfplumber
import streamlit as st

# Streamlit app: upload a PDF, extract the text of every page with pdfplumber,
# preview the result as JSON, save it next to the app, and offer it for download.
st.title("PDF to JSON Converter")

# The uploader yields None until the user has picked a file.
uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file is not None:
    st.write("Processing your file...")

    # The uploaded file-like object is handed straight to pdfplumber.
    with pdfplumber.open(uploaded_file) as pdf:
        pdf_content = {
            "pages": [
                {
                    "page_number": page_number,
                    # NOTE(review): some pdfplumber versions return None for
                    # pages without extractable text; normalise to "" so the
                    # "text" field is always a string.
                    "text": page.extract_text() or "",
                }
                for page_number, page in enumerate(pdf.pages, start=1)
            ]
        }

    # Display JSON preview
    st.write("Extracted Content:")
    st.json(pdf_content)

    # Derive the output name from the upload's base name only:
    #  * `.name` drops any directory components, so a crafted upload name
    #    cannot steer the write below outside the working directory;
    #  * `with_suffix` swaps just the final extension regardless of its case
    #    (str.replace missed ".PDF" and rewrote ".pdf" anywhere in the name).
    json_filename = Path(uploaded_file.name).with_suffix(".json").name

    # Serialise once and reuse the text for both the saved file and the download.
    json_text = json.dumps(pdf_content, ensure_ascii=False, indent=4)

    # Save JSON file
    with open(json_filename, "w", encoding="utf-8") as json_file:
        json_file.write(json_text)

    st.success("File converted to JSON! Download your file below.")
    st.download_button(
        "Download JSON",
        data=json_text,
        file_name=json_filename,
        mime="application/json",
    )