kinely's picture
Update app.py
1b3ecde verified
raw
history blame contribute delete
538 Bytes
import json
from PyPDF2 import PdfReader
import gradio as gr
def convert_pdf_to_json(pdf_file):
    """Extract the text of every page of a PDF and return it as a JSON string.

    Args:
        pdf_file: The upload handed over by the Gradio ``File`` input. It may
            be an object exposing the file path as ``.name``, a plain path
            string, or ``None`` when the form was submitted without a file.

    Returns:
        A JSON document (4-space indent, non-ASCII characters kept verbatim)
        of the form ``{"pages": [{"page_number": 1, "text": "..."}, ...]}``.
        Page numbers are 1-based. When no file was supplied the ``pages``
        list is empty.
    """
    data = {"pages": []}

    # Nothing uploaded: return an empty document instead of failing with an
    # AttributeError on ``None.name``.
    if pdf_file is not None:
        # Accept both upload objects (path stored in ``.name``) and bare paths.
        pdf_path = getattr(pdf_file, "name", pdf_file)
        reader = PdfReader(pdf_path)
        for page_number, page in enumerate(reader.pages, start=1):
            # A page without extractable text must not break ``.strip()``.
            text = page.extract_text() or ""
            data["pages"].append(
                {"page_number": page_number, "text": text.strip()}
            )

    return json.dumps(data, indent=4, ensure_ascii=False)
# Web UI: a single PDF upload widget as input, and the JSON dump produced by
# convert_pdf_to_json shown as plain text output.
iface = gr.Interface(
    convert_pdf_to_json,
    gr.File(label="Upload PDF"),
    "text",
    title="PDF to JSON Converter",
)

# Start serving the interface as soon as the script is executed.
iface.launch()