# docling_demo / app.py
# ma7583's picture
# Update app.py
# ad5622f verified
import gradio as gr
from docling.document_converter import DocumentConverter
import tempfile
import os
import shutil
import requests
# Shared docling converter, created once at import time and reused by every
# convert_input() call instead of being rebuilt per request.
converter = DocumentConverter()
def convert_input(pdf_file, pdf_url):
    """Convert an uploaded PDF or a PDF URL to Markdown.

    The uploaded file takes precedence over the URL when both are given.

    Args:
        pdf_file: Value of the upload component, or ``None`` when nothing
            was uploaded. Its ``.name`` attribute is used as the local path;
            a value without ``.name`` (e.g. a plain path string) is used
            as-is.
        pdf_url: URL text entered by the user; may be empty or ``None``.

    Returns:
        A ``(markdown, markdown_path)`` tuple: the Markdown text and the path
        of a temporary ``.md`` file containing it. Always two values, matching
        the two output components wired to this callback.

    Raises:
        gr.Error: If neither input was provided, or if conversion or writing
            the Markdown file fails.
    """
    # Validate outside the try block so this user-facing error is not caught
    # by the generic handler below.
    if pdf_file is not None:
        source = getattr(pdf_file, "name", pdf_file)
    else:
        source = (pdf_url or "").strip()
    if not source:
        raise gr.Error("Please upload a PDF or provide a URL.")

    try:
        doc = converter.convert(source).document
        markdown = doc.export_to_markdown()
        # delete=False: the file has to outlive this function so it can be
        # offered for download.
        with tempfile.NamedTemporaryFile(
            delete=False, suffix=".md", mode="w", encoding="utf-8"
        ) as tmp_md:
            tmp_md.write(markdown)
            markdown_path = tmp_md.name
    except Exception as e:
        # Report the failure through Gradio rather than returning a tuple of
        # the wrong length with an error string in the file slot.
        raise gr.Error(f"Error: {e}") from e

    return markdown, markdown_path
# Build the Gradio UI: a title and instructions, the two alternative inputs
# (file upload or URL), a download slot for the generated Markdown file, the
# convert button, and the rendered Markdown output.
# NOTE(review): the emoji in the labels below look mis-encoded (mojibake);
# confirm the file's encoding before changing them.
with gr.Blocks() as demo:
    gr.Markdown("# πŸ“„ PDF to Markdown with `docling`")
    gr.Markdown("Upload a PDF or enter a URL (e.g., from arXiv). View Markdown and download it.")
    # NOTE(review): original indentation was lost; it is assumed that only the
    # two input components sit inside this Row -- confirm against the app.
    with gr.Row():
        pdf_input = gr.File(label="πŸ“€ Upload PDF", file_types=[".pdf"])
        url_input = gr.Textbox(label="🌐 Or enter PDF URL (e.g., https://arxiv.org/pdf/...)")
    # Read-only file component that receives the path returned by convert_input.
    md_file = gr.File(label="πŸ“ Download Markdown", interactive=False)
    convert_btn = gr.Button("Convert to Markdown")
    md_output = gr.Markdown(label="πŸ“„ Markdown Output")
    # convert_input must return exactly one value per component in `outputs`,
    # in this order: Markdown text first, then the Markdown file path.
    convert_btn.click(
        fn=convert_input,
        inputs=[pdf_input, url_input],
        outputs=[md_output, md_file]
    )
# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()