Spaces:
Running
Running
File size: 1,778 Bytes
import gradio as gr
from docling.document_converter import DocumentConverter
import tempfile
import os
import shutil
import requests
converter = DocumentConverter()
def convert_input(pdf_file, pdf_url):
    """Convert an uploaded PDF or a PDF URL to Markdown.

    Exactly two values are returned on every path, because the click
    handler wires this function to two output components (the Markdown
    view and the download file).

    Args:
        pdf_file: Value of the upload component, or ``None`` when nothing
            was uploaded. Either an object exposing the local path as
            ``.name`` or the path itself is accepted.
        pdf_url: Text typed into the URL box; used only when no file was
            uploaded. Surrounding whitespace is ignored.

    Returns:
        A ``(markdown, markdown_path)`` tuple. On success ``markdown`` is
        the converted text and ``markdown_path`` is the path of a ``.md``
        temp file holding the same text. If conversion fails, the tuple is
        ``("Error: <message>", None)``.

    Raises:
        gr.Error: If neither a file nor a URL was supplied.
    """
    url = (pdf_url or "").strip()

    # Validate outside the try block so the user-facing gr.Error is not
    # swallowed by the generic handler below.
    if pdf_file is not None:
        # Fall back to the value itself when it has no ``.name`` attribute
        # (i.e. it is already a plain path string).
        source = getattr(pdf_file, "name", pdf_file)
    elif url:
        source = url
    else:
        raise gr.Error("Please upload a PDF or provide a URL.")

    try:
        doc = converter.convert(source).document
        markdown = doc.export_to_markdown()

        # delete=False keeps the file on disk after the handle is closed,
        # so the returned path is still valid for the download component.
        with tempfile.NamedTemporaryFile(
            delete=False, suffix=".md", mode="w", encoding="utf-8"
        ) as tmp_md:
            tmp_md.write(markdown)
            markdown_path = tmp_md.name
    except Exception as e:
        # UI boundary: report the failure in the Markdown pane and clear
        # the download slot. Two values, matching the two outputs.
        return f"Error: {e}", None

    return markdown, markdown_path
# Build the UI: two input widgets, a download slot, a trigger button and a
# Markdown pane that shows the converted text.
# NOTE(review): the original indentation was lost; the nesting below assumes
# only the two inputs sit inside the Row — confirm against the deployed layout.
with gr.Blocks() as demo:
    gr.Markdown("# π PDF to Markdown with `docling`")
    gr.Markdown("Upload a PDF or enter a URL (e.g., from arXiv). View Markdown and download it.")
    with gr.Row():
        # Upload widget restricted to .pdf files.
        pdf_input = gr.File(label="π€ Upload PDF", file_types=[".pdf"])
        # Alternative input: a URL pointing at a PDF.
        url_input = gr.Textbox(label="π Or enter PDF URL (e.g., https://arxiv.org/pdf/...)")
    # Output-only file component (interactive=False) for the generated .md file.
    md_file = gr.File(label="π Download Markdown", interactive=False)
    convert_btn = gr.Button("Convert to Markdown")
    md_output = gr.Markdown(label="π Markdown Output")
    # convert_input receives (pdf_input, url_input) and its return values are
    # routed, in order, to md_output and md_file.
    convert_btn.click(
        fn=convert_input,
        inputs=[pdf_input, url_input],
        outputs=[md_output, md_file]
    )
# Start the Gradio server only when this file is run as a script, not when
# it is imported.
if __name__ == "__main__":
    demo.launch()