"""Gradio app that converts a PDF (uploaded or fetched by URL) to Markdown via docling."""

import os
import shutil
import tempfile

import gradio as gr
import requests
from docling.document_converter import DocumentConverter

# A single converter instance is created at import time and reused by every request.
converter = DocumentConverter()


def convert_input(pdf_file, pdf_url):
    """Convert an uploaded PDF or a PDF URL to Markdown.

    Args:
        pdf_file: Value produced by the ``gr.File`` input, or ``None`` when
            nothing was uploaded. Its ``.name`` attribute is used as the local
            path; a plain path string is accepted as well.
        pdf_url: Text entered in the URL box; used only when no file was
            uploaded.

    Returns:
        A ``(markdown, markdown_path)`` pair matching the two wired outputs
        (Markdown view, downloadable file). When conversion fails the pair is
        ``("Error: <message>", None)`` so the message is shown in the Markdown
        view and no download is offered.

    Raises:
        gr.Error: If neither a file nor a non-blank URL was provided.
    """
    # Validate *outside* the try block: gr.Error derives from Exception, so
    # raising it inside would be swallowed by the generic handler below and
    # never reach the user.
    url = pdf_url.strip() if isinstance(pdf_url, str) else pdf_url
    if pdf_file is not None:
        # The upload exposes the local path as `.name`; fall back to the value
        # itself in case a plain path string is supplied.
        source = getattr(pdf_file, "name", pdf_file)
    elif url:
        source = url
    else:
        raise gr.Error("Please upload a PDF or provide a URL.")

    try:
        markdown = converter.convert(source).document.export_to_markdown()

        # delete=False keeps the file on disk after the handle is closed so
        # the download component can still serve it by path.
        with tempfile.NamedTemporaryFile(
            delete=False, suffix=".md", mode="w", encoding="utf-8"
        ) as tmp_md:
            tmp_md.write(markdown)
            markdown_path = tmp_md.name
    except Exception as e:  # UI boundary: report the failure instead of crashing
        # Exactly two values, one per output component wired in `.click()`.
        return f"Error: {e}", None

    return markdown, markdown_path


with gr.Blocks() as demo:
    gr.Markdown("# 📄 PDF to Markdown with `docling`")
    gr.Markdown("Upload a PDF or enter a URL (e.g., from arXiv). View Markdown and download it.")

    # NOTE(review): the original nesting was lost; assumes only the two inputs
    # share the row and the remaining components stack below it — confirm.
    with gr.Row():
        pdf_input = gr.File(label="📤 Upload PDF", file_types=[".pdf"])
        url_input = gr.Textbox(label="🌐 Or enter PDF URL (e.g., https://arxiv.org/pdf/...)")

    md_file = gr.File(label="📝 Download Markdown", interactive=False)
    convert_btn = gr.Button("Convert to Markdown")
    md_output = gr.Markdown(label="📄 Markdown Output")

    convert_btn.click(
        fn=convert_input,
        inputs=[pdf_input, url_input],
        outputs=[md_output, md_file],
    )

if __name__ == "__main__":
    demo.launch()