Spaces:
Running
Running
| import gradio as gr | |
| from docling.document_converter import DocumentConverter | |
| import tempfile | |
| import os | |
| import shutil | |
| import requests | |
# Single DocumentConverter instance, created once at import time and used by
# convert_input for every conversion request.
converter: DocumentConverter = DocumentConverter()
def convert_input(pdf_file, pdf_url):
    """Convert an uploaded PDF, or a PDF referenced by URL, to Markdown.

    An uploaded file takes precedence over the URL when both are supplied.

    Args:
        pdf_file: Value of the file-upload input, or ``None`` when nothing
            was uploaded. May be an object exposing the local path as
            ``.name`` or a plain path string.
        pdf_url: Text of the URL input; surrounding whitespace is ignored.

    Returns:
        A 2-tuple ``(markdown, markdown_path)``: the Markdown text and the
        path of a temporary ``.md`` file containing it. If the conversion
        fails, ``("Error: <message>", None)`` is returned instead, so the
        number of values is the same on both paths.

    Raises:
        gr.Error: If neither a file nor a non-blank URL was provided.
    """
    url = (pdf_url or "").strip()

    if pdf_file is not None:
        # Accept both upload objects carrying the path in `.name` and bare
        # path strings.
        source = getattr(pdf_file, "name", pdf_file)
    elif url:
        source = url
    else:
        # Raised outside the try block so the broad handler below cannot
        # swallow it and turn it into an ordinary return value.
        raise gr.Error("Please upload a PDF or provide a URL.")

    try:
        markdown = converter.convert(source).document.export_to_markdown()

        # delete=False: the file has to outlive this call because its path is
        # handed back to the caller for download.
        with tempfile.NamedTemporaryFile(
            delete=False, suffix=".md", mode="w", encoding="utf-8"
        ) as tmp_md:
            tmp_md.write(markdown)
    except Exception as e:
        # Top-level UI boundary: report the failure as text and return exactly
        # as many values as the success path (the original returned three).
        return f"Error: {e}", None

    return markdown, tmp_md.name
# Build the Gradio UI: a title and description, a PDF upload plus a URL box,
# a download slot for the generated Markdown file, a convert button, and a
# Markdown panel for the converted text.
# NOTE(review): the original indentation was lost; the gr.Row is assumed to
# hold only the two inputs (upload and URL) — confirm against the real layout.
with gr.Blocks() as demo:
    gr.Markdown("# π PDF to Markdown with `docling`")
    gr.Markdown("Upload a PDF or enter a URL (e.g., from arXiv). View Markdown and download it.")
    with gr.Row():
        pdf_input = gr.File(label="π€ Upload PDF", file_types=[".pdf"])
        url_input = gr.Textbox(label="π Or enter PDF URL (e.g., https://arxiv.org/pdf/...)")
    md_file = gr.File(label="π Download Markdown", interactive=False)
    convert_btn = gr.Button("Convert to Markdown")
    md_output = gr.Markdown(label="π Markdown Output")
    # Clicking the button calls convert_input(pdf_input, url_input); its two
    # return values are written to md_output and md_file, in that order.
    convert_btn.click(
        fn=convert_input,
        inputs=[pdf_input, url_input],
        outputs=[md_output, md_file]
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()