File size: 1,778 Bytes
d660e96
005e1a9
d660e96
 
005e1a9
 
d660e96
005e1a9
d660e96
005e1a9
5155dfc
d660e96
 
5155dfc
 
 
 
005e1a9
 
5155dfc
005e1a9
 
 
 
 
 
 
 
 
 
 
ad5622f
005e1a9
d660e96
005e1a9
5155dfc
005e1a9
 
 
 
 
 
 
 
 
 
ad5622f
005e1a9
d660e96
005e1a9
 
 
ad5622f
005e1a9
d660e96
 
005e1a9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import gradio as gr
from docling.document_converter import DocumentConverter
import tempfile
import os
import shutil
import requests

# Single converter instance created at import time; convert_input() uses it
# for every conversion instead of constructing a new one per request.
converter = DocumentConverter()

def convert_input(pdf_file, pdf_url):
    """Convert an uploaded PDF or a PDF URL to Markdown.

    The uploaded file takes precedence over the URL when both are given.

    Args:
        pdf_file: Value from the file-upload component, or ``None``. May be a
            path string or an object exposing the local path as ``.name``.
        pdf_url: URL of a PDF to convert; ignored when ``pdf_file`` is set.
            ``None``, empty, or whitespace-only values count as "not given".

    Returns:
        A ``(markdown, markdown_path)`` tuple: the Markdown text and the path
        of a temporary ``.md`` file containing it. The arity matches the two
        output components wired to this callback.

    Raises:
        gr.Error: If neither input is provided, or if conversion or writing
            the Markdown file fails. Raising (rather than returning an error
            tuple) keeps the number of returned values consistent and lets
            Gradio show the message to the user.
    """
    if pdf_file is not None:
        # Accept both a plain path string and an object carrying the path in
        # ``.name`` (what the upload component hands over).
        source = getattr(pdf_file, "name", pdf_file)
    elif pdf_url and pdf_url.strip():
        source = pdf_url.strip()
    else:
        raise gr.Error("Please upload a PDF or provide a URL.")

    try:
        doc = converter.convert(source).document
        markdown = doc.export_to_markdown()

        # delete=False: the file must outlive this function so the download
        # component can serve it.
        with tempfile.NamedTemporaryFile(
            delete=False, suffix=".md", mode="w", encoding="utf-8"
        ) as tmp_md:
            tmp_md.write(markdown)
            markdown_path = tmp_md.name
    except gr.Error:
        # Already a user-facing error; do not re-wrap it.
        raise
    except Exception as e:
        # Top-level UI boundary: surface any failure as a readable message.
        raise gr.Error(f"Error: {e}") from e

    return markdown, markdown_path


# UI definition: one row with the two alternative inputs (file upload or URL)
# plus the download slot, then the convert button and the rendered Markdown.
with gr.Blocks() as demo:
    gr.Markdown("# 📄 PDF to Markdown with `docling`")
    gr.Markdown("Upload a PDF or enter a URL (e.g., from arXiv). View Markdown and download it.")

    with gr.Row():
        pdf_input = gr.File(label="📤 Upload PDF", file_types=[".pdf"])
        url_input = gr.Textbox(label="🌐 Or enter PDF URL (e.g., https://arxiv.org/pdf/...)")
        # Output-only component: receives the path of the generated .md file.
        md_file = gr.File(label="📝 Download Markdown", interactive=False)

    convert_btn = gr.Button("Convert to Markdown")
    md_output = gr.Markdown(label="📄 Markdown Output")

    # convert_input returns (markdown_text, markdown_file_path), in the same
    # order as the outputs listed here.
    convert_btn.click(
        fn=convert_input,
        inputs=[pdf_input, url_input],
        outputs=[md_output, md_file]
    )

# Launch the Blocks app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()