Spaces:

yasserrmd
/

DoclingConverter

Running on Zero

File size: 1,762 Bytes

e2d728a
 
2803b23
8e024f6
e2d728a
39ea117
8e024f6
e2d728a
2803b23
 
39ea117
2803b23
39ea117
2803b23
e2d728a
4678d36
39ea117
e2d728a
 
 
39ea117
 
 
e2d728a
39ea117
e2d728a
39ea117
 
e2d728a
 
 
39ea117
a07d796
39ea117
 
 
 
 
 
e2d728a
39ea117
a07d796
 
 
 
39ea117
a07d796
 
 
e2d728a
39ea117

import json

import gradio as gr
import spaces
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import PdfPipelineOptions, TesseractCliOcrOptions
from docling.document_converter import DocumentConverter, PdfFormatOption

# The GPU decorator is not strictly required for Docling OCR; it is kept in
# case GPU execution is wanted.
@spaces.GPU
def convert_document(file, output_format):
    """Convert an uploaded PDF with Docling (OCR enabled) and export it.

    Args:
        file: The upload coming from the ``gr.File`` input -- either a path
            string or an object exposing the path as ``.name``.
        output_format: ``"Markdown"`` or ``"JSON"``; any other value yields a
            warning string instead of document text.

    Returns:
        A ``(converted_text, metadata)`` tuple: the exported document as a
        string, and a dict listing the attribute names of the converted
        document under the ``"Available Attributes"`` key.

    Raises:
        gr.Error: If no file was uploaded.
    """
    if file is None:
        # Surface a readable message in the UI instead of an AttributeError.
        raise gr.Error("Please upload a PDF before converting.")

    # Accept both a plain path string and a wrapper object carrying `.name`.
    source_path = getattr(file, "name", file)

    pdf_opts = PdfPipelineOptions(
        do_ocr=True,
        ocr_options=TesseractCliOcrOptions(lang=["eng"]),
    )

    # DocumentConverter is configured per input format; the PDF pipeline
    # options are wrapped in a PdfFormatOption rather than passed directly.
    converter = DocumentConverter(
        format_options={
            InputFormat.PDF: PdfFormatOption(pipeline_options=pdf_opts),
        },
    )
    result = converter.convert(source_path)
    document = result.document

    if output_format == "Markdown":
        converted_text = document.export_to_markdown()
    elif output_format == "JSON":
        # Export to plain data first, then serialise once for the Textbox;
        # ensure_ascii=False keeps non-ASCII OCR text readable.
        converted_text = json.dumps(
            document.export_to_dict(), indent=2, ensure_ascii=False
        )
    else:
        converted_text = "⚠️ Unsupported format"

    # Attribute names are plain strings, so this is safe for the JSON widget.
    metadata = {"Available Attributes": dir(document)}

    return converted_text, metadata


# Build the UI: upload and format selector side by side, results underneath.
with gr.Blocks() as app:
    gr.Markdown("# 📄 Document Converter with Docling OCR")
    gr.Markdown("Upload a PDF, choose the output format, and get the converted text + metadata.")

    with gr.Row():
        file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
        # Preselect a format so a click on "Convert" never sends None to the
        # handler (which would convert the file and then report
        # "Unsupported format").
        format_input = gr.Radio(
            ["Markdown", "JSON"],
            value="Markdown",
            label="Choose Output Format",
        )

    output_text = gr.Textbox(label="Converted Document", lines=20)
    output_metadata = gr.JSON(label="Metadata")

    # Wire the button to the converter: (file, format) -> (text, metadata).
    convert_button = gr.Button("Convert")
    convert_button.click(
        fn=convert_document,
        inputs=[file_input, format_input],
        outputs=[output_text, output_metadata],
    )

app.launch(debug=True)