"""Gradio app that converts an uploaded PDF to Markdown or JSON using Docling OCR."""

import json

import gradio as gr
import spaces
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import PdfPipelineOptions, TesseractCliOcrOptions
from docling.document_converter import DocumentConverter, PdfFormatOption


# GPU decorator not really required for Docling OCR, but kept if you want
@spaces.GPU
def convert_document(file, output_format):
    """Convert an uploaded PDF with Docling (Tesseract CLI OCR, English).

    Args:
        file: Value delivered by the ``gr.File`` input. May be ``None`` when
            nothing was uploaded; otherwise either a path string or an object
            exposing the path as ``.name``.
        output_format: ``"Markdown"`` or ``"JSON"`` (the ``gr.Radio`` choices).

    Returns:
        A ``(converted_text, metadata)`` tuple: the text shown in the Textbox
        and a JSON-friendly dict shown in the JSON panel.
    """
    # Clicking "Convert" without an upload passes None; report it instead of
    # failing with AttributeError on `file.name`.
    if file is None:
        return "⚠️ No file uploaded", {}

    # Accept both a plain path string and a file-like wrapper with `.name`.
    pdf_path = getattr(file, "name", file)

    pdf_opts = PdfPipelineOptions(
        do_ocr=True,
        ocr_options=TesseractCliOcrOptions(lang=["eng"]),
    )
    # Pipeline options are supplied per input format via `format_options`;
    # DocumentConverter does not take a bare `pipeline_options` keyword.
    converter = DocumentConverter(
        format_options={
            InputFormat.PDF: PdfFormatOption(pipeline_options=pdf_opts),
        }
    )
    result = converter.convert(pdf_path)

    # Choose output format safely
    if output_format == "Markdown":
        converted_text = result.document.export_to_markdown()
    elif output_format == "JSON":
        # The Textbox needs a string, so serialize the dict export.
        converted_text = json.dumps(result.document.export_to_dict(), indent=2)
    else:
        converted_text = "⚠️ Unsupported format"

    # Metadata always JSON-friendly: dir() yields a list of attribute names.
    metadata = {"Available Attributes": dir(result.document)}
    return converted_text, metadata


with gr.Blocks() as app:
    gr.Markdown("# 📄 Document Converter with Docling OCR")
    gr.Markdown("Upload a PDF, choose the output format, and get the converted text + metadata.")

    with gr.Row():
        file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
        format_input = gr.Radio(["Markdown", "JSON"], label="Choose Output Format")

    output_text = gr.Textbox(label="Converted Document", lines=20)
    output_metadata = gr.JSON(label="Metadata")
    convert_button = gr.Button("Convert")

    convert_button.click(
        fn=convert_document,
        inputs=[file_input, format_input],
        outputs=[output_text, output_metadata],
    )

# Launched at import/run time, as in the original script.
app.launch(debug=True)