Spaces:
Running
Running
| import gradio as gr | |
| from docling.document_converter import DocumentConverter, PdfFormatOption | |
| from docling.datamodel.pipeline_options import ( | |
| AcceleratorDevice, | |
| PdfPipelineOptions, | |
| AcceleratorOptions | |
| ) | |
| import spaces | |
| from docling.datamodel.base_models import InputFormat | |
| from marker.converters.pdf import PdfConverter | |
| from marker.models import create_model_dict | |
| from marker.output import text_from_rendered | |
# Docling: PDF conversion pipeline pinned to the CPU with 8 worker threads.
accelerator_options = AcceleratorOptions(
    device=AcceleratorDevice.CPU,
    num_threads=8,
)

# Start from the default PDF pipeline options, then switch on the OCR and
# table-structure flags (including cell matching) and attach the accelerator
# settings defined above.
pipeline_options = PdfPipelineOptions()
pipeline_options.do_ocr = True
pipeline_options.do_table_structure = True
pipeline_options.table_structure_options.do_cell_matching = True
pipeline_options.accelerator_options = accelerator_options

# Only the PDF input format receives custom options.
pdf_format_option = PdfFormatOption(pipeline_options=pipeline_options)
docling_converter = DocumentConverter(
    format_options={InputFormat.PDF: pdf_format_option},
)
# Marker: build the model dictionary once at import time and hand it to the
# PDF converter, which is reused for every request.
marker_models = create_model_dict()
marker_converter = PdfConverter(artifact_dict=marker_models)
def convert_document(file, method):
    """Convert an uploaded document to text with the selected backend.

    Args:
        file: The uploaded file as delivered by the ``gr.File`` input. Either
            an object exposing the on-disk path as ``.name`` or a plain path
            string; ``None`` when nothing has been uploaded.
        method: Backend name, ``"Docling"`` or ``"Marker"``.

    Returns:
        The converted document text, ``'unknown method'`` when ``method`` is
        not a recognised backend, or ``'no file uploaded'`` when a known
        backend is selected but ``file`` is ``None``.
    """
    # Reject unknown backends first so this path behaves exactly as before,
    # regardless of whether a file is present.
    if method not in ("Docling", "Marker"):
        return 'unknown method'

    # Clicking "Convert" without an upload passes None; report it as text
    # instead of failing with AttributeError on ``file.name``.
    if file is None:
        return 'no file uploaded'

    # Accept both file-like wrappers (path in ``.name``) and bare path strings.
    path = getattr(file, "name", file)

    if method == "Docling":
        result = docling_converter.convert(path)
        return result.document.export_to_markdown()

    # method == "Marker": only the text part of the rendered output is used.
    rendered = marker_converter(path)
    text, _, _ = text_from_rendered(rendered)
    return text
# Gradio UI: one file upload, a backend selector, and a text box that shows
# whatever convert_document returns.
with gr.Blocks() as app:
    # Page title and a one-line usage hint.
    gr.Markdown("# Document Converter")
    gr.Markdown("Upload a document, choose the backend, and get the converted text with metadata.")

    # Inputs and output, in the order they appear on the page.
    file_input = gr.File(label="Upload Document")
    method_input = gr.Radio(
        choices=["Docling", "Marker"],
        label="Choose Conversion Backend",
    )
    output_text = gr.Textbox(label="Converted Document")

    # Run the conversion when the button is clicked.
    convert_button = gr.Button(value="Convert")
    convert_button.click(
        fn=convert_document,
        inputs=[file_input, method_input],
        outputs=[output_text],
    )

# Start the server with debug output and errors surfaced in the UI.
app.launch(debug=True, show_error=True)