Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from docling.datamodel.base_models import InputFormat | |
| from docling.datamodel.pipeline_options import PdfPipelineOptions | |
| from docling.document_converter import DocumentConverter, PdfFormatOption | |
| from docling_core.types.doc import ImageRefMode | |
| from pathlib import Path | |
| import os | |
def _build_pdf_pipeline_options():
    """Return a ``PdfPipelineOptions`` with this app's feature flags switched on."""
    options = PdfPipelineOptions()
    # Top-level boolean switches on the options object, all enabled.
    for flag_name in ("do_ocr", "do_table_structure", "generate_picture_images"):
        setattr(options, flag_name, True)
    # Nested switch on the table-structure sub-options.
    options.table_structure_options.do_cell_matching = True
    return options


# Module-level converter shared by every request; only the PDF format gets
# custom pipeline options here.
pipeline_options = _build_pdf_pipeline_options()
doc_converter = DocumentConverter(
    format_options={
        InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options),
    },
)
def convert_to_markdown(file):
    """Convert an uploaded document to Markdown and return the Markdown text.

    Args:
        file: The upload handed over by Gradio. Either a filesystem path
            (what ``gr.File(type="filepath")`` passes) or an object that
            exposes the path as its ``.name`` attribute.

    Returns:
        The text of the Markdown file written under ``output/``.

    Raises:
        gr.Error: If no file was supplied.
    """
    if file is None:
        # Submitting without an upload passes None; show a readable message
        # in the UI instead of failing with an AttributeError.
        raise gr.Error("Please upload a document before converting.")

    # Accept both a plain path and a file wrapper carrying the path in .name.
    input_path = Path(getattr(file, "name", file))

    # Convert the document
    result = doc_converter.convert(str(input_path))

    # Prepare output directory
    output_dir = Path("output")
    output_dir.mkdir(exist_ok=True)

    # Save result as markdown, named after the input document's stem
    md_filename = output_dir / f"{result.input.file.stem}-with-images.md"
    result.document.save_as_markdown(md_filename, image_mode=ImageRefMode.REFERENCED)

    # Load the markdown content back so it can be returned to the UI
    return md_filename.read_text(encoding="utf-8")


# Create Gradio interface. type="filepath" makes Gradio pass the upload to the
# callback as a path string; the older type="file" value is rejected by
# Gradio 4+ when the component is constructed.
iface = gr.Interface(
    fn=convert_to_markdown,
    inputs=gr.File(label="Upload your document", type="filepath"),
    outputs="markdown",
    title="Document to Markdown Converter",
    description="Upload a document (e.g., PDF, DOCX, PPTX) and get its Markdown version.",
)

if __name__ == "__main__":
    iface.launch()