Spaces:
Running
on
Zero
Running
on
Zero
import json

import gradio as gr
import spaces
from docling.document_converter import DocumentConverter
def convert_document(file, output_format):
    """Convert an uploaded document with Docling and return its text export.

    Args:
        file: The uploaded document. Either an object exposing the on-disk
            path as ``.name`` or a plain path string. ``None`` means nothing
            was uploaded.
        output_format: ``"Markdown"`` or ``"JSON"``. Any other value
            (including ``None``) yields the text ``"Unsupported format"``.

    Returns:
        A ``(converted_text, metadata)`` tuple. ``converted_text`` is the
        exported document as a string. ``metadata`` is a placeholder dict
        whose ``"Available Attributes"`` entry is ``dir()`` of the converted
        document; it is empty when no file was supplied.
    """
    if file is None:
        # Nothing was uploaded: report that as text (the same way an
        # unsupported format is reported) instead of failing on `file.name`.
        return "No document uploaded", {}

    # Accept both upload objects carrying `.name` and bare path strings.
    source_path = getattr(file, "name", file)

    converter = DocumentConverter()
    document = converter.convert(source_path).document

    if output_format == "Markdown":
        converted_text = document.export_to_markdown()
    elif output_format == "JSON":
        # NOTE(review): Docling documents `export_to_dict()` as the structured
        # exporter rather than an `export_to_json()` method, so the dict is
        # serialised here. Confirm against the installed docling version.
        converted_text = json.dumps(
            document.export_to_dict(),
            ensure_ascii=False,
            indent=2,
        )
    else:
        converted_text = "Unsupported format"

    # Placeholder metadata: just the attribute names the document exposes.
    metadata = {"Available Attributes": dir(document)}
    return converted_text, metadata
# Build the Gradio UI: a file upload and a format choice go in; the converted
# text and a metadata JSON view come out.
with gr.Blocks() as app:
    gr.Markdown("# Document Converter with Docling")
    gr.Markdown("Upload a document, choose the output format, and get the converted text with metadata.")

    file_input = gr.File(label="Upload Document")
    # Preselect a format so clicking "Convert" without touching the radio
    # sends "Markdown" instead of no selection at all.
    format_input = gr.Radio(
        ["Markdown", "JSON"],
        value="Markdown",
        label="Choose Output Format",
    )

    output_text = gr.Textbox(label="Converted Document")
    output_metadata = gr.JSON(label="Metadata")

    # Wire the button to the conversion handler.
    convert_button = gr.Button("Convert")
    convert_button.click(
        convert_document,
        inputs=[file_input, format_input],
        outputs=[output_text, output_metadata],
    )

app.launch(debug=True)