Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from pathlib import Path | |
| from PIL import Image | |
| import os | |
def get_available_folders(base_path="outputs"):
    """Return the sorted names of the folders directly under *base_path*.

    Args:
        base_path: Directory expected to hold one sub-folder per processed
            document.

    Returns:
        A sorted list of sub-folder names. The list is empty when
        ``base_path`` does not exist or is not a directory.
    """
    base = Path(base_path)
    # is_dir() instead of exists(): a base path that points at a regular file
    # would otherwise reach iterdir() and raise NotADirectoryError.
    if not base.is_dir():
        return []
    # Direct children of `base`, so the bare name equals the relative path.
    return sorted(entry.name for entry in base.iterdir() if entry.is_dir())
# Image extensions shown in the galleries, in display order: JPEGs first, then
# PNGs. Suffixes are compared lower-cased.
_IMAGE_SUFFIX_GROUPS = ((".jpg", ".jpeg"), (".png",))


def _read_text_or_default(path, missing_message):
    """Return the UTF-8 text of *path*, or *missing_message* if it is not a file."""
    if not path.is_file():
        return missing_message
    # errors="replace" keeps a single badly encoded byte in an OCR dump from
    # failing the whole load with UnicodeDecodeError.
    return path.read_text(encoding='utf-8', errors='replace')


def _list_images(directory):
    """Return the image files in *directory* as path strings, grouped by type."""
    if not directory.is_dir():
        return []
    files = [entry for entry in directory.iterdir() if entry.is_file()]
    images = []
    for suffixes in _IMAGE_SUFFIX_GROUPS:
        group = sorted(f for f in files if f.suffix.lower() in suffixes)
        images.extend(str(f) for f in group)
    return images


def load_folder_content(folder_name, base_path="outputs"):
    """Load the text outputs and image lists of one processed-document folder.

    Args:
        folder_name: Name of the folder under ``base_path``; a falsy value
            means nothing is selected.
        base_path: Directory that contains the document folders.

    Returns:
        A 6-tuple ``(text, markdown, raw, box_images, cropped_images, summary)``.
        The first three are the contents of ``text_output.txt``,
        ``clean_output.md`` and ``raw_output.txt`` (or a "not found"
        placeholder), the next two are lists of image path strings from the
        ``boxes`` and ``cropped`` sub-folders, and the last is a Markdown
        summary. When no folder is selected or the folder is not a directory,
        the text slots carry a short message and both image lists are empty.
    """
    if not folder_name:
        return "No folder selected", "", "", [], [], "Select a folder to view content"
    folder_path = Path(base_path) / folder_name
    # is_dir() also rejects a regular file that happens to carry this name.
    if not folder_path.is_dir():
        return "Folder not found", "", "", [], [], "Error: Folder does not exist"

    # Load text files
    text_file = folder_path / "text_output.txt"
    md_file = folder_path / "clean_output.md"
    raw_file = folder_path / "raw_output.txt"
    text_content = _read_text_or_default(text_file, "Text file not found")
    md_content = _read_text_or_default(md_file, "Markdown file not found")
    raw_content = _read_text_or_default(raw_file, "Raw file not found")

    # Load bounding box images and cropped images
    box_images = _list_images(folder_path / "boxes")
    cropped_images = _list_images(folder_path / "cropped")

    # Create summary
    summary = "\n".join([
        f"π **Folder**: {folder_name}",
        f"π **Text File**: {'β Found' if text_file.is_file() else 'β Missing'}",
        f"π **Markdown File**: {'β Found' if md_file.is_file() else 'β Missing'}",
        f"π **Raw File**: {'β Found' if raw_file.is_file() else 'β Missing'}",
        f"π― **Bounding Boxes**: {len(box_images)} images",
        f"βοΈ **Cropped Images**: {len(cropped_images)} images",
        f"π **Full Path**: {folder_path.absolute()}",
    ]).strip()
    return text_content, md_content, raw_content, box_images, cropped_images, summary
def refresh_folders(base_path="outputs"):
    """Re-scan *base_path* and build a dropdown update from the result.

    The update replaces the dropdown choices with the current folder list and
    selects the first folder, or clears the selection when there are none.
    """
    choices = get_available_folders(base_path)
    if choices:
        selected = choices[0]
    else:
        selected = None
    return gr.update(choices=choices, value=selected)
def show_view(view_type):
    """Build visibility updates that show only the requested output view.

    Returns a 5-tuple of updates in the fixed order text, markdown, raw,
    boxes, crops; the entry whose name equals *view_type* is visible and the
    others are hidden.
    """
    view_order = ("text", "markdown", "raw", "boxes", "crops")
    return tuple(gr.update(visible=(view_type == name)) for name in view_order)
def search_folders(search_term, base_path="outputs"):
    """Build a dropdown update listing folders whose name contains *search_term*.

    Matching is a case-insensitive substring test. An empty search term keeps
    every folder found under *base_path*.
    """
    candidates = get_available_folders(base_path)
    if search_term:
        needle = search_term.lower()
        candidates = [name for name in candidates if needle in name.lower()]
    return gr.update(choices=candidates)
# Custom CSS for better styling; handed to gr.Blocks through its `css` argument.
# `.summary-box` is attached to the summary Markdown via elem_classes.
# NOTE(review): `.folder-info` is not referenced by any component in the
# visible part of this file - confirm whether it is still needed.
custom_css = """
.folder-info {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
padding: 20px;
border-radius: 10px;
color: white;
margin-bottom: 20px;
}
.summary-box {
background: #f8f9fa;
padding: 15px;
border-radius: 8px;
border-left: 4px solid #667eea;
}
"""
# UI definition: folder picker and summary on the left, switchable output
# views on the right, followed by the event wiring.
with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="OCR Output Viewer") as demo:
    gr.Markdown("""
    # π OCR Output Viewer
    View and browse OCR extraction results from saved folders
    """)

    with gr.Row():
        # Left column: where to look and which folder to show.
        with gr.Column(scale=1):
            gr.Markdown("### π Folder Selection")
            base_path_input = gr.Textbox(
                label="Base Path",
                value="outputs",
                placeholder="Enter base folder path"
            )
            search_box = gr.Textbox(
                label="π Search Folders",
                placeholder="Type to filter folders...",
                interactive=True
            )
            folder_dropdown = gr.Dropdown(
                label="Select Folder",
                choices=get_available_folders(),
                interactive=True
            )
            with gr.Row():
                refresh_btn = gr.Button("π Refresh", size="sm", variant="secondary")
                load_btn = gr.Button("π Load Folder", size="sm", variant="primary")
            gr.Markdown("---")
            summary_out = gr.Markdown("", elem_classes="summary-box")

        # Right column: view selector plus one container per view.
        with gr.Column(scale=2):
            # View selection buttons in one row
            with gr.Row():
                text_btn = gr.Button("π Text", variant="secondary", size="sm")
                md_btn = gr.Button("π Markdown", variant="secondary", size="sm")
                raw_btn = gr.Button("π Raw", variant="secondary", size="sm")
                boxes_btn = gr.Button("π― Boxes", variant="secondary", size="sm")
                crops_btn = gr.Button("βοΈ Crops", variant="secondary", size="sm")

            # Output containers (only one visible at a time)
            with gr.Column(visible=True) as text_container:
                gr.Markdown("### π Text Output")
                text_out = gr.Textbox(lines=25, show_copy_button=True, show_label=False)
            with gr.Column(visible=False) as md_container:
                gr.Markdown("### π Markdown Output")
                md_out = gr.Markdown("")
            with gr.Column(visible=False) as raw_container:
                gr.Markdown("### π Raw Output")
                raw_out = gr.Textbox(lines=25, show_copy_button=True, show_label=False)
            with gr.Column(visible=False) as boxes_container:
                gr.Markdown("### π― Bounding Boxes")
                boxes_gallery = gr.Gallery(show_label=False, columns=3, height=600)
            with gr.Column(visible=False) as crops_container:
                gr.Markdown("### βοΈ Cropped Images")
                crops_gallery = gr.Gallery(show_label=False, columns=4, height=600)

    with gr.Accordion("βΉοΈ How to Use", open=False):
        gr.Markdown("""
        ### Instructions:
        1. **Set Base Path**: Enter the folder where OCR outputs are saved (default: `outputs`)
        2. **Search**: Use search box to filter folders by name
        3. **Select Folder**: Choose a processed document folder from dropdown
        4. **Load**: Click "Load Folder" to view the content
        5. **Switch Views**: Use the buttons (Text, Markdown, Raw, Boxes, Crops) to view different outputs
        6. **Refresh**: Click refresh button to update folder list
        ### Folder Structure Expected:
        ```
        outputs/
        βββ 01_document_name/
        β βββ text_output.txt
        β βββ clean_output.md
        β βββ raw_output.txt
        β βββ boxes/
        β β βββ page_01_box.jpg
        β βββ cropped/
        β βββ crop_01.jpg
        ```
        """)

    # Event handlers
    # Component lists shared by several listeners; the order must match the
    # tuples returned by load_folder_content and show_view respectively.
    folder_inputs = [folder_dropdown, base_path_input]
    content_outputs = [text_out, md_out, raw_out, boxes_gallery, crops_gallery, summary_out]
    view_containers = [text_container, md_container, raw_container, boxes_container, crops_container]

    refresh_btn.click(refresh_folders, [base_path_input], [folder_dropdown])
    search_box.change(search_folders, [search_box, base_path_input], [folder_dropdown])

    # Explicit load button, plus auto-load when folder is selected
    for register in (load_btn.click, folder_dropdown.change):
        register(load_folder_content, folder_inputs, content_outputs)

    # View toggle buttons
    def _switch_to(view_name):
        # Factory so each handler captures its own view name (a bare lambda in
        # the loop would late-bind to the last name).
        return lambda: show_view(view_name)

    view_buttons = (
        (text_btn, "text"),
        (md_btn, "markdown"),
        (raw_btn, "raw"),
        (boxes_btn, "boxes"),
        (crops_btn, "crops"),
    )
    for button, view_name in view_buttons:
        button.click(_switch_to(view_name), None, view_containers)

    # Load folders on startup
    demo.load(refresh_folders, [base_path_input], [folder_dropdown])
if __name__ == "__main__":
    # The port was hard-coded to 7861. Let the GRADIO_SERVER_PORT environment
    # variable override it so a host that expects a different port can set
    # one without a code change; 7861 remains the default.
    # NOTE(review): the page header reports a runtime error for this app on a
    # hosted platform - confirm which port that platform expects.
    server_port = int(os.environ.get("GRADIO_SERVER_PORT", "7861"))
    demo.launch(server_name="0.0.0.0", server_port=server_port, share=False)