"""Gradio app for browsing OCR extraction results saved in per-document folders."""

import os
from pathlib import Path

import gradio as gr
from PIL import Image

# View identifiers, in the same order as the output containers wired to show_view().
VIEW_NAMES = ("text", "markdown", "raw", "boxes", "crops")

# Glob patterns used for the image galleries; results are grouped in this order.
IMAGE_PATTERNS = ("*.jpg", "*.png")


def get_available_folders(base_path="outputs"):
    """Get list of processed document folders.

    Args:
        base_path: Directory whose immediate sub-directories are listed.

    Returns:
        Sorted list of sub-directory names relative to ``base_path``; an empty
        list when ``base_path`` is missing or is not a directory.
    """
    base = Path(base_path)
    # is_dir() rather than exists(): the path comes from a free-text box, and
    # iterdir() on a regular file would raise NotADirectoryError.
    if not base.is_dir():
        return []
    return sorted(str(entry.relative_to(base)) for entry in base.iterdir() if entry.is_dir())


def _read_text_or(path, fallback):
    """Return the UTF-8 text of ``path``, or ``fallback`` when it is not a file."""
    if not path.is_file():
        return fallback
    # Undecodable bytes are substituted instead of raising, so one bad byte in
    # an output file does not break the whole load callback.
    return path.read_text(encoding="utf-8", errors="replace")


def _list_images(directory):
    """Return image paths in ``directory`` as strings: sorted JPGs, then sorted PNGs."""
    if not directory.is_dir():
        return []
    images = []
    for pattern in IMAGE_PATTERNS:
        images.extend(str(image) for image in sorted(directory.glob(pattern)))
    return images


def _found_label(is_present):
    """Return the summary marker for a present or missing output file."""
    return "✅ Found" if is_present else "❌ Missing"


def load_folder_content(folder_name, base_path="outputs"):
    """Load all content from selected folder.

    Args:
        folder_name: Name of the document folder inside ``base_path``.
        base_path: Directory that holds the document folders.

    Returns:
        A 6-tuple ``(text, markdown, raw, box_images, cropped_images, summary)``
        where the first three are file contents (or a "not found" message),
        the two image entries are lists of file paths, and ``summary`` is a
        Markdown status string.
    """
    if not folder_name:
        return "No folder selected", "", "", [], [], "Select a folder to view content"

    folder_path = Path(base_path) / folder_name
    if not folder_path.is_dir():
        return "Folder not found", "", "", [], [], "Error: Folder does not exist"

    # Text outputs
    text_file = folder_path / "text_output.txt"
    md_file = folder_path / "clean_output.md"
    raw_file = folder_path / "raw_output.txt"

    text_content = _read_text_or(text_file, "Text file not found")
    md_content = _read_text_or(md_file, "Markdown file not found")
    raw_content = _read_text_or(raw_file, "Raw file not found")

    # Image outputs
    box_images = _list_images(folder_path / "boxes")
    cropped_images = _list_images(folder_path / "cropped")

    # Entries are separated by blank lines because gr.Markdown ignores single
    # newlines by default, which would merge them into one paragraph.
    summary = "\n\n".join(
        [
            f"📁 **Folder**: {folder_name}",
            f"📄 **Text File**: {_found_label(text_file.is_file())}",
            f"📋 **Markdown File**: {_found_label(md_file.is_file())}",
            f"🔍 **Raw File**: {_found_label(raw_file.is_file())}",
            f"🎯 **Bounding Boxes**: {len(box_images)} images",
            f"✂️ **Cropped Images**: {len(cropped_images)} images",
            f"📂 **Full Path**: {folder_path.absolute()}",
        ]
    )

    return text_content, md_content, raw_content, box_images, cropped_images, summary


def refresh_folders(base_path="outputs"):
    """Refresh the folder list.

    Returns:
        A ``gr.update`` that replaces the dropdown choices and selects the
        first folder, or clears the selection when there are no folders.
    """
    folders = get_available_folders(base_path)
    return gr.update(choices=folders, value=folders[0] if folders else None)


def show_view(view_type):
    """Toggle visibility of different output views.

    Args:
        view_type: One of the names in ``VIEW_NAMES``.

    Returns:
        A tuple of ``gr.update`` objects, one per view container in
        ``VIEW_NAMES`` order; only the container matching ``view_type`` is
        made visible.
    """
    return tuple(gr.update(visible=(view_type == name)) for name in VIEW_NAMES)


def search_folders(search_term, base_path="outputs"):
    """Search folders by name (case-insensitive substring match).

    Returns:
        A ``gr.update`` carrying the matching folder names; all folders when
        ``search_term`` is empty.
    """
    all_folders = get_available_folders(base_path)
    if not search_term:
        return gr.update(choices=all_folders)

    needle = search_term.lower()
    return gr.update(choices=[name for name in all_folders if needle in name.lower()])


# Custom CSS for better styling
custom_css = """
.folder-info {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    padding: 20px;
    border-radius: 10px;
    color: white;
    margin-bottom: 20px;
}
.summary-box {
    background: #f8f9fa;
    padding: 15px;
    border-radius: 8px;
    border-left: 4px solid #667eea;
}
"""

with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="OCR Output Viewer") as demo:
    gr.Markdown("""
    # 📂 OCR Output Viewer
    View and browse OCR extraction results from saved folders
    """)

    with gr.Row():
        # Left column: folder selection and summary
        with gr.Column(scale=1):
            gr.Markdown("### 📁 Folder Selection")

            base_path_input = gr.Textbox(
                label="Base Path",
                value="outputs",
                placeholder="Enter base folder path"
            )

            search_box = gr.Textbox(
                label="🔍 Search Folders",
                placeholder="Type to filter folders...",
                interactive=True
            )

            folder_dropdown = gr.Dropdown(
                label="Select Folder",
                choices=get_available_folders(),
                interactive=True
            )

            with gr.Row():
                refresh_btn = gr.Button("🔄 Refresh", size="sm", variant="secondary")
                load_btn = gr.Button("📂 Load Folder", size="sm", variant="primary")

            gr.Markdown("---")
            summary_out = gr.Markdown("", elem_classes="summary-box")

        # Right column: view selector and output panels
        with gr.Column(scale=2):
            # View selection buttons in one row
            with gr.Row():
                text_btn = gr.Button("📄 Text", variant="secondary", size="sm")
                md_btn = gr.Button("📋 Markdown", variant="secondary", size="sm")
                raw_btn = gr.Button("🔍 Raw", variant="secondary", size="sm")
                boxes_btn = gr.Button("🎯 Boxes", variant="secondary", size="sm")
                crops_btn = gr.Button("✂️ Crops", variant="secondary", size="sm")

            # Output containers (only one visible at a time)
            text_container = gr.Column(visible=True)
            with text_container:
                gr.Markdown("### 📄 Text Output")
                text_out = gr.Textbox(lines=25, show_copy_button=True, show_label=False)

            md_container = gr.Column(visible=False)
            with md_container:
                gr.Markdown("### 📋 Markdown Output")
                md_out = gr.Markdown("")

            raw_container = gr.Column(visible=False)
            with raw_container:
                gr.Markdown("### 🔍 Raw Output")
                raw_out = gr.Textbox(lines=25, show_copy_button=True, show_label=False)

            boxes_container = gr.Column(visible=False)
            with boxes_container:
                gr.Markdown("### 🎯 Bounding Boxes")
                boxes_gallery = gr.Gallery(show_label=False, columns=3, height=600)

            crops_container = gr.Column(visible=False)
            with crops_container:
                gr.Markdown("### ✂️ Cropped Images")
                crops_gallery = gr.Gallery(show_label=False, columns=4, height=600)

    with gr.Accordion("ℹ️ How to Use", open=False):
        gr.Markdown("""
        ### Instructions:
        1. **Set Base Path**: Enter the folder where OCR outputs are saved (default: `outputs`)
        2. **Search**: Use search box to filter folders by name
        3. **Select Folder**: Choose a processed document folder from dropdown
        4. **Load**: Click "Load Folder" to view the content
        5. **Switch Views**: Use the buttons (Text, Markdown, Raw, Boxes, Crops) to view different outputs
        6. **Refresh**: Click refresh button to update folder list

        ### Folder Structure Expected:
        ```
        outputs/
        ├── 01_document_name/
        │   ├── text_output.txt
        │   ├── clean_output.md
        │   ├── raw_output.txt
        │   ├── boxes/
        │   │   └── page_01_box.jpg
        │   └── cropped/
        │       └── crop_01.jpg
        ```
        """)

    # Shared component lists so the handlers below cannot drift apart.
    content_inputs = [folder_dropdown, base_path_input]
    content_outputs = [text_out, md_out, raw_out, boxes_gallery, crops_gallery, summary_out]
    # Must stay in VIEW_NAMES order: show_view() returns one update per entry.
    view_containers = [
        text_container,
        md_container,
        raw_container,
        boxes_container,
        crops_container,
    ]

    # Event handlers
    refresh_btn.click(
        refresh_folders,
        [base_path_input],
        [folder_dropdown]
    )

    search_box.change(
        search_folders,
        [search_box, base_path_input],
        [folder_dropdown]
    )

    load_btn.click(load_folder_content, content_inputs, content_outputs)

    # Auto-load when folder is selected
    folder_dropdown.change(load_folder_content, content_inputs, content_outputs)

    # View toggle buttons, registered in the same order as before.
    view_buttons = (
        ("text", text_btn),
        ("markdown", md_btn),
        ("raw", raw_btn),
        ("boxes", boxes_btn),
        ("crops", crops_btn),
    )
    for view_name, view_button in view_buttons:
        # Bind view_name as a default so each handler keeps its own value
        # instead of all of them seeing the last loop value.
        view_button.click(
            lambda view=view_name: show_view(view),
            None,
            view_containers
        )

    # Load folders on startup
    demo.load(
        refresh_folders,
        [base_path_input],
        [folder_dropdown]
    )

if __name__ == "__main__":
    # server_name="0.0.0.0" makes the app listen on all network interfaces.
    demo.launch(server_name="0.0.0.0", server_port=7861, share=False)