deepseek_ocr / view_app.py
akshayve3's picture
Update view_app.py
da3c30d verified
import gradio as gr
from pathlib import Path
from PIL import Image
import os
def get_available_folders(base_path="outputs"):
    """Return the names of the sub-directories directly under *base_path*.

    Each sub-directory is treated as one processed-document folder.

    Args:
        base_path: Directory to scan. Defaults to ``"outputs"``.

    Returns:
        A sorted list of directory names relative to *base_path*. The list is
        empty when *base_path* does not exist or is not a directory.
    """
    base = Path(base_path)
    # is_dir() rather than exists(): the base path is free-form text typed in
    # the UI, and iterdir() on a regular file raises NotADirectoryError.
    if not base.is_dir():
        return []
    return sorted(entry.name for entry in base.iterdir() if entry.is_dir())
def _read_optional_text(file_path):
    """Return the text of *file_path* decoded as UTF-8, or None if it is not a file."""
    if not file_path.is_file():
        return None
    # errors="replace": a stray non-UTF-8 byte in an OCR output should show up
    # as U+FFFD in the viewer instead of failing the whole load.
    return file_path.read_text(encoding="utf-8", errors="replace")


def _list_images(directory):
    """Return the ``.jpg`` paths followed by the ``.png`` paths in *directory*, each group sorted."""
    if not directory.is_dir():
        return []
    return [
        str(image)
        for pattern in ("*.jpg", "*.png")
        for image in sorted(directory.glob(pattern))
    ]


def _found_label(found):
    """Return the summary status marker for a file that was / was not found."""
    return "✅ Found" if found else "❌ Missing"


def load_folder_content(folder_name, base_path="outputs"):
    """Load every viewable artifact from one processed-document folder.

    Args:
        folder_name: Name of the folder inside *base_path*; falsy means
            nothing is selected.
        base_path: Directory that contains the document folders.

    Returns:
        A 6-tuple ``(text, markdown, raw, box_images, cropped_images, summary)``:
        the contents of ``text_output.txt``, ``clean_output.md`` and
        ``raw_output.txt`` (or a "not found" placeholder for each), the image
        paths found in ``boxes/`` and ``cropped/`` as lists of strings, and a
        Markdown summary of what was found.
    """
    if not folder_name:
        return "No folder selected", "", "", [], [], "Select a folder to view content"

    folder_path = Path(base_path) / folder_name
    if not folder_path.exists():
        return "Folder not found", "", "", [], [], "Error: Folder does not exist"

    # Read each file once; the same result drives both the displayed content
    # and the Found/Missing status, so the two can never disagree.
    text = _read_optional_text(folder_path / "text_output.txt")
    markdown = _read_optional_text(folder_path / "clean_output.md")
    raw = _read_optional_text(folder_path / "raw_output.txt")

    text_content = text if text is not None else "Text file not found"
    md_content = markdown if markdown is not None else "Markdown file not found"
    raw_content = raw if raw is not None else "Raw file not found"

    box_images = _list_images(folder_path / "boxes")
    cropped_images = _list_images(folder_path / "cropped")

    summary = "\n".join([
        f"📁 **Folder**: {folder_name}",
        f"📄 **Text File**: {_found_label(text is not None)}",
        f"📋 **Markdown File**: {_found_label(markdown is not None)}",
        f"🔍 **Raw File**: {_found_label(raw is not None)}",
        f"🎯 **Bounding Boxes**: {len(box_images)} images",
        f"✂️ **Cropped Images**: {len(cropped_images)} images",
        f"📂 **Full Path**: {folder_path.absolute()}",
    ]).strip()

    return text_content, md_content, raw_content, box_images, cropped_images, summary
def refresh_folders(base_path="outputs"):
    """Rescan *base_path* and build the matching dropdown update.

    Returns a ``gr.update`` that replaces the dropdown choices with the current
    folder list and selects the first folder, or clears the selection when no
    folders are available.
    """
    available = get_available_folders(base_path)
    first_choice = available[0] if available else None
    return gr.update(choices=available, value=first_choice)
def show_view(view_type):
    """Build visibility updates so that only the requested output view is shown.

    Returns a 5-tuple of ``gr.update`` objects, one per container in the fixed
    order text, markdown, raw, boxes, crops; only the entry whose name equals
    *view_type* is marked visible.
    """
    view_order = ("text", "markdown", "raw", "boxes", "crops")
    return tuple(gr.update(visible=(view_type == name)) for name in view_order)
def search_folders(search_term, base_path="outputs"):
    """Filter the folders under *base_path* by a case-insensitive substring.

    An empty *search_term* keeps every folder. Returns a ``gr.update`` that
    replaces the dropdown choices with the matching folder names.
    """
    matches = get_available_folders(base_path)
    if search_term:
        needle = search_term.lower()
        matches = [name for name in matches if needle in name.lower()]
    return gr.update(choices=matches)
# Custom CSS for better styling; passed to gr.Blocks below via `css=custom_css`.
# `.summary-box` is attached to the folder summary Markdown through
# elem_classes="summary-box". `.folder-info` is not attached to any component
# in the visible part of this file.
custom_css = """
.folder-info {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
padding: 20px;
border-radius: 10px;
color: white;
margin-bottom: 20px;
}
.summary-box {
background: #f8f9fa;
padding: 15px;
border-radius: 8px;
border-left: 4px solid #667eea;
}
"""
# Build the viewer UI. Components and event bindings are all created inside the
# Blocks context so they are registered on `demo`.
with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="OCR Output Viewer") as demo:
    gr.Markdown("""
    # 📂 OCR Output Viewer
    View and browse OCR extraction results from saved folders
    """)

    with gr.Row():
        # Left column: folder selection controls and the summary panel.
        with gr.Column(scale=1):
            gr.Markdown("### 📁 Folder Selection")
            base_path_input = gr.Textbox(
                label="Base Path",
                value="outputs",
                placeholder="Enter base folder path",
            )
            search_box = gr.Textbox(
                label="🔍 Search Folders",
                placeholder="Type to filter folders...",
                interactive=True,
            )
            folder_dropdown = gr.Dropdown(
                label="Select Folder",
                choices=get_available_folders(),
                interactive=True,
            )
            with gr.Row():
                refresh_btn = gr.Button("🔄 Refresh", size="sm", variant="secondary")
                load_btn = gr.Button("📂 Load Folder", size="sm", variant="primary")
            gr.Markdown("---")
            summary_out = gr.Markdown("", elem_classes="summary-box")

        # Right column: view selector buttons and the output containers.
        with gr.Column(scale=2):
            # View selection buttons in one row
            with gr.Row():
                text_btn = gr.Button("📄 Text", variant="secondary", size="sm")
                md_btn = gr.Button("📋 Markdown", variant="secondary", size="sm")
                raw_btn = gr.Button("🔍 Raw", variant="secondary", size="sm")
                boxes_btn = gr.Button("🎯 Boxes", variant="secondary", size="sm")
                crops_btn = gr.Button("✂️ Crops", variant="secondary", size="sm")

            # Output containers (only one visible at a time)
            text_container = gr.Column(visible=True)
            with text_container:
                gr.Markdown("### 📄 Text Output")
                text_out = gr.Textbox(lines=25, show_copy_button=True, show_label=False)

            md_container = gr.Column(visible=False)
            with md_container:
                gr.Markdown("### 📋 Markdown Output")
                md_out = gr.Markdown("")

            raw_container = gr.Column(visible=False)
            with raw_container:
                gr.Markdown("### 🔍 Raw Output")
                raw_out = gr.Textbox(lines=25, show_copy_button=True, show_label=False)

            boxes_container = gr.Column(visible=False)
            with boxes_container:
                gr.Markdown("### 🎯 Bounding Boxes")
                boxes_gallery = gr.Gallery(show_label=False, columns=3, height=600)

            crops_container = gr.Column(visible=False)
            with crops_container:
                gr.Markdown("### ✂️ Cropped Images")
                crops_gallery = gr.Gallery(show_label=False, columns=4, height=600)

    with gr.Accordion("ℹ️ How to Use", open=False):
        gr.Markdown("""
        ### Instructions:
        1. **Set Base Path**: Enter the folder where OCR outputs are saved (default: `outputs`)
        2. **Search**: Use search box to filter folders by name
        3. **Select Folder**: Choose a processed document folder from dropdown
        4. **Load**: Click "Load Folder" to view the content
        5. **Switch Views**: Use the buttons (Text, Markdown, Raw, Boxes, Crops) to view different outputs
        6. **Refresh**: Click refresh button to update folder list
        ### Folder Structure Expected:
        ```
        outputs/
        ├── 01_document_name/
        │   ├── text_output.txt
        │   ├── clean_output.md
        │   ├── raw_output.txt
        │   ├── boxes/
        │   │   └── page_01_box.jpg
        │   └── cropped/
        │       └── crop_01.jpg
        ```
        """)

    # Output lists shared by several handlers. Their order must match the
    # return order of load_folder_content and show_view respectively.
    content_outputs = [text_out, md_out, raw_out, boxes_gallery, crops_gallery, summary_out]
    view_containers = [
        text_container,
        md_container,
        raw_container,
        boxes_container,
        crops_container,
    ]

    # Event handlers
    refresh_btn.click(refresh_folders, [base_path_input], [folder_dropdown])
    search_box.change(search_folders, [search_box, base_path_input], [folder_dropdown])
    load_btn.click(load_folder_content, [folder_dropdown, base_path_input], content_outputs)

    # Auto-load when folder is selected
    folder_dropdown.change(
        load_folder_content,
        [folder_dropdown, base_path_input],
        content_outputs,
    )

    # View toggle buttons: each one shows its own container and hides the rest.
    text_btn.click(lambda: show_view("text"), None, view_containers)
    md_btn.click(lambda: show_view("markdown"), None, view_containers)
    raw_btn.click(lambda: show_view("raw"), None, view_containers)
    boxes_btn.click(lambda: show_view("boxes"), None, view_containers)
    crops_btn.click(lambda: show_view("crops"), None, view_containers)

    # Load folders on startup
    demo.load(refresh_folders, [base_path_input], [folder_dropdown])
if __name__ == "__main__":
    # Serve on all network interfaces (0.0.0.0), port 7861, with no share link.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7861,
        share=False,
    )