Spaces:
Running
Running
| import streamlit as st | |
| import os | |
| import glob | |
| from pathlib import Path | |
# Page-level settings: browser-tab title, wide layout, sidebar collapsed on load.
_PAGE_SETTINGS = {
    "page_title": "OCR analysis results",
    "layout": "wide",
    "initial_sidebar_state": "collapsed",
}
st.set_page_config(**_PAGE_SETTINGS)
# Stylesheet injected once at import time. It styles the tab strip and defines
# the helper classes referenced by the HTML snippets emitted elsewhere in this
# file (image-container, markdown-container, toggle-container).
_CUSTOM_CSS = """
<style>
.main { padding-top: 1rem; }
.stTabs [data-baseweb="tab-list"] {
gap: 1rem;
margin-bottom: 1rem;
}
.stTabs [data-baseweb="tab"] {
height: 50px;
white-space: pre-wrap;
border-radius: 4px 4px 0 0;
font-weight: bold;
letter-spacing: 1px;
}
.image-container {
display: flex;
justify-content: center;
}
.markdown-container {
margin-top: 2rem;
border: 1px solid #f0f0f0;
padding: 1rem;
border-radius: 5px;
background-color: #f9f9f9;
}
.toggle-container {
margin-bottom: 1rem;
}
</style>
"""
# unsafe_allow_html is required so the <style> tag is emitted as HTML.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
def get_all_docs(repo_path="markdowns"):
    """Collect every document directory found two levels below ``repo_path``.

    Expected layout: ``repo_path/folder_id/doc_id/``. Only directories are
    considered at both levels; plain files are ignored. Because the scan uses
    the ``*`` glob pattern, entries whose name starts with a dot are skipped.

    Args:
        repo_path: Root directory to scan. Defaults to ``"markdowns"``.

    Returns:
        A list of ``(doc_id, doc_path)`` tuples, where ``doc_id`` is the
        directory name and ``doc_path`` is the path to that directory. The
        list is sorted (by ``doc_id``, then ``doc_path``) so the result does
        not depend on filesystem enumeration order. An empty list is returned
        when ``repo_path`` does not exist.
    """
    if not os.path.exists(repo_path):
        return []

    all_docs = []
    # glob.escape() keeps "[", "]", "?" and "*" that appear in real directory
    # names from being interpreted as wildcards, which would otherwise make
    # such folders (and every document inside them) silently disappear.
    folder_pattern = os.path.join(glob.escape(repo_path), '*')
    for folder_path in glob.glob(folder_pattern):
        if not os.path.isdir(folder_path):
            continue
        doc_pattern = os.path.join(glob.escape(folder_path), '*')
        for doc_path in glob.glob(doc_pattern):
            if os.path.isdir(doc_path):
                all_docs.append((os.path.basename(doc_path), doc_path))

    all_docs.sort()
    return all_docs
def read_markdown_file(file_path):
    """Return the text of a UTF-8 markdown file, or a placeholder message.

    The function never raises for I/O or decoding problems; instead it
    returns a human-readable string so the caller can display it directly.

    Args:
        file_path: Path of the file to read.

    Returns:
        The file content on success, ``"Markdown file not found."`` when the
        path does not exist, or ``"Error reading markdown file: <details>"``
        when the file exists but cannot be opened or decoded.
    """
    # Open first and react to the failure instead of checking os.path.exists()
    # beforehand: the file can vanish between the check and the open.
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            return f.read()
    except (FileNotFoundError, NotADirectoryError):
        return "Markdown file not found."
    except (OSError, ValueError) as e:
        # OSError covers permission/is-a-directory problems; ValueError covers
        # UnicodeDecodeError raised for content that is not valid UTF-8.
        return f"Error reading markdown file: {str(e)}"
def display_model_outputs(doc_path):
    """Render one tab per ``*.md`` file found directly inside ``doc_path``.

    Each markdown file is treated as the output of one model; the file name
    without its extension is the model name, shown upper-cased as the tab
    label. A checkbox switches every tab between the raw markdown source and
    the rendered markdown.

    Args:
        doc_path: Directory of the document whose model outputs are shown.

    Returns:
        None. A warning is displayed and nothing else is rendered when the
        directory contains no markdown files.
    """
    # Escape the directory so "[", "?" or "*" in its name are matched literally.
    md_files = glob.glob(os.path.join(glob.escape(doc_path), "*.md"))
    if not md_files:
        st.warning("No markdown files found for this document")
        return

    # Pair each model name with the path it actually came from. splitext()
    # removes only the trailing extension, whereas str.replace(".md", "")
    # would also eat ".md" inside the name (e.g. "model.md5.md") and the path
    # rebuilt from that damaged name would point at a non-existent file.
    models = sorted(
        (os.path.splitext(os.path.basename(md_file))[0], md_file)
        for md_file in md_files
    )
    display_names = [model_name.upper() for model_name, _ in models]

    # Keep the toggle value in its own session_state entry, mirrored from the
    # widget through on_change, and use it as the checkbox default.
    if 'show_parsed' not in st.session_state:
        st.session_state.show_parsed = False

    st.markdown("<div class='toggle-container'>", unsafe_allow_html=True)
    show_parsed = st.checkbox(
        "Show Parsed Markdown",
        value=st.session_state.show_parsed,
        key="parsed_markdown_toggle",
        on_change=lambda: setattr(
            st.session_state, 'show_parsed', st.session_state.parsed_markdown_toggle
        ),
    )
    st.markdown("</div>", unsafe_allow_html=True)

    tabs = st.tabs(display_names)
    for tab, (_, md_path) in zip(tabs, models):
        md_content = read_markdown_file(md_path)
        with tab:
            if show_parsed:
                # NOTE(review): unsafe_allow_html renders any HTML present in
                # the model output as-is; confirm these files are trusted.
                st.markdown(md_content, unsafe_allow_html=True)
            else:
                st.markdown("<div class='markdown-container'>", unsafe_allow_html=True)
                st.code(md_content, language="markdown")
                st.markdown("</div>", unsafe_allow_html=True)
def main():
    """Render the page: title, document navigation, image and model outputs.

    Documents are discovered under the fixed ``markdowns`` directory. The
    index of the document being viewed lives in ``st.session_state`` under
    ``current_index``; the Previous/Next buttons move it with wrap-around and
    trigger a rerun.
    """
    st.title("Document Analysis Leaderboard")

    repo_path = "markdowns"
    with st.spinner("Loading documents..."):
        documents = get_all_docs(repo_path)

    # Nothing to show: explain why and stop.
    if not documents:
        st.error(f"No documents found in {repo_path}. Please check the directory structure.")
        path_status = (
            f"The path {repo_path} exists, but no documents were found."
            if os.path.exists(repo_path)
            else f"The path {repo_path} does not exist."
        )
        st.info(path_status)
        return

    # Stable ordering so an index always refers to the same document.
    documents = sorted(documents)
    total = len(documents)

    state = st.session_state
    if 'current_index' not in state:
        state.current_index = 0
    # Clamp to the last document if the stored index is past the end.
    state.current_index = min(state.current_index, total - 1)

    doc_id, doc_path = documents[state.current_index]

    # Navigation row: previous button | heading and position | next button.
    prev_col, heading_col, next_col = st.columns([1, 4, 1])
    with prev_col:
        if st.button("← Previous", use_container_width=True):
            state.current_index = (state.current_index - 1) % total
            st.rerun()
    with heading_col:
        st.markdown(f"### Document: {doc_id}")
        st.caption(f"Document {state.current_index + 1} of {total}")
    with next_col:
        if st.button("Next →", use_container_width=True):
            state.current_index = (state.current_index + 1) % total
            st.rerun()

    # Document image, shown at a fixed width in the middle of three columns.
    image_path = os.path.join(doc_path, "image.jpg")
    try:
        if not os.path.exists(image_path):
            st.info("Image not available for this document")
        else:
            _, center_col, _ = st.columns([1, 2, 1])
            with center_col:
                st.markdown("<div class='image-container'>", unsafe_allow_html=True)
                st.image(image_path, width=500)
                st.markdown("</div>", unsafe_allow_html=True)
    except Exception as exc:
        st.error(f"Error loading image: {str(exc)}")

    # Horizontal rule between the image and the model outputs.
    st.markdown("<hr style='margin: 2rem 0;'>", unsafe_allow_html=True)

    display_model_outputs(doc_path)
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()