Spaces:

milwright
/

historical-ocr

Running

File size: 12,702 Bytes

85bdb4e

import streamlit as st
from pathlib import Path
import sys
from layout import page_wrapper
from modules import get_module, get_module_name, module_names

# Page-level settings: browser-tab title and icon, wide layout, and a sidebar
# that starts collapsed.
_PAGE_SETTINGS = {
    "page_title": "Historical OCR Workshop",
    "page_icon": "📜",
    "layout": "wide",
    "initial_sidebar_state": "collapsed",
}
st.set_page_config(**_PAGE_SETTINGS)

# Seed the per-session navigation state, leaving any value that is already
# present untouched.
_session_defaults = {
    "current_module": 1,
    "workshop_started": False,
    "processing_history": [],
}
for _state_key, _initial_value in _session_defaults.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value

def navigate_to_module(module_number):
    """Store ``module_number`` as the active module, then rerun the script."""
    st.session_state["current_module"] = module_number
    st.rerun()

# Welcome screen if workshop hasn't been started
if not st.session_state.workshop_started:
    def welcome_screen():
        """Render the welcome/start screen shown before the workshop begins.

        Draws, top to bottom: a hero banner, an overview with an optional
        sample document image, the learning outcomes, one card per entry in
        ``module_names`` split across two columns, a quote, and the start
        button. Clicking the start button sets
        ``st.session_state.workshop_started`` to ``True`` and reruns the script.
        """

        def render_module_card(number, title):
            """Render one module card into the currently active container."""
            st.markdown(f"""
            <div style="background-color: #1f2937; border-radius: 8px; padding: 16px; 
                       margin-bottom: 16px; border-top: 4px solid #3B82F6;">
                <div style="background-color: #3B82F6; color: white; font-weight: bold; 
                           padding: 4px 10px; border-radius: 12px; font-size: 0.9rem; 
                           display: inline-block; margin-bottom: 8px;">Module {number}</div>
                <div style="font-weight: 600; margin-bottom: 8px; font-size: 1.1rem; color: white;">
                    {title}
                </div>
                <p>Module {number} of the historical OCR workshop.</p>
            </div>
            """, unsafe_allow_html=True)

        # Hero section with eye-catching design
        st.markdown("""
        <div style="background: linear-gradient(135deg, #1E3A8A 0%, #2563EB 100%); 
                    padding: 2rem; border-radius: 0.75rem; text-align: center; 
                    margin-bottom: 2rem; box-shadow: 0 4px 6px rgba(0,0,0,0.3);">
            <h1>Historical OCR Workshop</h1>
            <p style="font-size: 1.25rem;">Unlock the potential of historical documents with modern OCR technology</p>
        </div>
        """, unsafe_allow_html=True)
        
        # Introduction with cleaner layout
        col1, col2 = st.columns([3, 2])
        
        with col1:
            st.markdown("""
            <div style="background-color: #1f2937; padding: 1.5rem; border-radius: 0.75rem; margin-bottom: 1.5rem;">
            <h3>Workshop Overview</h3>
            
            This interactive workshop explores the application of OCR technology to historical documents,
            combining theoretical understanding with practical experiences. Designed for historians, 
            archivists, and digital humanities scholars, it offers both conceptual frameworks and hands-on skills.
            </div>
            """, unsafe_allow_html=True)
            
            st.markdown("""
            <div style="background-color: #374151; padding: 0.75rem; border-radius: 0.5rem; 
                        margin: 1rem 0; border-left: 3px solid #3B82F6;">
            <h4>What is OCR?</h4>
            Optical Character Recognition (OCR) technology enables computers to extract text from images and documents.
            Modern OCR uses AI vision models to understand both the text and its visual context, making it powerful for
            historical research and digital humanities.
            </div>
            """, unsafe_allow_html=True)
            
        with col2:
            # Add an engaging research question
            st.markdown("""
            <div style="background-color: #1E3A8A; color: white; padding: 0.75rem; 
                       border-radius: 0.5rem; margin: 1rem 0; border-left: 3px solid #60A5FA;">
            <h4>For Historians:</h4>
            How might OCR technology transform our access to and interpretation of historical documents? 
            What new research questions become possible when large archives become machine-readable?
            </div>
            """, unsafe_allow_html=True)
            
            # Display a sample historical document image, if one ships with the app
            input_dir = Path(__file__).parent / "input"
            sample_path = input_dir / "magellan-travels.jpg"
            if sample_path.exists():
                try:
                    from PIL import Image
                    with Image.open(sample_path) as img:
                        st.image(img, caption="Sample Historical Document", width=300)
                except Exception:
                    # The image is decorative, so a failure must not break the
                    # welcome page; log it rather than discarding it silently.
                    import logging
                    logging.getLogger(__name__).warning(
                        "Could not display sample image %s", sample_path, exc_info=True
                    )
        
        # What you'll learn section with visual learning outcomes
        st.markdown('<h3>What You\'ll Learn</h3>', unsafe_allow_html=True)
        
        # Create three columns for clean layout
        col1, col2, col3 = st.columns(3)
        
        with col1:
            st.markdown("""
            <div style="background-color: #1f2937; padding: 1rem; border-radius: 0.5rem;">
            <h4>Conceptual Understanding</h4>
            
            - Text-image relationships in historical documents
            - Evolution of OCR technology 
            - AI vision models for document analysis
            - Historical typography challenges
            </div>
            """, unsafe_allow_html=True)
            
        with col2:
            st.markdown("""
            <div style="background-color: #1f2937; padding: 1rem; border-radius: 0.5rem;">
            <h4>Methodological Approaches</h4>
            
            - Critical frameworks for OCR in historical research
            - Hybrid computational-traditional methods
            - Error analysis and interpretation
            - Contextual reading strategies
            </div>
            """, unsafe_allow_html=True)
            
        with col3:
            st.markdown("""
            <div style="background-color: #1f2937; padding: 1rem; border-radius: 0.5rem;">
            <h4>Practical Skills</h4>
            
            - Processing historical documents with OCR
            - Analyzing and structuring extracted information
            - Integrating OCR into research workflows
            - Building searchable archives
            </div>
            """, unsafe_allow_html=True)
        
        # Module overview
        st.markdown('<h3>Workshop Modules</h3>', unsafe_allow_html=True)
        
        # One card per module: odd-numbered modules in the left column,
        # even-numbered modules in the right column.
        left_col, right_col = st.columns(2)
        for number, title in enumerate(module_names, 1):
            with (left_col if number % 2 else right_col):
                render_module_card(number, title)
        
        # Inspirational quote
        st.markdown("""
        <div style="font-style: italic; color: #D1D5DB; padding: 0.5rem 1rem; 
                   border-left: 3px solid #4B5563; margin: 1rem 0;">
        "The digital turn in historical research is not just about converting analog to digital; 
        it's about transforming how we access, analyze, and interpret the past."
        <br/><br/>
        <span style="font-size:0.9rem; text-align:right; display:block;">— Dr. Jane Winters, Professor of Digital Humanities</span>
        </div>
        """, unsafe_allow_html=True)
        
        # Start button with enhanced styling
        # NOTE(review): the opening <div> here and the closing </div> below are
        # emitted by separate st.markdown calls, so they presumably do not wrap
        # the button rendered in between — confirm against the rendered page.
        st.markdown('<div style="text-align: center; margin-top: 2rem;">', unsafe_allow_html=True)
        col1, col2, col3 = st.columns([1, 2, 1])
        with col2:
            if st.button("Begin Workshop Journey", key="start_workshop", type="primary", use_container_width=True):
                st.session_state.workshop_started = True
                st.rerun()
        st.markdown('<p style="text-align:center; margin-top:8px; font-size:0.9rem; color:#666;">No installation required • Start immediately</p>', unsafe_allow_html=True)
        st.markdown('</div>', unsafe_allow_html=True)
    
    # Display the welcome screen (outside modules)
    welcome_screen()
else:
    # Read the active module number from the session and fetch that module
    current_module = st.session_state["current_module"]
    module = get_module(current_module)
    
    # Create navigation callbacks for the page wrapper
    def nav_to_prev():
        """Step back one module and rerun; do nothing on the first module."""
        if current_module <= 1:
            return
        st.session_state.current_module = current_module - 1
        st.rerun()
    
    def nav_to_next():
        """Advance one module and rerun; do nothing once module 6 is reached."""
        if current_module >= 6:
            return
        st.session_state.current_module = current_module + 1
        st.rerun()
    
    # Create the sidebar navigation
    with st.sidebar:
        # Imported here because it is only needed to escape text that gets
        # embedded in the raw-HTML activity card at the bottom of this sidebar.
        import html

        # Take the module count from module_names so the progress display
        # agrees with the navigation buttons built from the same list.
        total_modules = len(module_names)

        st.markdown("<h1>Workshop Navigation</h1>", unsafe_allow_html=True)
        
        # Visual header
        # NOTE(review): this <div> is opened here and never closed by a later
        # call in this block — confirm it has the intended effect.
        st.markdown("<div style='display:flex; align-items:center; margin-bottom:20px;'>", unsafe_allow_html=True)
        
        # Show a progress indicator
        st.markdown(f"<div><b>Your Progress:</b> Module {current_module} of {total_modules}</div>", unsafe_allow_html=True)
        # st.progress rejects floats outside 0.0-1.0, so guard against an empty
        # module list and clamp an out-of-range module number.
        progress_fraction = current_module / total_modules if total_modules else 0.0
        st.progress(min(max(progress_fraction, 0.0), 1.0))
        
        # Module navigation buttons; the active module is highlighted as primary
        st.markdown("<h3>Modules</h3>", unsafe_allow_html=True)
        
        for i, name in enumerate(module_names, 1):
            btn_style = "primary" if i == current_module else "secondary"
            if st.button(f"{i}: {name}", key=f"nav_module_{i}", type=btn_style, use_container_width=True):
                st.session_state.current_module = i
                st.rerun()
        
        # About the workshop in a collapsible section
        with st.expander("About the Workshop"):
            st.markdown("""
            This interactive workshop explores OCR technology for historical documents. 
            
            **How to use this workshop:**
            1. Navigate through modules sequentially
            2. Expand content sections to read more
            3. Try the interactive OCR experiment
            4. Reflect on research questions
            
            For help or more information, use the reference materials in Module 6.
            """)
            
        # Processing history if available
        if st.session_state.processing_history:
            with st.expander("Your Activity"):
                st.markdown(f"<b>Documents processed:</b> {len(st.session_state.processing_history)}", unsafe_allow_html=True)
                
                # Show the most recent document processed
                latest = st.session_state.processing_history[-1]
                # The document name is rendered inside raw HTML, so escape it
                # to keep markup in a name from being interpreted by the browser.
                safe_file_name = html.escape(str(latest['fileName']))
                method_label = ' vision model' if latest['useVision'] else ' basic OCR'
                st.markdown(f"""
                <div style="background:#f9f9f9; padding:8px; border-radius:4px; margin-top:10px; color:#333;">
                    <b>Latest document:</b> {safe_file_name}<br>
                    <span style="font-size:0.9rem;">Processed with {method_label}</span>
                </div>
                """, unsafe_allow_html=True)
    
    # Pass the active module's render callable and its number to the page wrapper
    render_module = module.render
    page_wrapper(render_module, current_module)

# At the bottom of the page, create the navigation buttons for the fixed navigation bar.
# st.button() has no `label_visibility` parameter, so passing it raised TypeError
# as soon as the workshop was started; the argument is dropped here.
# NOTE(review): hiding these buttons is presumably handled by styling in the
# layout module — confirm, since they now render with their labels.
if st.session_state.workshop_started:
    # Previous navigation button (activated by the fixed nav)
    if st.session_state.current_module > 1:
        if st.button("←", key=f"nav_prev_{st.session_state.current_module-1}"):
            st.session_state.current_module -= 1
            st.rerun()
    
    # Next navigation button (activated by the fixed nav)
    if st.session_state.current_module < 6:
        if st.button("→", key=f"nav_next_{st.session_state.current_module+1}"):
            st.session_state.current_module += 1
            st.rerun()
    
    # Module navigation dots, one per module (activated by the fixed nav)
    for i in range(1, 7):
        if st.button(f"{i}", key=f"nav_dot_{i}"):
            st.session_state.current_module = i
            st.rerun()