"""Streamlit entry point for the Historical OCR Workshop.

The script shows a welcome screen until the user starts the workshop, then
renders the currently selected module (obtained from ``modules.get_module``)
through ``layout.page_wrapper`` together with a sidebar for navigation.

Session-state keys used here:
    current_module      1-based number of the module being displayed.
    workshop_started    False while the welcome screen should be shown.
    processing_history  List of processed-document records; only read here.
"""
import sys
from pathlib import Path

import streamlit as st

from layout import page_wrapper
from modules import get_module, get_module_name, module_names

# Set page configuration with dark theme
st.set_page_config(
    page_title="Historical OCR Workshop",
    page_icon="📜",
    layout="wide",
    initial_sidebar_state="collapsed",
)

# Number of workshop modules. Derived from ``module_names`` so the progress
# indicator, the module cards and the navigation buttons always agree with
# the sidebar, which already iterates over every entry of ``module_names``.
TOTAL_MODULES = len(module_names)

# Initialize session state for workshop navigation
if 'current_module' not in st.session_state:
    st.session_state.current_module = 1
if 'workshop_started' not in st.session_state:
    st.session_state.workshop_started = False
if 'processing_history' not in st.session_state:
    st.session_state.processing_history = []


def navigate_to_module(module_number):
    """Navigate to a specific module.

    Stores ``module_number`` as the current module in the session state and
    asks Streamlit to rerun the script so the new module is rendered.
    """
    st.session_state.current_module = module_number
    st.rerun()


def welcome_screen():
    """Render the welcome/start screen shown before the workshop begins."""
    # Hero section with eye-catching design
    st.markdown("""

Historical OCR Workshop

Unlock the potential of historical documents with modern OCR technology

""", unsafe_allow_html=True)

    # Introduction with cleaner layout
    intro_col, aside_col = st.columns([3, 2])

    with intro_col:
        st.markdown("""

Workshop Overview

This interactive workshop explores the application of OCR technology to historical documents, combining theoretical understanding with practical experiences. Designed for historians, archivists, and digital humanities scholars, it offers both conceptual frameworks and hands-on skills.
""", unsafe_allow_html=True)

        st.markdown("""

What is OCR?

Optical Character Recognition (OCR) technology enables computers to extract text from images and documents. Modern OCR uses AI vision models to understand both the text and its visual context, making it powerful for historical research and digital humanities.
""", unsafe_allow_html=True)

    with aside_col:
        # Add an engaging research question
        st.markdown("""

For Historians:

How might OCR technology transform our access to and interpretation of historical documents? What new research questions become possible when large archives become machine-readable?
""", unsafe_allow_html=True)

        # Display a sample historical document image
        input_dir = Path(__file__).parent / "input"
        sample_path = input_dir / "magellan-travels.jpg"
        if sample_path.exists():
            try:
                from PIL import Image
                with Image.open(sample_path) as img:
                    st.image(img, caption="Sample Historical Document", width=300)
            except Exception:
                # The sample image is purely illustrative: if Pillow is
                # missing or the file cannot be displayed, the welcome
                # screen is rendered without it rather than failing.
                pass

    # What you'll learn section with visual learning outcomes
    st.markdown("\n\nWhat You'll Learn\n\n", unsafe_allow_html=True)

    # Create three columns for clean layout
    concept_col, method_col, skill_col = st.columns(3)

    with concept_col:
        st.markdown("""

Conceptual Understanding

- Text-image relationships in historical documents - Evolution of OCR technology - AI vision models for document analysis - Historical typography challenges
""", unsafe_allow_html=True)

    with method_col:
        st.markdown("""

Methodological Approaches

- Critical frameworks for OCR in historical research - Hybrid computational-traditional methods - Error analysis and interpretation - Contextual reading strategies
""", unsafe_allow_html=True)

    with skill_col:
        st.markdown("""

Practical Skills

- Processing historical documents with OCR - Analyzing and structuring extracted information - Integrating OCR into research workflows - Building searchable archives
""", unsafe_allow_html=True)

    # Module overview
    st.markdown("\n\nWorkshop Modules\n\n", unsafe_allow_html=True)

    # Odd-numbered modules go in the left column, even-numbered in the right.
    left_col, right_col = st.columns(2)
    card_layout = (
        (left_col, range(1, TOTAL_MODULES + 1, 2)),
        (right_col, range(2, TOTAL_MODULES + 1, 2)),
    )
    for column, numbers in card_layout:
        with column:
            for number in numbers:
                st.markdown(f"""
Module {number}
{module_names[number - 1]}

Module {number} of the historical OCR workshop.

""", unsafe_allow_html=True)

    # Inspirational quote
    st.markdown("""
"The digital turn in historical research is not just about converting analog to digital; it's about transforming how we access, analyze, and interpret the past."

— Dr. Jane Winters, Professor of Digital Humanities
""", unsafe_allow_html=True)

    # Start button with enhanced styling
    st.markdown("\n", unsafe_allow_html=True)
    _, center_col, _ = st.columns([1, 2, 1])
    with center_col:
        if st.button("Begin Workshop Journey", key="start_workshop",
                     type="primary", use_container_width=True):
            st.session_state.workshop_started = True
            st.rerun()
        st.markdown(
            "\n\nNo installation required • Start immediately\n\n",
            unsafe_allow_html=True,
        )
    st.markdown("\n", unsafe_allow_html=True)


def _render_sidebar(current_module):
    """Render the sidebar: progress, module buttons, help text and activity.

    ``current_module`` is the 1-based number of the module being displayed;
    it drives the progress bar and highlights the matching button.
    """
    with st.sidebar:
        st.markdown("\n\nWorkshop Navigation\n\n", unsafe_allow_html=True)

        # Visual header
        st.markdown("\n", unsafe_allow_html=True)

        # Show a progress indicator
        st.markdown(
            f"\nYour Progress: Module {current_module} of {TOTAL_MODULES}\n",
            unsafe_allow_html=True,
        )
        st.progress(current_module / TOTAL_MODULES)

        # Module navigation buttons
        st.markdown("\n\nModules\n\n", unsafe_allow_html=True)

        for number, name in enumerate(module_names, 1):
            btn_style = "primary" if number == current_module else "secondary"
            if st.button(f"{number}: {name}", key=f"nav_module_{number}",
                         type=btn_style, use_container_width=True):
                navigate_to_module(number)

        # About the workshop in a collapsible section
        with st.expander("About the Workshop"):
            st.markdown(
                " This interactive workshop explores OCR technology for historical documents."
                " **How to use this workshop:**"
                " 1. Navigate through modules sequentially"
                " 2. Expand content sections to read more"
                " 3. Try the interactive OCR experiment"
                " 4. Reflect on research questions"
                " For help or more information, use the reference materials in Module 6. "
            )

        # Processing history if available
        history = st.session_state.processing_history
        if history:
            with st.expander("Your Activity"):
                st.markdown(f"Documents processed: {len(history)}", unsafe_allow_html=True)

                # Show the most recent document processed.
                # NOTE(review): assumes each history record is a mapping with
                # 'fileName' and 'useVision' keys; the records are produced
                # outside this file - confirm against the code that appends
                # to processing_history.
                latest = history[-1]
                st.markdown(f"""
Latest document: {latest['fileName']}
Processed with {' vision model' if latest['useVision'] else ' basic OCR'}
""", unsafe_allow_html=True)


def _render_hidden_navigation():
    """Create the previous/next/dot buttons used by the fixed navigation bar.

    The original comments describe these buttons as hidden and activated by
    the fixed navigation bar. NOTE(review): the hiding is presumably done by
    CSS/JS in ``layout`` - confirm. ``st.button`` has no ``label_visibility``
    parameter, so that argument (which raised ``TypeError``) is not passed.
    """
    current = st.session_state.current_module

    # Previous navigation button
    if current > 1:
        if st.button("←", key=f"nav_prev_{current - 1}"):
            navigate_to_module(current - 1)

    # Next navigation button
    if current < TOTAL_MODULES:
        if st.button("→", key=f"nav_next_{current + 1}"):
            navigate_to_module(current + 1)

    # Module navigation dots
    for number in range(1, TOTAL_MODULES + 1):
        if st.button(f"{number}", key=f"nav_dot_{number}"):
            navigate_to_module(number)


# Welcome screen if workshop hasn't been started
if not st.session_state.workshop_started:
    # Display the welcome screen (outside modules)
    welcome_screen()
else:
    # Get the current module to display
    current_module = st.session_state.current_module
    module = get_module(current_module)

    # Create the sidebar navigation
    _render_sidebar(current_module)

    # Render the current module content using the page wrapper
    page_wrapper(module.render, current_module)

# At the bottom of the page, create the navigation buttons for the fixed
# navigation bar. The flag is re-read here (rather than reusing the branch
# above) because rendering the module may have changed the session state.
if st.session_state.workshop_started:
    _render_hidden_navigation()