import streamlit as st
from pathlib import Path
import sys
from layout import page_wrapper
from modules import get_module, get_module_name, module_names
# Page-level settings: browser-tab title and icon, wide layout, and a sidebar
# that starts collapsed.
st.set_page_config(
    page_title="Historical OCR Workshop",
    page_icon="📜",
    layout="wide",
    initial_sidebar_state="collapsed",
)

# Seed the session-state keys used for workshop navigation. Each key is only
# written when it is missing, so a value that is already present is left alone.
if "current_module" not in st.session_state:
    # Number of the module to display; the workshop begins at module 1.
    st.session_state["current_module"] = 1

if "workshop_started" not in st.session_state:
    # False until the start button on the welcome screen is pressed.
    st.session_state["workshop_started"] = False

if "processing_history" not in st.session_state:
    # List of processed-document records, read by the sidebar activity panel.
    st.session_state["processing_history"] = []
def navigate_to_module(module_number):
    """Make ``module_number`` the current module and rerun the script.

    The number is stored under the ``current_module`` session-state key and
    ``st.rerun()`` is then called so the page is rendered again with the
    new selection.
    """
    st.session_state["current_module"] = module_number
    st.rerun()
# Welcome screen if workshop hasn't been started
if not st.session_state.workshop_started:

    def welcome_screen():
        """Render the welcome/start screen.

        Draws, in order: the hero text, a two-column introduction (overview
        and OCR explanation on the left; a research prompt and an optional
        sample image on the right), the three learning-outcome columns, the
        six-module overview, a quote, and the start button. Pressing the
        start button sets ``workshop_started`` in the session state and
        reruns the script.
        """
        # Hero section
        st.markdown(
            "\n"
            "Historical OCR Workshop\n"
            "Unlock the potential of historical documents with modern OCR technology\n",
            unsafe_allow_html=True,
        )

        # Introduction: wider text column on the left, narrower one on the right
        intro_col, aside_col = st.columns([3, 2])
        with intro_col:
            st.markdown(
                "\n"
                "Workshop Overview\n"
                "This interactive workshop explores the application of OCR technology to historical documents,\n"
                "combining theoretical understanding with practical experiences. Designed for historians,\n"
                "archivists, and digital humanities scholars, it offers both conceptual frameworks and hands-on skills.\n",
                unsafe_allow_html=True,
            )
            st.markdown(
                "\n"
                "What is OCR?\n"
                "Optical Character Recognition (OCR) technology enables computers to extract text from images and documents.\n"
                "Modern OCR uses AI vision models to understand both the text and its visual context, making it powerful for\n"
                "historical research and digital humanities.\n",
                unsafe_allow_html=True,
            )
        with aside_col:
            # Research question prompt
            st.markdown(
                "\n"
                "For Historians:\n"
                "How might OCR technology transform our access to and interpretation of historical documents?\n"
                "What new research questions become possible when large archives become machine-readable?\n",
                unsafe_allow_html=True,
            )
            # Sample historical document image, looked up next to this script
            sample_path = Path(__file__).parent / "input" / "magellan-travels.jpg"
            if sample_path.exists():
                try:
                    from PIL import Image

                    with Image.open(sample_path) as img:
                        st.image(img, caption="Sample Historical Document", width=300)
                except Exception:
                    # Deliberate best-effort: the image is decorative, so a
                    # missing PIL install or an unreadable file must not stop
                    # the welcome screen from rendering.
                    pass

        # "What you'll learn" heading and its three outcome columns
        st.markdown("What You'll Learn\n", unsafe_allow_html=True)
        learning_outcomes = (
            "\n"
            "Conceptual Understanding\n"
            "- Text-image relationships in historical documents\n"
            "- Evolution of OCR technology\n"
            "- AI vision models for document analysis\n"
            "- Historical typography challenges\n",
            "\n"
            "Methodological Approaches\n"
            "- Critical frameworks for OCR in historical research\n"
            "- Hybrid computational-traditional methods\n"
            "- Error analysis and interpretation\n"
            "- Contextual reading strategies\n",
            "\n"
            "Practical Skills\n"
            "- Processing historical documents with OCR\n"
            "- Analyzing and structuring extracted information\n"
            "- Integrating OCR into research workflows\n"
            "- Building searchable archives\n",
        )
        for column, outcome_text in zip(st.columns(3), learning_outcomes):
            with column:
                st.markdown(outcome_text, unsafe_allow_html=True)

        # Module overview: odd-numbered modules on the left, even on the right
        st.markdown("Workshop Modules\n", unsafe_allow_html=True)
        left_col, right_col = st.columns(2)
        for column, module_numbers in ((left_col, (1, 3, 5)), (right_col, (2, 4, 6))):
            with column:
                for i in module_numbers:
                    # module_names is 0-indexed while modules are numbered from 1
                    st.markdown(
                        "\n"
                        f"Module {i}\n"
                        f"{module_names[i-1]}\n"
                        f"Module {i} of the historical OCR workshop.\n",
                        unsafe_allow_html=True,
                    )

        # Inspirational quote
        st.markdown(
            "\n"
            "\"The digital turn in historical research is not just about converting analog to digital;\n"
            "it's about transforming how we access, analyze, and interpret the past.\"\n"
            "— Dr. Jane Winters, Professor of Digital Humanities\n",
            unsafe_allow_html=True,
        )

        # Start button, centred by placing it in the middle of three columns
        st.markdown('', unsafe_allow_html=True)
        _, center_col, _ = st.columns([1, 2, 1])
        with center_col:
            if st.button(
                "Begin Workshop Journey",
                key="start_workshop",
                type="primary",
                use_container_width=True,
            ):
                st.session_state.workshop_started = True
                st.rerun()
            st.markdown(
                "\n"
                "No installation required • Start immediately\n",
                unsafe_allow_html=True,
            )
        st.markdown("\n", unsafe_allow_html=True)

    # Display the welcome screen (outside modules)
    welcome_screen()
else:
# Get the current module to display
current_module = st.session_state.current_module
module = get_module(current_module)
# Create navigation callbacks for the page wrapper
def nav_to_prev():
if current_module > 1:
st.session_state.current_module = current_module - 1
st.rerun()
def nav_to_next():
if current_module < 6:
st.session_state.current_module = current_module + 1
st.rerun()
# Create the sidebar navigation
with st.sidebar:
st.markdown("Workshop Navigation
", unsafe_allow_html=True)
# Visual header
st.markdown("", unsafe_allow_html=True)
# Show a progress indicator
st.markdown(f"
Your Progress: Module {current_module} of 6
", unsafe_allow_html=True)
st.progress(current_module / 6)
# Module navigation buttons
st.markdown("
Modules
", unsafe_allow_html=True)
for i, name in enumerate(module_names, 1):
btn_style = "primary" if i == current_module else "secondary"
if st.button(f"{i}: {name}", key=f"nav_module_{i}", type=btn_style, use_container_width=True):
st.session_state.current_module = i
st.rerun()
# About the workshop in a collapsible section
with st.expander("About the Workshop"):
st.markdown("""
This interactive workshop explores OCR technology for historical documents.
**How to use this workshop:**
1. Navigate through modules sequentially
2. Expand content sections to read more
3. Try the interactive OCR experiment
4. Reflect on research questions
For help or more information, use the reference materials in Module 6.
""")
# Processing history if available
if st.session_state.processing_history:
with st.expander("Your Activity"):
st.markdown(f"
Documents processed: {len(st.session_state.processing_history)}", unsafe_allow_html=True)
# Show the most recent document processed
latest = st.session_state.processing_history[-1]
st.markdown(f"""
Latest document: {latest['fileName']}
Processed with {' vision model' if latest['useVision'] else ' basic OCR'}
""", unsafe_allow_html=True)
# Render the current module content using the page wrapper
page_wrapper(module.render, current_module)
# At the bottom of the page, create the navigation buttons that the fixed
# navigation bar is meant to activate.
#
# NOTE: st.button() does not accept a `label_visibility` argument (that
# option belongs to input widgets), so passing it raised TypeError as soon as
# the workshop had started. The argument is omitted here; if these buttons are
# supposed to be invisible, that has to come from styling applied elsewhere --
# TODO confirm against the layout module.
if st.session_state.workshop_started:
    # Previous-module button, only offered after the first module
    if st.session_state.current_module > 1:
        if st.button("←", key=f"nav_prev_{st.session_state.current_module - 1}"):
            st.session_state.current_module -= 1
            st.rerun()

    # Next-module button, only offered before module 6
    if st.session_state.current_module < 6:
        if st.button("→", key=f"nav_next_{st.session_state.current_module + 1}"):
            st.session_state.current_module += 1
            st.rerun()

    # One "dot" button per module (1 through 6) for direct jumps
    for i in range(1, 7):
        if st.button(f"{i}", key=f"nav_dot_{i}"):
            st.session_state.current_module = i
            st.rerun()