Spaces:
Running
Running
import html
import logging
import sys
from pathlib import Path

import streamlit as st

from layout import page_wrapper
from modules import get_module, get_module_name, module_names
# Configure the Streamlit page: wide layout, sidebar collapsed on first load.
st.set_page_config(
    page_title="Historical OCR Workshop",
    page_icon="📜",
    layout="wide",
    initial_sidebar_state="collapsed",
)

# Seed the session-state keys used for workshop navigation. A key that is
# already present (e.g. from an earlier rerun) keeps its current value.
for _state_key, _initial_value in (
    ("current_module", 1),
    ("workshop_started", False),
    ("processing_history", []),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
def navigate_to_module(module_number):
    """Select ``module_number`` as the active module and rerun the script.

    The number is stored under ``current_module`` in ``st.session_state``;
    ``st.rerun()`` is then called so the page is drawn again with the new
    selection.
    """
    st.session_state["current_module"] = module_number
    st.rerun()
| # Welcome screen if workshop hasn't been started | |
| if not st.session_state.workshop_started: | |
def welcome_screen():
    """Render the welcome/start screen.

    Draws, top to bottom: the hero banner, the workshop overview next to a
    research prompt and sample image, three learning-outcome cards, one card
    per entry in ``module_names``, a quotation, and the start button.
    Pressing the button sets ``st.session_state.workshop_started`` to ``True``
    and reruns the script.
    """
    # Hero banner
    st.markdown("""
        <div style="background: linear-gradient(135deg, #1E3A8A 0%, #2563EB 100%);
        padding: 2rem; border-radius: 0.75rem; text-align: center;
        margin-bottom: 2rem; box-shadow: 0 4px 6px rgba(0,0,0,0.3);">
        <h1>Historical OCR Workshop</h1>
        <p style="font-size: 1.25rem;">Unlock the potential of historical documents with modern OCR technology</p>
        </div>
    """, unsafe_allow_html=True)

    # Introduction: overview text on the left, prompt and sample image on the right
    intro_col, aside_col = st.columns([3, 2])
    with intro_col:
        st.markdown("""
            <div style="background-color: #1f2937; padding: 1.5rem; border-radius: 0.75rem; margin-bottom: 1.5rem;">
            <h3>Workshop Overview</h3>
            This interactive workshop explores the application of OCR technology to historical documents,
            combining theoretical understanding with practical experiences. Designed for historians,
            archivists, and digital humanities scholars, it offers both conceptual frameworks and hands-on skills.
            </div>
        """, unsafe_allow_html=True)
        st.markdown("""
            <div style="background-color: #374151; padding: 0.75rem; border-radius: 0.5rem;
            margin: 1rem 0; border-left: 3px solid #3B82F6;">
            <h4>What is OCR?</h4>
            Optical Character Recognition (OCR) technology enables computers to extract text from images and documents.
            Modern OCR uses AI vision models to understand both the text and its visual context, making it powerful for
            historical research and digital humanities.
            </div>
        """, unsafe_allow_html=True)
    with aside_col:
        # Research question aimed at historians
        st.markdown("""
            <div style="background-color: #1E3A8A; color: white; padding: 0.75rem;
            border-radius: 0.5rem; margin: 1rem 0; border-left: 3px solid #60A5FA;">
            <h4>For Historians:</h4>
            How might OCR technology transform our access to and interpretation of historical documents?
            What new research questions become possible when large archives become machine-readable?
            </div>
        """, unsafe_allow_html=True)

        # Sample document image, shown only when the file ships with the app
        input_dir = Path(__file__).parent / "input"
        sample_path = input_dir / "magellan-travels.jpg"
        if sample_path.exists():
            try:
                # Imported lazily so a missing Pillow install only costs the image
                from PIL import Image

                with Image.open(sample_path) as img:
                    st.image(img, caption="Sample Historical Document", width=300)
            except Exception:
                # The image is decorative, so the page must still render; log the
                # failure instead of discarding it so a broken asset is noticed.
                logging.getLogger(__name__).warning(
                    "Could not display sample image %s", sample_path, exc_info=True
                )

    # Learning outcomes, one card per column
    st.markdown("<h3>What You'll Learn</h3>", unsafe_allow_html=True)
    concept_col, method_col, skill_col = st.columns(3)
    with concept_col:
        st.markdown("""
            <div style="background-color: #1f2937; padding: 1rem; border-radius: 0.5rem;">
            <h4>Conceptual Understanding</h4>
            - Text-image relationships in historical documents
            - Evolution of OCR technology
            - AI vision models for document analysis
            - Historical typography challenges
            </div>
        """, unsafe_allow_html=True)
    with method_col:
        st.markdown("""
            <div style="background-color: #1f2937; padding: 1rem; border-radius: 0.5rem;">
            <h4>Methodological Approaches</h4>
            - Critical frameworks for OCR in historical research
            - Hybrid computational-traditional methods
            - Error analysis and interpretation
            - Contextual reading strategies
            </div>
        """, unsafe_allow_html=True)
    with skill_col:
        st.markdown("""
            <div style="background-color: #1f2937; padding: 1rem; border-radius: 0.5rem;">
            <h4>Practical Skills</h4>
            - Processing historical documents with OCR
            - Analyzing and structuring extracted information
            - Integrating OCR into research workflows
            - Building searchable archives
            </div>
        """, unsafe_allow_html=True)

    # Module overview: odd-numbered modules in the left column, even-numbered
    # in the right. Iterating over module_names (instead of fixed indices)
    # avoids an IndexError if the list ever holds fewer than six names.
    st.markdown('<h3>Workshop Modules</h3>', unsafe_allow_html=True)
    left_col, right_col = st.columns(2)
    for column, first_number in ((left_col, 1), (right_col, 2)):
        with column:
            for number in range(first_number, len(module_names) + 1, 2):
                _render_module_card(number, module_names[number - 1])

    # Quotation
    st.markdown("""
        <div style="font-style: italic; color: #D1D5DB; padding: 0.5rem 1rem;
        border-left: 3px solid #4B5563; margin: 1rem 0;">
        "The digital turn in historical research is not just about converting analog to digital;
        it's about transforming how we access, analyze, and interpret the past."
        <br/><br/>
        <span style="font-size:0.9rem; text-align:right; display:block;">— Dr. Jane Winters, Professor of Digital Humanities</span>
        </div>
    """, unsafe_allow_html=True)

    # Start button. Each st.markdown call is emitted as its own element, so an
    # opening <div> in one call and a closing </div> in a later call cannot
    # wrap the widgets in between. A single self-contained spacer provides the
    # top margin; the [1, 2, 1] columns do the centring.
    st.markdown('<div style="margin-top: 2rem;"></div>', unsafe_allow_html=True)
    _, centre_col, _ = st.columns([1, 2, 1])
    with centre_col:
        if st.button("Begin Workshop Journey", key="start_workshop", type="primary", use_container_width=True):
            st.session_state.workshop_started = True
            st.rerun()
        st.markdown('<p style="text-align:center; margin-top:8px; font-size:0.9rem; color:#666;">No installation required • Start immediately</p>', unsafe_allow_html=True)


def _render_module_card(number, title):
    """Render one overview card showing a module's number badge and title."""
    st.markdown(f"""
        <div style="background-color: #1f2937; border-radius: 8px; padding: 16px;
        margin-bottom: 16px; border-top: 4px solid #3B82F6;">
        <div style="background-color: #3B82F6; color: white; font-weight: bold;
        padding: 4px 10px; border-radius: 12px; font-size: 0.9rem;
        display: inline-block; margin-bottom: 8px;">Module {number}</div>
        <div style="font-weight: 600; margin-bottom: 8px; font-size: 1.1rem; color: white;">
        {title}
        </div>
        <p>Module {number} of the historical OCR workshop.</p>
        </div>
    """, unsafe_allow_html=True)
| # Display the welcome screen (outside modules) | |
| welcome_screen() | |
| else: | |
# Resolve the module selected in session state.
current_module = st.session_state.current_module
module = get_module(current_module)


# Previous/next helpers.
# NOTE(review): neither function is passed to page_wrapper or referenced
# anywhere else in this section — confirm whether they are still needed.
def nav_to_prev():
    """Select the previous module and rerun; does nothing on module 1."""
    if current_module > 1:
        st.session_state.current_module = current_module - 1
        st.rerun()


def nav_to_next():
    """Select the next module and rerun; does nothing on module 6."""
    if current_module < 6:
        st.session_state.current_module = current_module + 1
        st.rerun()


# Sidebar: progress, per-module buttons, help text and recent activity.
with st.sidebar:
    st.markdown("<h1>Workshop Navigation</h1>", unsafe_allow_html=True)

    # Spacer under the heading. The tag is closed within the same call because
    # each st.markdown call is a separate element and cannot wrap later ones.
    st.markdown("<div style='display:flex; align-items:center; margin-bottom:20px;'></div>", unsafe_allow_html=True)

    # Progress indicator
    st.markdown(f"<div><b>Your Progress:</b> Module {current_module} of 6</div>", unsafe_allow_html=True)
    st.progress(current_module / 6)

    # One button per module; the active module is drawn as the primary button.
    st.markdown("<h3>Modules</h3>", unsafe_allow_html=True)
    for i, name in enumerate(module_names, 1):
        btn_style = "primary" if i == current_module else "secondary"
        if st.button(f"{i}: {name}", key=f"nav_module_{i}", type=btn_style, use_container_width=True):
            st.session_state.current_module = i
            st.rerun()

    # Collapsible usage notes
    with st.expander("About the Workshop"):
        st.markdown("""
            This interactive workshop explores OCR technology for historical documents.
            **How to use this workshop:**
            1. Navigate through modules sequentially
            2. Expand content sections to read more
            3. Try the interactive OCR experiment
            4. Reflect on research questions
            For help or more information, use the reference materials in Module 6.
        """)

    # Activity summary, shown only once something has been recorded
    if st.session_state.processing_history:
        with st.expander("Your Activity"):
            st.markdown(f"<b>Documents processed:</b> {len(st.session_state.processing_history)}", unsafe_allow_html=True)

            # Most recent entry. The file name is escaped before being placed
            # in raw HTML so characters such as '<' or '&' in a name are shown
            # literally instead of being interpreted as markup.
            latest = st.session_state.processing_history[-1]
            safe_file_name = html.escape(str(latest['fileName']))
            mode_label = ' vision model' if latest['useVision'] else ' basic OCR'
            st.markdown(f"""
                <div style="background:#f9f9f9; padding:8px; border-radius:4px; margin-top:10px; color:#333;">
                <b>Latest document:</b> {safe_file_name}<br>
                <span style="font-size:0.9rem;">Processed with {mode_label}</span>
                </div>
            """, unsafe_allow_html=True)

# Render the current module's content through the shared page wrapper.
page_wrapper(module.render, current_module)
# Navigation buttons at the bottom of the page, meant to back the fixed
# navigation bar. st.button accepts no `label_visibility` argument (passing it
# raises TypeError), so it is not used here.
# NOTE(review): nothing in this section hides these buttons — presumably CSS
# from the layout module does; confirm.
if st.session_state.workshop_started:
    # Previous module (absent on the first module)
    if st.session_state.current_module > 1:
        if st.button("←", key=f"nav_prev_{st.session_state.current_module-1}"):
            st.session_state.current_module -= 1
            st.rerun()

    # Next module (absent on the last module)
    if st.session_state.current_module < 6:
        if st.button("→", key=f"nav_next_{st.session_state.current_module+1}"):
            st.session_state.current_module += 1
            st.rerun()

    # One "dot" button per module for direct jumps
    for i in range(1, 7):
        if st.button(f"{i}", key=f"nav_dot_{i}"):
            st.session_state.current_module = i
            st.rerun()