File size: 12,702 Bytes
85bdb4e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
import streamlit as st
from pathlib import Path
import sys
from layout import page_wrapper
from modules import get_module, get_module_name, module_names

# Page-level settings: title, icon, wide layout, sidebar collapsed on load.
st.set_page_config(
    initial_sidebar_state="collapsed",
    layout="wide",
    page_icon="📜",
    page_title="Historical OCR Workshop",
)

# Seed the navigation state once per session. Keys that already exist are left
# alone, so values written on earlier runs are not overwritten.
for _state_key, _initial_value in (
    ("current_module", 1),
    ("workshop_started", False),
    ("processing_history", []),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value

def navigate_to_module(module_number):
    """Make ``module_number`` the active module and rerun the script.

    The number is stored under ``current_module`` in the Streamlit session
    state, after which ``st.rerun()`` is called.
    """
    st.session_state["current_module"] = module_number
    st.rerun()

# Welcome screen if workshop hasn't been started
if not st.session_state.workshop_started:
    def welcome_screen():
        """Render the landing page shown until the workshop is started.

        Draws, in order: the hero banner, the introduction columns (with an
        optional sample image), the learning-outcome cards, one overview card
        per entry in ``module_names``, a quote, and the start button. Pressing
        the start button sets ``st.session_state.workshop_started`` to True
        and reruns the script.
        """

        def _render_module_card(number, title):
            """Render one overview card: numbered badge, title, short blurb."""
            st.markdown(f"""
            <div style="background-color: #1f2937; border-radius: 8px; padding: 16px;
                       margin-bottom: 16px; border-top: 4px solid #3B82F6;">
                <div style="background-color: #3B82F6; color: white; font-weight: bold;
                           padding: 4px 10px; border-radius: 12px; font-size: 0.9rem;
                           display: inline-block; margin-bottom: 8px;">Module {number}</div>
                <div style="font-weight: 600; margin-bottom: 8px; font-size: 1.1rem; color: white;">
                    {title}
                </div>
                <p>Module {number} of the historical OCR workshop.</p>
            </div>
            """, unsafe_allow_html=True)

        # Hero section with eye-catching design
        st.markdown("""
        <div style="background: linear-gradient(135deg, #1E3A8A 0%, #2563EB 100%);
                    padding: 2rem; border-radius: 0.75rem; text-align: center;
                    margin-bottom: 2rem; box-shadow: 0 4px 6px rgba(0,0,0,0.3);">
            <h1>Historical OCR Workshop</h1>
            <p style="font-size: 1.25rem;">Unlock the potential of historical documents with modern OCR technology</p>
        </div>
        """, unsafe_allow_html=True)

        # Introduction with cleaner layout
        col1, col2 = st.columns([3, 2])

        with col1:
            st.markdown("""
            <div style="background-color: #1f2937; padding: 1.5rem; border-radius: 0.75rem; margin-bottom: 1.5rem;">
            <h3>Workshop Overview</h3>

            This interactive workshop explores the application of OCR technology to historical documents,
            combining theoretical understanding with practical experiences. Designed for historians,
            archivists, and digital humanities scholars, it offers both conceptual frameworks and hands-on skills.
            </div>
            """, unsafe_allow_html=True)

            st.markdown("""
            <div style="background-color: #374151; padding: 0.75rem; border-radius: 0.5rem;
                        margin: 1rem 0; border-left: 3px solid #3B82F6;">
            <h4>What is OCR?</h4>
            Optical Character Recognition (OCR) technology enables computers to extract text from images and documents.
            Modern OCR uses AI vision models to understand both the text and its visual context, making it powerful for
            historical research and digital humanities.
            </div>
            """, unsafe_allow_html=True)

        with col2:
            # Add an engaging research question
            st.markdown("""
            <div style="background-color: #1E3A8A; color: white; padding: 0.75rem;
                       border-radius: 0.5rem; margin: 1rem 0; border-left: 3px solid #60A5FA;">
            <h4>For Historians:</h4>
            How might OCR technology transform our access to and interpretation of historical documents?
            What new research questions become possible when large archives become machine-readable?
            </div>
            """, unsafe_allow_html=True)

            # Display a sample historical document image, if it ships with the app
            sample_path = Path(__file__).parent / "input" / "magellan-travels.jpg"
            if sample_path.exists():
                try:
                    from PIL import Image
                    with Image.open(sample_path) as img:
                        st.image(img, caption="Sample Historical Document", width=300)
                except Exception:
                    # Deliberately best-effort: the picture is decorative, so
                    # a failed PIL import or an unreadable file must not take
                    # the welcome page down with it.
                    pass

        # What you'll learn section with visual learning outcomes
        st.markdown('<h3>What You\'ll Learn</h3>', unsafe_allow_html=True)

        # Create three columns for clean layout
        col1, col2, col3 = st.columns(3)

        with col1:
            st.markdown("""
            <div style="background-color: #1f2937; padding: 1rem; border-radius: 0.5rem;">
            <h4>Conceptual Understanding</h4>

            - Text-image relationships in historical documents
            - Evolution of OCR technology
            - AI vision models for document analysis
            - Historical typography challenges
            </div>
            """, unsafe_allow_html=True)

        with col2:
            st.markdown("""
            <div style="background-color: #1f2937; padding: 1rem; border-radius: 0.5rem;">
            <h4>Methodological Approaches</h4>

            - Critical frameworks for OCR in historical research
            - Hybrid computational-traditional methods
            - Error analysis and interpretation
            - Contextual reading strategies
            </div>
            """, unsafe_allow_html=True)

        with col3:
            st.markdown("""
            <div style="background-color: #1f2937; padding: 1rem; border-radius: 0.5rem;">
            <h4>Practical Skills</h4>

            - Processing historical documents with OCR
            - Analyzing and structuring extracted information
            - Integrating OCR into research workflows
            - Building searchable archives
            </div>
            """, unsafe_allow_html=True)

        # Module overview
        st.markdown('<h3>Workshop Modules</h3>', unsafe_allow_html=True)

        # Odd-numbered modules go in the left column, even-numbered in the
        # right. The cards are driven by module_names itself, so a list that
        # is shorter or longer than six entries neither raises IndexError nor
        # silently hides modules.
        numbered_modules = list(enumerate(module_names, 1))
        left_col, right_col = st.columns(2)
        for column, cards in ((left_col, numbered_modules[0::2]),
                              (right_col, numbered_modules[1::2])):
            with column:
                for number, title in cards:
                    _render_module_card(number, title)

        # Inspirational quote
        st.markdown("""
        <div style="font-style: italic; color: #D1D5DB; padding: 0.5rem 1rem;
                   border-left: 3px solid #4B5563; margin: 1rem 0;">
        "The digital turn in historical research is not just about converting analog to digital;
        it's about transforming how we access, analyze, and interpret the past."
        <br/><br/>
        <span style="font-size:0.9rem; text-align:right; display:block;">— Dr. Jane Winters, Professor of Digital Humanities</span>
        </div>
        """, unsafe_allow_html=True)

        # Start button, centred via the middle of three columns.
        # Each st.markdown call is rendered as its own element, so a <div>
        # opened in one call and closed in another never wraps the widgets in
        # between. A self-contained spacer provides the top margin instead.
        st.markdown('<div style="margin-top: 2rem;"></div>', unsafe_allow_html=True)
        _, centre_col, _ = st.columns([1, 2, 1])
        with centre_col:
            if st.button("Begin Workshop Journey", key="start_workshop", type="primary", use_container_width=True):
                st.session_state.workshop_started = True
                st.rerun()
        st.markdown('<p style="text-align:center; margin-top:8px; font-size:0.9rem; color:#666;">No installation required • Start immediately</p>', unsafe_allow_html=True)

    # Display the welcome screen (outside modules)
    welcome_screen()
else:
    # Get the current module to display
    current_module = st.session_state.current_module
    module = get_module(current_module)
    
    # Create navigation callbacks for the page wrapper
    def nav_to_prev():
        if current_module > 1:
            st.session_state.current_module = current_module - 1
            st.rerun()
    
    def nav_to_next():
        if current_module < 6:
            st.session_state.current_module = current_module + 1
            st.rerun()
    
    # Create the sidebar navigation
    with st.sidebar:
        st.markdown("<h1>Workshop Navigation</h1>", unsafe_allow_html=True)
        
        # Visual header
        st.markdown("<div style='display:flex; align-items:center; margin-bottom:20px;'>", unsafe_allow_html=True)
        
        # Show a progress indicator
        st.markdown(f"<div><b>Your Progress:</b> Module {current_module} of 6</div>", unsafe_allow_html=True)
        st.progress(current_module / 6)
        
        # Module navigation buttons
        st.markdown("<h3>Modules</h3>", unsafe_allow_html=True)
        
        for i, name in enumerate(module_names, 1):
            btn_style = "primary" if i == current_module else "secondary"
            if st.button(f"{i}: {name}", key=f"nav_module_{i}", type=btn_style, use_container_width=True):
                st.session_state.current_module = i
                st.rerun()
        
        # About the workshop in a collapsible section
        with st.expander("About the Workshop"):
            st.markdown("""
            This interactive workshop explores OCR technology for historical documents. 
            
            **How to use this workshop:**
            1. Navigate through modules sequentially
            2. Expand content sections to read more
            3. Try the interactive OCR experiment
            4. Reflect on research questions
            
            For help or more information, use the reference materials in Module 6.
            """)
            
        # Processing history if available
        if st.session_state.processing_history:
            with st.expander("Your Activity"):
                st.markdown(f"<b>Documents processed:</b> {len(st.session_state.processing_history)}", unsafe_allow_html=True)
                
                # Show the most recent document processed
                latest = st.session_state.processing_history[-1]
                st.markdown(f"""
                <div style="background:#f9f9f9; padding:8px; border-radius:4px; margin-top:10px; color:#333;">
                    <b>Latest document:</b> {latest['fileName']}<br>
                    <span style="font-size:0.9rem;">Processed with {' vision model' if latest['useVision'] else ' basic OCR'}</span>
                </div>
                """, unsafe_allow_html=True)
    
    # Render the current module content using the page wrapper
    page_wrapper(module.render, current_module)

# At the bottom of the page, create the navigation buttons that back the fixed
# navigation bar.
# st.button has no `label_visibility` parameter, so passing it raised
# TypeError as soon as the workshop had started; the keyword is omitted here.
# NOTE(review): hiding these buttons is presumably done with CSS injected by
# the layout module — confirm there.
if st.session_state.workshop_started:
    active_module = st.session_state.current_module
    # Same source of truth as the sidebar's module list.
    module_count = len(module_names)

    # Previous navigation button (activated by the fixed nav)
    if active_module > 1:
        if st.button("←", key=f"nav_prev_{active_module - 1}"):
            navigate_to_module(active_module - 1)

    # Next navigation button (activated by the fixed nav)
    if active_module < module_count:
        if st.button("→", key=f"nav_next_{active_module + 1}"):
            navigate_to_module(active_module + 1)

    # Module navigation dots (activated by the fixed nav)
    for i in range(1, module_count + 1):
        if st.button(f"{i}", key=f"nav_dot_{i}"):
            navigate_to_module(i)