# Source: Hugging Face Spaces file view (status: Running; file size: 12,702 bytes; revision 85bdb4e).
import html
import sys
from pathlib import Path

import streamlit as st

from layout import page_wrapper
from modules import get_module, get_module_name, module_names
# Configure the page: title, icon, wide layout, and a sidebar that starts
# collapsed. No theme option is passed in this call.
st.set_page_config(
    page_title="Historical OCR Workshop",
    page_icon="📜",
    layout="wide",
    initial_sidebar_state="collapsed",
)

# Seed the session-state keys used for workshop navigation. A key that is
# already present is left untouched, so values survive script reruns.
for _state_key, _initial_value in (
    ('current_module', 1),
    ('workshop_started', False),
    ('processing_history', []),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
def navigate_to_module(module_number):
    """Make ``module_number`` the active module and rerun the script.

    The number is stored under the ``current_module`` session-state key,
    after which ``st.rerun()`` is called.
    """
    st.session_state['current_module'] = module_number
    st.rerun()
def _render_module_card(number):
    """Render the overview card for workshop module ``number`` (1-based).

    The title is looked up in ``module_names`` at index ``number - 1``.
    """
    st.markdown(f"""
<div style="background-color: #1f2937; border-radius: 8px; padding: 16px;
margin-bottom: 16px; border-top: 4px solid #3B82F6;">
<div style="background-color: #3B82F6; color: white; font-weight: bold;
padding: 4px 10px; border-radius: 12px; font-size: 0.9rem;
display: inline-block; margin-bottom: 8px;">Module {number}</div>
<div style="font-weight: 600; margin-bottom: 8px; font-size: 1.1rem; color: white;">
{module_names[number - 1]}
</div>
<p>Module {number} of the historical OCR workshop.</p>
</div>
""", unsafe_allow_html=True)


def welcome_screen():
    """Render the welcome/start screen.

    Draws the hero banner, the workshop overview, the learning outcomes, one
    card per entry in ``module_names`` and the start button. Pressing the
    start button sets ``st.session_state.workshop_started`` to ``True`` and
    reruns the script.
    """
    # Hero section with eye-catching design
    st.markdown("""
<div style="background: linear-gradient(135deg, #1E3A8A 0%, #2563EB 100%);
padding: 2rem; border-radius: 0.75rem; text-align: center;
margin-bottom: 2rem; box-shadow: 0 4px 6px rgba(0,0,0,0.3);">
<h1>Historical OCR Workshop</h1>
<p style="font-size: 1.25rem;">Unlock the potential of historical documents with modern OCR technology</p>
</div>
""", unsafe_allow_html=True)

    # Introduction with cleaner layout
    col1, col2 = st.columns([3, 2])
    with col1:
        st.markdown("""
<div style="background-color: #1f2937; padding: 1.5rem; border-radius: 0.75rem; margin-bottom: 1.5rem;">
<h3>Workshop Overview</h3>
This interactive workshop explores the application of OCR technology to historical documents,
combining theoretical understanding with practical experiences. Designed for historians,
archivists, and digital humanities scholars, it offers both conceptual frameworks and hands-on skills.
</div>
""", unsafe_allow_html=True)
        st.markdown("""
<div style="background-color: #374151; padding: 0.75rem; border-radius: 0.5rem;
margin: 1rem 0; border-left: 3px solid #3B82F6;">
<h4>What is OCR?</h4>
Optical Character Recognition (OCR) technology enables computers to extract text from images and documents.
Modern OCR uses AI vision models to understand both the text and its visual context, making it powerful for
historical research and digital humanities.
</div>
""", unsafe_allow_html=True)
    with col2:
        # Add an engaging research question
        st.markdown("""
<div style="background-color: #1E3A8A; color: white; padding: 0.75rem;
border-radius: 0.5rem; margin: 1rem 0; border-left: 3px solid #60A5FA;">
<h4>For Historians:</h4>
How might OCR technology transform our access to and interpretation of historical documents?
What new research questions become possible when large archives become machine-readable?
</div>
""", unsafe_allow_html=True)

        # Display a sample historical document image
        input_dir = Path(__file__).parent / "input"
        sample_path = input_dir / "magellan-travels.jpg"
        if sample_path.exists():
            try:
                from PIL import Image
                with Image.open(sample_path) as img:
                    st.image(img, caption="Sample Historical Document", width=300)
            except Exception:
                # Deliberate best-effort: the image is decorative, so a
                # missing Pillow install or an unreadable file must not
                # break the welcome screen.
                pass

    # What you'll learn section with visual learning outcomes
    st.markdown('<h3>What You\'ll Learn</h3>', unsafe_allow_html=True)

    # Create three columns for clean layout
    col1, col2, col3 = st.columns(3)
    with col1:
        st.markdown("""
<div style="background-color: #1f2937; padding: 1rem; border-radius: 0.5rem;">
<h4>Conceptual Understanding</h4>
- Text-image relationships in historical documents
- Evolution of OCR technology
- AI vision models for document analysis
- Historical typography challenges
</div>
""", unsafe_allow_html=True)
    with col2:
        st.markdown("""
<div style="background-color: #1f2937; padding: 1rem; border-radius: 0.5rem;">
<h4>Methodological Approaches</h4>
- Critical frameworks for OCR in historical research
- Hybrid computational-traditional methods
- Error analysis and interpretation
- Contextual reading strategies
</div>
""", unsafe_allow_html=True)
    with col3:
        st.markdown("""
<div style="background-color: #1f2937; padding: 1rem; border-radius: 0.5rem;">
<h4>Practical Skills</h4>
- Processing historical documents with OCR
- Analyzing and structuring extracted information
- Integrating OCR into research workflows
- Building searchable archives
</div>
""", unsafe_allow_html=True)

    # Module overview: odd-numbered modules in the left column, even-numbered
    # ones in the right. The numbers come from module_names instead of a
    # hard-coded 1..6 so the cards always match the sidebar list.
    st.markdown('<h3>Workshop Modules</h3>', unsafe_allow_html=True)
    module_numbers = range(1, len(module_names) + 1)
    col1, col2 = st.columns(2)
    with col1:
        for number in module_numbers[0::2]:
            _render_module_card(number)
    with col2:
        for number in module_numbers[1::2]:
            _render_module_card(number)

    # Inspirational quote
    st.markdown("""
<div style="font-style: italic; color: #D1D5DB; padding: 0.5rem 1rem;
border-left: 3px solid #4B5563; margin: 1rem 0;">
"The digital turn in historical research is not just about converting analog to digital;
it's about transforming how we access, analyze, and interpret the past."
<br/><br/>
<span style="font-size:0.9rem; text-align:right; display:block;">— Dr. Jane Winters, Professor of Digital Humanities</span>
</div>
""", unsafe_allow_html=True)

    # Start button with enhanced styling.
    # NOTE(review): the opening and closing <div> are emitted by separate
    # st.markdown calls, so they probably do not wrap the button — confirm.
    st.markdown('<div style="text-align: center; margin-top: 2rem;">', unsafe_allow_html=True)
    col1, col2, col3 = st.columns([1, 2, 1])
    with col2:
        if st.button("Begin Workshop Journey", key="start_workshop", type="primary", use_container_width=True):
            st.session_state.workshop_started = True
            st.rerun()
        st.markdown('<p style="text-align:center; margin-top:8px; font-size:0.9rem; color:#666;">No installation required • Start immediately</p>', unsafe_allow_html=True)
    st.markdown('</div>', unsafe_allow_html=True)


# Welcome screen if workshop hasn't been started
if not st.session_state.workshop_started:
    # Display the welcome screen (outside modules)
    welcome_screen()
else:
    # Get the current module to display
    current_module = st.session_state.current_module
    module = get_module(current_module)
    # Derived from module_names so progress and bounds agree with the
    # sidebar buttons, which are built from the same list.
    total_modules = len(module_names)

    # Navigation callbacks. They are not passed to page_wrapper below; kept
    # so the names remain defined as before.
    def nav_to_prev():
        """Step back one module unless already on the first one."""
        if current_module > 1:
            st.session_state.current_module = current_module - 1
            st.rerun()

    def nav_to_next():
        """Step forward one module unless already on the last one."""
        if current_module < total_modules:
            st.session_state.current_module = current_module + 1
            st.rerun()

    # Create the sidebar navigation
    with st.sidebar:
        st.markdown("<h1>Workshop Navigation</h1>", unsafe_allow_html=True)
        # Visual header
        st.markdown("<div style='display:flex; align-items:center; margin-bottom:20px;'>", unsafe_allow_html=True)
        # Show a progress indicator
        st.markdown(f"<div><b>Your Progress:</b> Module {current_module} of {total_modules}</div>", unsafe_allow_html=True)
        st.progress(current_module / total_modules)

        # Module navigation buttons; the active module is highlighted
        st.markdown("<h3>Modules</h3>", unsafe_allow_html=True)
        for i, name in enumerate(module_names, 1):
            btn_style = "primary" if i == current_module else "secondary"
            if st.button(f"{i}: {name}", key=f"nav_module_{i}", type=btn_style, use_container_width=True):
                st.session_state.current_module = i
                st.rerun()

        # About the workshop in a collapsible section
        with st.expander("About the Workshop"):
            st.markdown("""
This interactive workshop explores OCR technology for historical documents.
**How to use this workshop:**
1. Navigate through modules sequentially
2. Expand content sections to read more
3. Try the interactive OCR experiment
4. Reflect on research questions
For help or more information, use the reference materials in Module 6.
""")

        # Processing history if available
        if st.session_state.processing_history:
            with st.expander("Your Activity"):
                st.markdown(f"<b>Documents processed:</b> {len(st.session_state.processing_history)}", unsafe_allow_html=True)
                # Show the most recent document processed
                latest = st.session_state.processing_history[-1]
                # The file name is rendered as raw HTML below, so escape it:
                # otherwise markup in an uploaded file's name would be
                # injected into the page.
                latest_name = html.escape(str(latest['fileName']))
                ocr_mode = ' vision model' if latest['useVision'] else ' basic OCR'
                st.markdown(f"""
<div style="background:#f9f9f9; padding:8px; border-radius:4px; margin-top:10px; color:#333;">
<b>Latest document:</b> {latest_name}<br>
<span style="font-size:0.9rem;">Processed with {ocr_mode}</span>
</div>
""", unsafe_allow_html=True)

    # Render the current module content using the page wrapper
    page_wrapper(module.render, current_module)
# At the bottom of the page, create the navigation buttons for the fixed
# navigation bar.
# NOTE(review): the original comments call these buttons "hidden, activated
# by the fixed nav"; whatever hides them is not in this file — confirm.
# st.button takes no label_visibility argument (passing it raises TypeError),
# so it is not passed here.
if st.session_state.workshop_started:
    # Derived from module_names so the bounds match the sidebar buttons.
    total_modules = len(module_names)

    # Previous navigation button
    if st.session_state.current_module > 1:
        if st.button("←", key=f"nav_prev_{st.session_state.current_module-1}"):
            st.session_state.current_module -= 1
            st.rerun()

    # Next navigation button
    if st.session_state.current_module < total_modules:
        if st.button("→", key=f"nav_next_{st.session_state.current_module+1}"):
            st.session_state.current_module += 1
            st.rerun()

    # Module navigation dots, one per module
    for i in range(1, total_modules + 1):
        if st.button(f"{i}", key=f"nav_dot_{i}"):
            st.session_state.current_module = i
            st.rerun()