import streamlit as st
from pathlib import Path
from layout import gray_container, tool_container, key_concept, quote
# Fallback image shown whenever a local illustration cannot be displayed.
_PLACEHOLDER_IMAGE_URL = "https://placekitten.com/800/400"


def _show_image_or_placeholder(image_path: Path, caption: str) -> None:
    """Display the image at *image_path*, falling back to a placeholder.

    The image is opened with Pillow and passed to ``st.image`` with the given
    caption. If anything goes wrong (file missing, Pillow not installed,
    unreadable image, ...), the placeholder URL is shown instead, captioned
    ``"<caption> placeholder"``.

    Args:
        image_path: Location of the image file to display.
        caption: Caption for the real image; the placeholder caption is
            derived from it.
    """
    try:
        # Imported lazily so that a missing Pillow install degrades to the
        # placeholder instead of preventing this module from being imported.
        from PIL import Image

        with Image.open(image_path) as img:
            # NOTE(review): newer Streamlit releases appear to deprecate
            # `use_column_width` in favour of `use_container_width`; confirm
            # against the pinned Streamlit version before switching.
            st.image(img, caption=caption, use_column_width=True)
    except Exception:
        # Deliberate best-effort: a decorative image must never break the
        # page, so every failure mode maps to the same placeholder.
        st.image(_PLACEHOLDER_IMAGE_URL, caption=f"{caption} placeholder")


def render() -> None:
    """Render the "Module 4: Methodological Approaches" page.

    Layout, top to bottom:

    1. Page title.
    2. Two equal-width columns: hybrid methodologies with the architecture
       diagram on the left, the Mistral-OCR overview with the workflow image
       on the right.
    3. The practical workflow list in a tool container.
    4. A "Methodological Considerations" section with advantages and
       limitations side by side.
    5. A closing quote.

    The content string literals below are kept flush-left so the text handed
    to the layout helpers carries no leading indentation.
    """
    st.title("Module 4: Methodological Approaches")

    # Illustrations live in an "input" directory that is a sibling of the
    # directory containing this file. Resolved once, used by both columns.
    input_dir = Path(__file__).parent.parent / "input"

    left_col, right_col = st.columns([1, 1])

    with left_col:
        hybrid_content = """
Hybrid Methodologies
1. Computational + Human Reading
- OCR for initial processing and discovery
- Human review for context and interpretation
- Iterative refinement of computational outputs
2. Close + Distant Reading
- Distant reading through large-scale OCR processing
- Close reading of selected passages
- Zooming between scales of analysis
"""
        gray_container(hybrid_content)

        # Architecture diagram, or a placeholder when it cannot be shown.
        _show_image_or_placeholder(
            input_dir / "diagram.jpg",
            "Historical VLM architecture",
        )

    with right_col:
        mistral_content = """
Mistral-OCR-Latest: State-of-the-Art
The Mistral-OCR model represents a significant advancement:
- Multimodal Understanding: Processes both visual and textual information
- Contextual Awareness: Considers historical context
- Layout Recognition: Preserves complex document structures
- Historical Font Adaptation: Trained on diverse historical typography
"""
        gray_container(mistral_content)

        # Workflow illustration, or a placeholder when it cannot be shown.
        _show_image_or_placeholder(
            input_dir / "workflow.jpg",
            "Mistral OCR workflow",
        )

    # Practical workflow section (full width, below the two columns).
    workflow_content = """
Practical Workflow
A typical historical OCR workflow with Mistral-OCR includes:
- Selection: Choosing appropriate documents
- Preprocessing: Enhancing images before OCR
- OCR Processing: Running documents through vision-enhanced OCR
- Post-processing: Cleaning up outputs and structured extraction
- Verification: Cross-checking results against originals
- Integration: Incorporating OCR outputs into research materials
"""
    tool_container(workflow_content)

    # Methodological considerations: advantages vs. limitations side by side.
    st.subheader("Methodological Considerations")

    pros_col, cons_col = st.columns([1, 1])

    with pros_col:
        advantages_content = """
Advantages of Hybrid Approaches
- Balance between automation and expert judgment
- Ability to process large volumes while preserving detail
- Context-sensitive analysis of complex documents
- Iterative improvement of results
"""
        gray_container(advantages_content)

    with cons_col:
        limitations_content = """
Limitations and Challenges
- OCR errors requiring expert correction
- Bias in training data affecting recognition
- Complexity in evaluating OCR quality
- Technical infrastructure requirements
"""
        gray_container(limitations_content)

    # Closing quote with attribution.
    quote_content = "The most powerful digital humanities work occurs at the intersection of computational methods and traditional humanistic inquiry."
    quote(quote_content, "Dr. Sarah E. Bond, Digital Humanities Scholar")