File size: 4,612 Bytes
85bdb4e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import streamlit as st
from pathlib import Path
from layout import gray_container, tool_container, key_concept, research_question

def _history_entry_labels(item):
    """Build the display strings for one OCR processing-history entry.

    Missing or malformed fields fall back to placeholders so that a single
    incomplete entry cannot raise and abort rendering of the whole page.

    Args:
        item: A history entry. Expected to be a dict with ``fileName``,
            ``result`` (a dict that may contain ``topics``) and
            ``useVision``; anything else is treated as an empty entry.

    Returns:
        A ``(file_name, topics_text, vision_text)`` tuple of strings.
    """
    if not isinstance(item, dict):
        item = {}

    file_name = item.get('fileName') or 'Unknown document'

    result = item.get('result')
    topics = result.get('topics') if isinstance(result, dict) else None
    if isinstance(topics, str):
        # A bare string would otherwise be joined character by character.
        topics = [topics]
    if not topics:
        # Covers both a missing key and an explicitly empty list.
        topics = ['Unknown']
    # str() keeps join() from raising on non-string topic values.
    topics_text = ', '.join(str(topic) for topic in topics)

    vision_text = 'Yes' if item.get('useVision') else 'No'
    return file_name, topics_text, vision_text


def render():
    """Render Module 3: OCR Technology and Historical Documents.

    Lays out the page in this order: title, a two-column overview
    (traditional vs. modern OCR on the left, historical-document challenges
    and an illustration on the right), a key-concept box, a technical
    comparison of pipelines, a research question, and - when
    ``st.session_state.processing_history`` is present and non-empty - an
    expander listing the documents recorded there.

    The HTML snippets are handed to the ``layout`` helpers unchanged; how
    they are wrapped and displayed is decided by those helpers.
    """
    
    st.title("Module 3: OCR Technology and Historical Documents")
    
    col1, col2 = st.columns([1, 1])
    
    with col1:
        traditional_content = """
        <h3>Traditional OCR Approaches</h3>
        <ol>
            <li><strong>Pattern Matching</strong>: Early OCR compared characters to templates</li>
            <li><strong>Feature Extraction</strong>: Identifying key features of characters</li>
            <li><strong>Statistical Models</strong>: Using probabilities to improve recognition</li>
        </ol>
        """
        gray_container(traditional_content)
        
        modern_content = """
        <h3>Modern AI-Enhanced OCR</h3>
        <ol>
            <li><strong>Neural Networks</strong>: Deep learning models trained on vast datasets</li>
            <li><strong>Computer Vision</strong>: Advanced image processing techniques</li>
            <li><strong>Language Models</strong>: Contextual understanding to resolve ambiguities</li>
            <li><strong>Multimodal Models</strong>: Integration of text, layout, and visual understanding</li>
        </ol>
        """
        gray_container(modern_content)
    
    with col2:
        challenges_content = """
        <h3>Challenges with Historical Documents</h3>
        <p>Historical materials present unique difficulties:</p>
        <ul>
            <li><strong>Typography Variation</strong>: Non-standardized fonts and styles</li>
            <li><strong>Historical Language</strong>: Archaic vocabulary and grammar</li>
            <li><strong>Layout Complexity</strong>: Non-linear arrangements</li>
            <li><strong>Document Degradation</strong>: Fading, tears, stains, and damage</li>
            <li><strong>Material Artifacts</strong>: Paper texture, binding shadows, etc.</li>
        </ul>
        """
        gray_container(challenges_content)
        
        # Display OCR processing diagram (fetched from an external URL at render time)
        st.image("https://cdn.dribbble.com/users/412119/screenshots/16353886/media/82e593c60a5e4d460db917236eab6ece.jpg", 
                caption="OCR processing layers")
    
    # Key concept section
    concept_content = """
    <h3>Vision-Enhanced OCR</h3>
    <p>Modern OCR systems like those based on Mistral-7B-Vision combine:</p>
    <ol>
        <li>Image understanding capabilities to process the visual aspects</li>
        <li>Text recognition to extract characters accurately</li>
        <li>Layout analysis to understand structure</li>
        <li>Contextual language processing for improved accuracy</li>
    </ol>
    <p>This multimodal approach dramatically improves OCR results on historical documents compared to traditional OCR.</p>
    """
    key_concept(concept_content)
    
    # Technical details in a tool container
    tech_content = """
    <h3>Technical Evolution of OCR</h3>
    <p><strong>Traditional OCR Pipeline:</strong></p>
    <ol>
        <li>Preprocessing (binarization, noise removal)</li>
        <li>Layout analysis (segmentation)</li>
        <li>Character recognition (pattern matching)</li>
        <li>Post-processing (spell checking)</li>
    </ol>
    
    <p><strong>Modern LLM-Vision Pipeline:</strong></p>
    <ol>
        <li>Image normalization</li>
        <li>Image embedding via vision encoder</li>
        <li>Integration with language model</li>
        <li>Joint inference across modalities</li>
        <li>Structured extraction of information</li>
    </ol>
    """
    tool_container(tech_content)
    
    # Research question
    research_content = """
    <h4>Consider This:</h4>
    <p>How might the capabilities of vision-language models change our approach to digitizing historical archives?</p>
    """
    research_question(research_content)
    
    # Display history if available
    if 'processing_history' in st.session_state and st.session_state.processing_history:
        with st.expander("Your OCR Processing History"):
            st.markdown("You've already processed the following documents:")
            
            for item in st.session_state.processing_history:
                file_name, topics_text, vision_text = _history_entry_labels(item)
                st.markdown(f"**{file_name}**")
                # Distinct names so the page-level col1/col2 are not shadowed.
                topics_col, vision_col = st.columns(2)
                with topics_col:
                    st.write(f"**Topics:** {topics_text}")
                with vision_col:
                    st.write(f"**Vision model used:** {vision_text}")