Spaces:

milwright
/

historical-ocr

Running

App Files Files Community

historical-ocr / modules /content /module3.py

milwright

submit pull for merge

85bdb4e verified 11 months ago

raw

history blame

4.61 kB

	import streamlit as st
	from pathlib import Path
	from layout import gray_container, tool_container, key_concept, research_question

	# Static HTML fragments shown by render(). They are kept at module level so
	# the rendering code stays short; each one is handed unchanged to a layout
	# helper (gray_container, key_concept, tool_container, research_question).
	_TRADITIONAL_OCR_HTML = """
	<h3>Traditional OCR Approaches</h3>
	<ol>
	<li><strong>Pattern Matching</strong>: Early OCR compared characters to templates</li>
	<li><strong>Feature Extraction</strong>: Identifying key features of characters</li>
	<li><strong>Statistical Models</strong>: Using probabilities to improve recognition</li>
	</ol>
	"""

	_MODERN_OCR_HTML = """
	<h3>Modern AI-Enhanced OCR</h3>
	<ol>
	<li><strong>Neural Networks</strong>: Deep learning models trained on vast datasets</li>
	<li><strong>Computer Vision</strong>: Advanced image processing techniques</li>
	<li><strong>Language Models</strong>: Contextual understanding to resolve ambiguities</li>
	<li><strong>Multimodal Models</strong>: Integration of text, layout, and visual understanding</li>
	</ol>
	"""

	_CHALLENGES_HTML = """
	<h3>Challenges with Historical Documents</h3>
	<p>Historical materials present unique difficulties:</p>
	<ul>
	<li><strong>Typography Variation</strong>: Non-standardized fonts and styles</li>
	<li><strong>Historical Language</strong>: Archaic vocabulary and grammar</li>
	<li><strong>Layout Complexity</strong>: Non-linear arrangements</li>
	<li><strong>Document Degradation</strong>: Fading, tears, stains, and damage</li>
	<li><strong>Material Artifacts</strong>: Paper texture, binding shadows, etc.</li>
	</ul>
	"""

	_VISION_OCR_HTML = """
	<h3>Vision-Enhanced OCR</h3>
	<p>Modern OCR systems like those based on Mistral-7B-Vision combine:</p>
	<ol>
	<li>Image understanding capabilities to process the visual aspects</li>
	<li>Text recognition to extract characters accurately</li>
	<li>Layout analysis to understand structure</li>
	<li>Contextual language processing for improved accuracy</li>
	</ol>
	<p>This multimodal approach dramatically improves OCR results on historical documents compared to traditional OCR.</p>
	"""

	_TECH_EVOLUTION_HTML = """
	<h3>Technical Evolution of OCR</h3>
	<p><strong>Traditional OCR Pipeline:</strong></p>
	<ol>
	<li>Preprocessing (binarization, noise removal)</li>
	<li>Layout analysis (segmentation)</li>
	<li>Character recognition (pattern matching)</li>
	<li>Post-processing (spell checking)</li>
	</ol>

	<p><strong>Modern LLM-Vision Pipeline:</strong></p>
	<ol>
	<li>Image normalization</li>
	<li>Image embedding via vision encoder</li>
	<li>Integration with language model</li>
	<li>Joint inference across modalities</li>
	<li>Structured extraction of information</li>
	</ol>
	"""

	_RESEARCH_QUESTION_HTML = """
	<h4>Consider This:</h4>
	<p>How might the capabilities of vision-language models change our approach to digitizing historical archives?</p>
	"""


	def _format_topics(result):
	    """Return the topics of one OCR result as a comma-separated string.

	    Args:
	        result: The ``result`` mapping of a history entry, or ``None``.

	    Returns:
	        The topics joined with ``", "``. ``"Unknown"`` is returned when the
	        mapping is missing, has no ``topics`` key, or its topics are empty.
	    """
	    topics = (result or {}).get('topics') or ['Unknown']
	    if isinstance(topics, str):
	        # Joining a bare string would split it into single characters.
	        return topics
	    # str() keeps the join from raising on non-string topic values.
	    return ', '.join(str(topic) for topic in topics)


	def _render_processing_history(history):
	    """Show the documents the user has already processed in an expander.

	    Args:
	        history: Iterable of history entries. Each entry is assumed to be
	            dict-like with ``fileName``, ``result`` and ``useVision`` keys
	            (TODO confirm against the code that fills
	            ``st.session_state.processing_history``). Missing keys are shown
	            as "Unknown" instead of aborting the whole page.
	    """
	    with st.expander("Your OCR Processing History"):
	        st.markdown("You've already processed the following documents:")

	        for item in history:
	            st.markdown(f"{item.get('fileName', 'Unknown')}")

	            if 'useVision' in item:
	                vision_label = 'Yes' if item['useVision'] else 'No'
	            else:
	                vision_label = 'Unknown'

	            topics_col, vision_col = st.columns(2)
	            with topics_col:
	                st.write(f"Topics: {_format_topics(item.get('result'))}")
	            with vision_col:
	                st.write(f"Vision model used: {vision_label}")


	def render():
	    """Render Module 3: OCR Technology and Historical Documents.

	    Draws the page title, a two-column overview (traditional and modern OCR
	    on the left, challenges of historical documents on the right), an
	    illustrative image, the key-concept, technical-details and
	    research-question sections, and finally the user's OCR processing
	    history when ``st.session_state.processing_history`` is non-empty.
	    """
	    st.title("Module 3: OCR Technology and Historical Documents")

	    approaches_col, challenges_col = st.columns([1, 1])

	    with approaches_col:
	        gray_container(_TRADITIONAL_OCR_HTML)
	        gray_container(_MODERN_OCR_HTML)

	    with challenges_col:
	        gray_container(_CHALLENGES_HTML)

	    # NOTE(review): the image and the sections below are assumed to span the
	    # full page width rather than sit inside the second column -- confirm
	    # against the intended layout.
	    # Display OCR processing diagram
	    st.image(
	        "https://cdn.dribbble.com/users/412119/screenshots/16353886/media/82e593c60a5e4d460db917236eab6ece.jpg",
	        caption="OCR processing layers",
	    )

	    # Key concept section
	    key_concept(_VISION_OCR_HTML)

	    # Technical details in a tool container
	    tool_container(_TECH_EVOLUTION_HTML)

	    # Research question
	    research_question(_RESEARCH_QUESTION_HTML)

	    # Display history if available
	    if 'processing_history' in st.session_state and st.session_state.processing_history:
	        _render_processing_history(st.session_state.processing_history)