Spaces:

milwright
/

historical-ocr

Running

App Files Community

historical-ocr / modules / content / module6.py

milwright

submit pull for merge

85bdb4e verified 11 months ago

raw

history blame

7.4 kB

	import streamlit as st
	from layout import gray_container, key_concept, quote, tool_container
	from datetime import datetime

	# Static HTML fragments shown by render(). The text inside each literal is
	# reproduced exactly as it appears in this file (including the whitespace at
	# the start of each line) so the rendered markup is unchanged.
	_SUMMARY_HTML = """
	<h3>Workshop Summary</h3>
	<p>Throughout this workshop, we've explored:</p>
	<ol>
	<li><strong>Text-Image Interdependence</strong>: The complex relationship between textual and visual elements</li>
	<li><strong>OCR Technology</strong>: The evolution of OCR and its application to historical materials</li>
	<li><strong>Methodological Approaches</strong>: Hybrid strategies for working with historical texts</li>
	<li><strong>Practical Application</strong>: Hands-on experience with OCR processing tools</li>
	</ol>
	"""

	_TAKEAWAYS_HTML = """
	<h3>Key Takeaways</h3>
	<ol>
	<li><strong>OCR is Not Perfect</strong>: Even advanced AI models face challenges with historical documents</li>
	<li><strong>Context Matters</strong>: Vision-enhanced models provide better results by understanding document context</li>
	<li><strong>Hybrid Approaches</strong>: Combining computational methods with traditional research yields best results</li>
	<li><strong>Critical Evaluation</strong>: Always evaluate OCR outputs with awareness of limitations</li>
	<li><strong>Structured Extraction</strong>: Modern OCR goes beyond text recognition to understand document structure</li>
	</ol>
	"""

	_PLACEHOLDER_HTML = """
	<h3>Workshop Outcomes</h3>
	<p>Complete the interactive OCR experiment in Module 5 to generate your personal workshop statistics.</p>
	<p>You'll be able to see:</p>
	<ul>
	<li>Number of documents processed</li>
	<li>Comparison of vision vs. non-vision models</li>
	<li>Topics identified across your documents</li>
	<li>Performance metrics for your processing tasks</li>
	</ul>
	"""

	_TECH_HTML = """
	<h3>Technological Developments</h3>
	<ul>
	<li><strong>Multimodal AI models</strong>: Increasingly sophisticated understanding</li>
	<li><strong>Historical font training</strong>: Models trained on historical typography</li>
	<li><strong>Document intelligence</strong>: Enhanced understanding of structures</li>
	<li><strong>Collaborative correction</strong>: Platforms for collective improvement</li>
	</ul>
	"""

	_RESEARCH_HTML = """
	<h3>Research Applications</h3>
	<ul>
	<li><strong>Large-scale corpus analysis</strong>: Processing entire archives</li>
	<li><strong>Multilingual historical research</strong>: Working across languages</li>
	<li><strong>Image-text integration</strong>: New methodologies for visual analysis</li>
	<li><strong>Computational paleography</strong>: AI-assisted handwriting analysis</li>
	</ul>
	"""

	_QUOTE_TEXT = "The digital humanities are not about building, they're about sharing. The digital humanities are not about the digital at all. They're all about innovation and disruption. The digital humanities are really an insurgent humanities."

	_RESOURCES_HTML = """
	<h3>Additional Resources</h3>
	<ul>
	<li><a href="https://docs.mistral.ai/" target="_blank">Mistral AI Documentation</a>: Learn more about the OCR models used in this workshop</li>
	<li><a href="https://readcoop.eu/transkribus/" target="_blank">Transkribus</a>: Platform for historical document transcription</li>
	<li><a href="https://ocr-d.de/en/" target="_blank">OCR-D</a>: Coordinated OCR research project for historical documents</li>
	<li><a href="https://scholar.google.com/scholar?q=historical+OCR" target="_blank">Historical OCR Research Papers</a>: Academic research on historical OCR</li>
	</ul>
	"""

	_ACKNOWLEDGMENT_HTML = """
	<p>This workshop was designed as an educational resource for historians, archivists, and digital humanities scholars.</p>
	<p>It demonstrates the integration of modern AI vision-language models with historical research methodologies.</p>
	<p>Special thanks to the digital humanities community for continued innovation in computational approaches to historical research.</p>
	"""


	def _count_topics(history):
	    """Tally how often each topic occurs across the history entries.

	    Each entry is indexed with ``entry['result']``; entries whose result has
	    no ``'topics'`` key contribute nothing.

	    Returns:
	        A plain ``dict`` mapping topic -> count, ordered by first occurrence
	        (empty when no entry lists any topics).
	    """
	    # Local import so this block does not depend on the file's import header.
	    from collections import Counter

	    topics = []
	    for entry in history:
	        result = entry['result']
	        if 'topics' in result:
	            topics.extend(result['topics'])
	    # Convert back to a plain dict so st.bar_chart receives the same type
	    # the hand-written tally used to produce.
	    return dict(Counter(topics))


	def _render_statistics(history):
	    """Show document counts and a topic bar chart for a non-empty history.

	    Entries are read via ``entry['useVision']`` and ``entry['result']``.
	    """
	    st.subheader("Your Workshop Statistics")

	    total_docs = len(history)
	    vision_docs = sum(1 for entry in history if entry['useVision'])
	    non_vision_docs = total_docs - vision_docs

	    # Distinct names: these columns are nested inside the caller's right-hand
	    # column and must not rebind the caller's column variables.
	    metrics_left, metrics_right = st.columns(2)

	    with metrics_left:
	        st.metric("Documents Processed", total_docs)
	        st.metric("With Vision Model", vision_docs)

	    with metrics_right:
	        st.metric("Without Vision Model", non_vision_docs)

	    # The caller only invokes this helper for a truthy history, so no
	    # additional "total_docs > 0" guard is needed here.
	    st.subheader("Topics Encountered")
	    topic_counts = _count_topics(history)
	    if topic_counts:
	        # Display as a bar chart of topic frequencies
	        st.bar_chart(topic_counts)


	def _rerun():
	    """Rerun the script with whichever rerun API this Streamlit exposes."""
	    # Prefer st.rerun; fall back to the older experimental name so the
	    # restart button works on both newer and older Streamlit installs.
	    rerun = getattr(st, "rerun", None)
	    if rerun is None:
	        rerun = st.experimental_rerun
	    rerun()


	def render():
	    """Module 6: Conclusion and Future Directions

	    Renders the closing page: workshop summary and key takeaways, per-session
	    statistics (or a placeholder when ``st.session_state`` holds no
	    ``processing_history``), future directions, a quote, further resources,
	    acknowledgments, and a button that sets ``current_module`` back to 1.
	    """
	    st.title("Module 6: Conclusion and Future Directions")

	    overview_col, stats_col = st.columns([3, 2])

	    with overview_col:
	        gray_container(_SUMMARY_HTML)
	        gray_container(_TAKEAWAYS_HTML)

	    with stats_col:
	        # Display workshop statistics if there's processing history
	        history = st.session_state.get('processing_history')
	        if history:
	            _render_statistics(history)
	        else:
	            # Show placeholder stats
	            tool_container(_PLACEHOLDER_HTML)

	    # Future directions section
	    st.subheader("Future Directions")

	    tech_col, research_col = st.columns(2)

	    with tech_col:
	        gray_container(_TECH_HTML)

	    with research_col:
	        gray_container(_RESEARCH_HTML)

	    # Inspiring quote
	    quote(_QUOTE_TEXT, "Matthew Kirschenbaum, Professor of Digital Humanities")

	    # Additional resources
	    tool_container(_RESOURCES_HTML)

	    # Acknowledgments
	    st.subheader("Acknowledgments")
	    st.markdown(_ACKNOWLEDGMENT_HTML, unsafe_allow_html=True)

	    # Restart the workshop button
	    if st.button("Start Workshop Again", use_container_width=True):
	        # Reset the session state to start the workshop again.
	        # The processing history is deliberately left untouched.
	        if 'current_module' in st.session_state:
	            st.session_state.current_module = 1

	        _rerun()