Spaces:
Sleeping
Sleeping
File size: 5,109 Bytes
b8b55ff |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
"""
Streamlit App for Text Extraction from Images
UI layer for the text extraction service.
"""
import html
import json
from pathlib import Path
import streamlit as st
from PIL import Image
from service.text_extraction_service import TextExtractionService
# Configure the browser tab (title, icon) and use the full page width.
# NOTE(review): the icon literal looks mis-encoded; confirm the intended emoji.
st.set_page_config(
    layout="wide",
    page_icon="π",
    page_title="Text Extraction from Images",
)

# Seed the per-session slots with None so that later attribute-style reads
# of st.session_state never hit a missing key.
for _state_key in ("extraction_service", "extraction_result"):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = None
@st.cache_resource(show_spinner="Loading OCR model... This may take a moment.")
def get_extraction_service():
    """Return the shared ``TextExtractionService`` instance.

    ``st.cache_resource`` memoizes the returned object, so the body (and
    therefore the model load) only runs on a cache miss; every later call
    gets the same instance back. The spinner text is supplied through
    ``show_spinner`` so it is displayed only while the service is actually
    being constructed.

    The service is intentionally *not* mirrored into ``st.session_state``:
    a second, per-session copy of the reference would make the cached
    result depend on hidden session state and could keep an extra model
    instance alive after the resource cache is cleared.

    Returns:
        The cached ``TextExtractionService`` instance.
    """
    return TextExtractionService()
# Inline style for the box that shows the extracted text.
_TEXT_BOX_STYLE = (
    "background-color: #f0f2f6; padding: 15px; border-radius: 5px; "
    "max-height: 300px; overflow-y: auto; color: #000000; "
    "white-space: pre-wrap; font-family: monospace;"
)

# Usage instructions shown while no file has been uploaded.
_HOW_TO_USE_MARKDOWN = """
1. **Upload an image**: Click on the file uploader and select an image file (PNG, JPG, JPEG)
2. **Adjust settings** (optional): Use the sidebar to adjust max tokens if needed
3. **Extract text**: Click the "Extract Text" button
4. **View results**: The extracted text and metadata will be displayed
5. **Download**: Download the results as JSON if needed
**Note**: The first extraction may take longer as the model needs to be loaded.
Subsequent extractions will be faster.
"""


def _render_instructions():
    """Show the getting-started hint and the collapsible usage steps."""
    st.info("π Please upload an image file to get started.")
    with st.expander("βΉοΈ How to use"):
        st.markdown(_HOW_TO_USE_MARKDOWN)


def _render_result(result):
    """Render the extracted text, the raw JSON and a JSON download button.

    Args:
        result: Dict-like extraction result. Reads the ``extracted_text``
            and ``source_image`` keys; the whole object is shown and
            serialized as JSON.
    """
    st.divider()
    st.subheader("π Extracted Text")

    # Fall back to an empty string so a missing/None value cannot crash
    # html.escape, then escape so the text cannot inject markup into the
    # raw-HTML container below.
    escaped_text = html.escape(result.get("extracted_text") or "")
    st.markdown(
        f'<div style="{_TEXT_BOX_STYLE}">{escaped_text}</div>',
        unsafe_allow_html=True,
    )

    with st.expander("π Full JSON Metadata"):
        st.json(result)

    # Name the download after the image the result was produced from, so the
    # file name always agrees with its contents.
    source_name = result.get("source_image") or "extraction"
    st.download_button(
        label="πΎ Download JSON",
        data=json.dumps(result, ensure_ascii=False, indent=2),
        file_name=f"{Path(source_name).stem}.json",
        mime="application/json",
    )


def main():
    """Render the app: upload an image, run text extraction, show the result.

    The extraction result is kept in ``st.session_state.extraction_result``
    so it survives Streamlit reruns. It is only displayed while the uploaded
    file's name matches the ``source_image`` recorded with the result, so a
    result from a previous image is never shown under a different upload.
    """
    st.title("π Text Extraction from Images")
    st.markdown("Upload an image to extract text using olmOCR model.")

    # Sidebar for settings
    with st.sidebar:
        st.header("βοΈ Settings")
        max_tokens = st.slider(
            "Max Tokens",
            min_value=512,
            max_value=4096,
            value=2048,
            step=256,
            help="Maximum number of tokens to generate. Higher values allow longer text extraction.",
        )

    uploaded_file = st.file_uploader(
        "Choose an image file",
        type=["png", "jpg", "jpeg"],
        help="Upload an image file (PNG, JPG, JPEG)",
    )

    if uploaded_file is None:
        _render_instructions()
        return

    st.subheader("π· Uploaded Image")
    try:
        image = Image.open(uploaded_file)
    except OSError as e:
        # PIL reports unreadable/unsupported image data as an OSError
        # subclass; show a message instead of an unhandled traceback.
        st.error(f"Could not open the uploaded file as an image: {e}")
        return
    st.image(image)
    st.caption(f"File: {uploaded_file.name}")
    st.divider()

    st.subheader("π Text Extraction")
    if st.button("π Extract Text", type="primary"):
        try:
            service = get_extraction_service()
            with st.spinner("Extracting text from image... This may take a while."):
                result = service.extract_text_from_image(
                    image,
                    max_new_tokens=max_tokens,
                )
            # Tag the result with its source before storing it, so session
            # state never holds a result that cannot be matched to an upload.
            result["source_image"] = uploaded_file.name
            st.session_state.extraction_result = result
        except Exception as e:
            # UI boundary: report any model/service failure to the user
            # (summary plus traceback) rather than aborting the script run.
            st.error(f"β Error during extraction: {e}")
            st.exception(e)

    # Only show a stored result that belongs to the currently uploaded file.
    result = st.session_state.extraction_result
    if result and result.get("source_image") == uploaded_file.name:
        _render_result(result)


# Run the app when this file is executed as a script.
if __name__ == "__main__":
    main()
|