"""Simple OCR demo: a Streamlit front-end over PaddleOCR (CPU-friendly)."""

import numpy as np
import streamlit as st
from paddleocr import PaddleOCR
from PIL import Image

st.set_page_config(page_title="OCR Demo", layout="centered")
st.title("📝 Simple OCR Demo")

st.markdown("""
This is a lightweight OCR demo using PaddleOCR.

**Note**: Originally intended for GLM-OCR, but that model requires GPU resources.
This demo uses PaddleOCR instead, which works on CPU.
""")


# Initialize PaddleOCR once per server process; st.cache_resource keeps the
# engine alive across Streamlit reruns.
@st.cache_resource
def load_ocr():
    """Create and cache the PaddleOCR engine.

    Returns:
        The PaddleOCR instance, or None if initialization failed (the error
        is shown in the UI so the page can stop gracefully instead of
        crashing on an unhandled exception).

    Fix: the original passed ``use_gpu=False`` together with
    ``use_textline_orientation=True``. ``use_gpu`` was removed in
    PaddleOCR 3.x — the same release that introduced
    ``use_textline_orientation`` — so the two kwargs never coexist in any
    release and construction raised a TypeError. CPU is the default device
    in 3.x, so the flag is simply dropped.
    """
    try:
        return PaddleOCR(use_textline_orientation=True, lang='en')
    except Exception as e:  # surface init failures (missing model files, etc.)
        st.error(f"Error loading OCR: {e}")
        return None


def _extract_texts(result):
    """Pull the recognized text lines out of a PaddleOCR result.

    Handles both result layouts so the app works against either major
    PaddleOCR version:
    - 3.x: a list of dict-like OCRResult objects carrying a 'rec_texts' key
    - 2.x: a list per page of ``[box, (text, score)]`` pairs
    """
    page = result[0]
    if hasattr(page, "get"):  # 3.x OCRResult behaves like a dict
        return list(page.get("rec_texts", []))
    return [line[1][0] for line in page]


with st.spinner("Loading OCR model..."):
    ocr = load_ocr()

if ocr is None:
    st.error("Failed to load OCR model. Please try refreshing.")
    st.stop()

# File uploader
uploaded_file = st.file_uploader(
    "Upload an image",
    type=["jpg", "jpeg", "png", "bmp"],
)

if uploaded_file is not None:
    # Display image. use_container_width replaces the deprecated
    # use_column_width kwarg (removed in recent Streamlit releases).
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image", use_container_width=True)

    if st.button("Extract Text", type="primary"):
        with st.spinner("Processing..."):
            try:
                # Force 3-channel RGB: uploaded PNGs are often RGBA or
                # palette-mode, which PaddleOCR does not accept directly.
                img_array = np.array(image.convert("RGB"))

                # Run OCR. The ``cls=True`` kwarg was part of the removed
                # 2.x call signature; textline-orientation handling is now
                # configured at engine init time instead.
                result = ocr.ocr(img_array)

                if result and result[0]:
                    st.success("Text extraction completed!")

                    # Extract text (version-agnostic result parsing)
                    extracted_text = "\n".join(_extract_texts(result))
                    st.text_area("Extracted Text", value=extracted_text, height=300)
                else:
                    st.warning("No text found in the image.")
            except Exception as e:
                st.error(f"Error: {str(e)}")

st.markdown("---")
st.markdown("""
**About GLM-OCR**: The original [GLM-OCR model](https://huggingface.co/zai-org/GLM-OCR)
is a powerful 0.9B parameter multimodal OCR model, but requires GPU resources to run
efficiently. For CPU-only environments like Hugging Face CPU Spaces, lighter
alternatives like PaddleOCR are more suitable.
""")