"""Simple OCR demo: a Streamlit front-end over PaddleOCR (CPU-friendly)."""

import numpy as np
import streamlit as st
from paddleocr import PaddleOCR
from PIL import Image

st.set_page_config(page_title="OCR Demo", layout="centered")
st.title("📝 Simple OCR Demo")

st.markdown("""
This is a lightweight OCR demo using PaddleOCR.

**Note**: Originally intended for GLM-OCR, but that model requires GPU resources.
This demo uses PaddleOCR instead, which works on CPU.
""")


# Initialize PaddleOCR once per server process; st.cache_resource keeps the
# engine alive across Streamlit reruns.
@st.cache_resource
def load_ocr():
    """Create and cache the PaddleOCR engine.

    Returns:
        The PaddleOCR instance, or None if initialization failed (the error
        is shown in the UI so the page can stop gracefully instead of
        crashing on an unhandled exception).

    Fix: the original passed ``use_gpu=False`` together with
    ``use_textline_orientation=True``. ``use_gpu`` was removed in
    PaddleOCR 3.x — the same release that introduced
    ``use_textline_orientation`` — so the two kwargs never coexist in any
    release and construction raised a TypeError. CPU is the default device
    in 3.x, so the flag is simply dropped.
    """
    try:
        return PaddleOCR(use_textline_orientation=True, lang='en')
    except Exception as e:  # surface init failures (missing model files, etc.)
        st.error(f"Error loading OCR: {e}")
        return None


def _extract_texts(result):
    """Pull the recognized text lines out of a PaddleOCR result.

    Handles both result layouts so the app works against either major
    PaddleOCR version:
    - 3.x: a list of dict-like OCRResult objects carrying a 'rec_texts' key
    - 2.x: a list per page of ``[box, (text, score)]`` pairs
    """
    page = result[0]
    if hasattr(page, "get"):  # 3.x OCRResult behaves like a dict
        return list(page.get("rec_texts", []))
    return [line[1][0] for line in page]


with st.spinner("Loading OCR model..."):
    ocr = load_ocr()

if ocr is None:
    st.error("Failed to load OCR model. Please try refreshing.")
    st.stop()

# File uploader
uploaded_file = st.file_uploader(
    "Upload an image",
    type=["jpg", "jpeg", "png", "bmp"],
)

if uploaded_file is not None:
    # Display image. use_container_width replaces the deprecated
    # use_column_width kwarg (removed in recent Streamlit releases).
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image", use_container_width=True)

    if st.button("Extract Text", type="primary"):
        with st.spinner("Processing..."):
            try:
                # Force 3-channel RGB: uploaded PNGs are often RGBA or
                # palette-mode, which PaddleOCR does not accept directly.
                img_array = np.array(image.convert("RGB"))

                # Run OCR. The ``cls=True`` kwarg was part of the removed
                # 2.x call signature; textline-orientation handling is now
                # configured at engine init time instead.
                result = ocr.ocr(img_array)

                if result and result[0]:
                    st.success("Text extraction completed!")

                    # Extract text (version-agnostic result parsing)
                    extracted_text = "\n".join(_extract_texts(result))
                    st.text_area("Extracted Text", value=extracted_text, height=300)
                else:
                    st.warning("No text found in the image.")
            except Exception as e:
                st.error(f"Error: {str(e)}")

st.markdown("---")
st.markdown("""
**About GLM-OCR**: The original [GLM-OCR model](https://huggingface.co/zai-org/GLM-OCR)
is a powerful 0.9B parameter multimodal OCR model, but requires GPU resources to run
efficiently. For CPU-only environments like Hugging Face CPU Spaces, lighter
alternatives like PaddleOCR are more suitable.
""")