Spaces:

twoimo
/

glm-ocr-demo

Runtime error

App Files Community

twoimo committed on Feb 3

Commit

ff85888

verified ·

1 Parent(s): 8a5fed5

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -60

app.py CHANGED Viewed

@@ -1,86 +1,73 @@
 import streamlit as st
-from transformers import AutoProcessor, AutoModelForImageTextToText
-import torch
 from PIL import Image
-import io
-st.set_page_config(page_title="GLM-OCR", layout="centered")
-st.title("🎯 GLM-OCR: Multimodal OCR Model")
-st.markdown("Upload an image to extract text using the GLM-OCR model.")
-# Load model with caching
 @st.cache_resource
-def load_model():
     try:
-        MODEL_PATH = "zai-org/GLM-OCR"
-        processor = AutoProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True)
-        model = AutoModelForImageTextToText.from_pretrained(
-            MODEL_PATH,
-            torch_dtype=torch.float16,
-            device_map="auto",
-            trust_remote_code=True,
-        )
-        return processor, model
     except Exception as e:
-        st.error(f"Error loading model: {str(e)}")
-        return None, None
-# Load model
-with st.spinner("Loading GLM-OCR model... This may take a moment."):
-    processor, model = load_model()
-if processor is None or model is None:
-    st.error("Failed to load the model. Please try refreshing the page.")
     st.stop()
 # File uploader
 uploaded_file = st.file_uploader(
-    "Choose an image",
-    type=["jpg", "jpeg", "png", "bmp", "gif"],
 )
 if uploaded_file is not None:
-    # Display the image
-    image = Image.open(uploaded_file).convert("RGB")
     st.image(image, caption="Uploaded Image", use_column_width=True)
-    # Process the image
     if st.button("Extract Text", type="primary"):
-        with st.spinner("Processing image... Please wait."):
             try:
-                # Prepare input
-                messages = [{
-                    "role": "user",
-                    "content": [
-                        {"type": "image", "image": image},
-                        {"type": "text", "text": "Text Recognition:"}
-                    ],
-                }]
-                # Process
-                inputs = processor.apply_chat_template(
-                    messages, tokenize=True, add_generation_prompt=True,
-                    return_dict=True, return_tensors="pt"
-                ).to(model.device)
-                inputs.pop("token_type_ids", None)
-                # Generate
-                with torch.no_grad():
-                    generated_ids = model.generate(**inputs, max_new_tokens=2048)
-                # Decode
-                output_text = processor.decode(
-                    generated_ids[0][inputs["input_ids"].shape[1]:],
-                    skip_special_tokens=True,
-                )
-                st.success("Text extraction completed!")
-                st.text_area("Extracted Text", value=output_text, height=300)
             except Exception as e:
-                st.error(f"Error processing image: {str(e)}")
 st.markdown("---")
-st.markdown("Powered by GLM-OCR from [ZAI](https://huggingface.co/zai-org)")

 import streamlit as st
+from paddleocr import PaddleOCR
 from PIL import Image
+import numpy as np
+st.set_page_config(page_title="OCR Demo", layout="centered")
+st.title("📝 Simple OCR Demo")
+st.markdown("""
+This is a lightweight OCR demo using PaddleOCR.
+**Note**: Originally intended for GLM-OCR, but that model requires GPU resources.
+This demo uses PaddleOCR instead, which works on CPU.
+""")
+# Initialize PaddleOCR
 @st.cache_resource
+def load_ocr():
     try:
+        ocr = PaddleOCR(use_angle_cls=True, lang='en', use_gpu=False)
+        return ocr
     except Exception as e:
+        st.error(f"Error loading OCR: {e}")
+        return None
+with st.spinner("Loading OCR model..."):
+    ocr = load_ocr()
+if ocr is None:
+    st.error("Failed to load OCR model. Please try refreshing.")
     st.stop()
 # File uploader
 uploaded_file = st.file_uploader(
+    "Upload an image",
+    type=["jpg", "jpeg", "png", "bmp"],
 )
 if uploaded_file is not None:
+    # Display image
+    image = Image.open(uploaded_file)
     st.image(image, caption="Uploaded Image", use_column_width=True)
     if st.button("Extract Text", type="primary"):
+        with st.spinner("Processing..."):
             try:
+                # Convert to numpy array
+                img_array = np.array(image)
+                # Run OCR
+                result = ocr.ocr(img_array, cls=True)
+                if result and result[0]:
+                    st.success("Text extraction completed!")
+                    # Extract text
+                    extracted_text = "\n".join([line[1][0] for line in result[0]])
+                    st.text_area("Extracted Text", value=extracted_text, height=300)
+                else:
+                    st.warning("No text found in the image.")
             except Exception as e:
+                st.error(f"Error: {str(e)}")
 st.markdown("---")
+st.markdown("""
+**About GLM-OCR**:
+The original [GLM-OCR model](https://huggingface.co/zai-org/GLM-OCR) is a powerful 0.9B parameter
+multimodal OCR model, but requires GPU resources to run efficiently.
+For CPU-only environments like Hugging Face CPU Spaces, lighter alternatives like PaddleOCR are more suitable.
+""")