twoimo committed on
Commit
6e0dd90
·
verified ·
1 Parent(s): e9e3585

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -65
app.py CHANGED
@@ -1,77 +1,86 @@
1
- import gradio as gr
2
  from transformers import AutoProcessor, AutoModelForImageTextToText
3
  import torch
4
  from PIL import Image
 
5
 
6
# --- One-time model setup (runs at import time) ------------------------------
# On failure both globals stay None; process_image checks for that and returns
# a user-facing error instead of crashing the app.
MODEL_PATH = "zai-org/GLM-OCR"
model = processor = None

try:
    print(f"Loading processor from {MODEL_PATH}...")
    processor = AutoProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True)

    print(f"Loading model from {MODEL_PATH}...")
    model = AutoModelForImageTextToText.from_pretrained(
        MODEL_PATH,
        torch_dtype="auto",      # let the checkpoint pick its native dtype
        device_map="auto",       # place on GPU if present, else CPU
        trust_remote_code=True,
    )
    print("Model loaded successfully!")
except Exception as e:
    # Boundary handler: log and fall back to the "not loaded" state so the
    # UI can still render and report the problem.
    print(f"Error loading model: {e}")
    model = processor = None
26
 
27
def process_image(image):
    """Run GLM-OCR on *image* and return the recognized text.

    Accepts a file path, a PIL image, or a numpy array. Returns an error
    string (never raises) when the model is unavailable or inference fails.
    """
    if model is None or processor is None:
        return "Error: Model not loaded. Please refresh the page and try again."

    try:
        # Normalize the input to a PIL image. A value that is already a
        # PIL.Image is passed through unchanged (no forced RGB conversion).
        if isinstance(image, str):
            pil_image = Image.open(image).convert("RGB")
        elif isinstance(image, Image.Image):
            pil_image = image
        else:
            pil_image = Image.fromarray(image).convert("RGB")

        chat = [{
            "role": "user",
            "content": [
                {"type": "image", "image": pil_image},
                {"type": "text", "text": "Text Recognition:"}
            ],
        }]

        model_inputs = processor.apply_chat_template(
            chat,
            tokenize=True,
            add_generation_prompt=True,
            return_dict=True,
            return_tensors="pt",
        ).to(model.device)

        # Some processors emit token_type_ids that generate() rejects.
        model_inputs.pop("token_type_ids", None)

        with torch.no_grad():
            output_ids = model.generate(**model_inputs, max_new_tokens=2048)

        # Decode only the newly generated tokens, skipping the prompt prefix.
        prompt_len = model_inputs["input_ids"].shape[1]
        return processor.decode(output_ids[0][prompt_len:], skip_special_tokens=True)

    except Exception as e:
        return f"Error processing image: {str(e)}"
 
 
 
 
 
65
 
66
# Minimal Gradio UI: one image in, one text box out, wired to process_image.
demo = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=gr.Textbox(label="Extracted Text"),
    title="GLM-OCR: Multimodal OCR Model",
    description="Upload an image to extract text using the GLM-OCR model.",
    # NOTE(review): allow_flagging was renamed/removed in Gradio >= 4;
    # confirm against the pinned gradio version in requirements.
    allow_flagging="never",
)

if __name__ == "__main__":
    demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
  from transformers import AutoProcessor, AutoModelForImageTextToText
3
  import torch
4
  from PIL import Image
5
+ import io
6
 
7
# Page chrome. set_page_config must be the first Streamlit call in the script.
st.set_page_config(page_title="GLM-OCR", layout="centered")

st.title("🎯 GLM-OCR: Multimodal OCR Model")
st.markdown("Upload an image to extract text using the GLM-OCR model.")
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
# Load model once per process; st.cache_resource shares the loaded objects
# across user sessions and script reruns.
@st.cache_resource
def load_model():
    """Load the GLM-OCR processor and model.

    Returns:
        tuple: (processor, model) on success, or (None, None) on failure —
        the error is surfaced to the user via st.error rather than raised.
    """
    try:
        MODEL_PATH = "zai-org/GLM-OCR"
        processor = AutoProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True)
        # float16 halves GPU memory, but many fp16 ops are unsupported or very
        # slow on CPU — on CPU-only hosts fall back to the checkpoint's native
        # dtype ("auto"), matching the previous Gradio version's behavior.
        dtype = torch.float16 if torch.cuda.is_available() else "auto"
        model = AutoModelForImageTextToText.from_pretrained(
            MODEL_PATH,
            torch_dtype=dtype,
            device_map="auto",
            trust_remote_code=True,
        )
        return processor, model
    except Exception as e:
        # Boundary handler: report in the UI instead of crashing the run.
        st.error(f"Error loading model: {str(e)}")
        return None, None
28
+
29
# ---- Model bootstrap --------------------------------------------------------
with st.spinner("Loading GLM-OCR model... This may take a moment."):
    processor, model = load_model()

if processor is None or model is None:
    st.error("Failed to load the model. Please try refreshing the page.")
    st.stop()  # halt this script run; nothing below works without the model

# ---- Input ------------------------------------------------------------------
uploaded_file = st.file_uploader(
    "Choose an image",
    type=["jpg", "jpeg", "png", "bmp", "gif"],
)

if uploaded_file is not None:
    # Normalize to RGB so the processor never sees palette/alpha modes.
    image = Image.open(uploaded_file).convert("RGB")
    # use_container_width replaces the deprecated (now removed)
    # use_column_width kwarg; rendering is identical.
    st.image(image, caption="Uploaded Image", use_container_width=True)

    # ---- Inference ----------------------------------------------------------
    if st.button("Extract Text", type="primary"):
        with st.spinner("Processing image... Please wait."):
            try:
                # Chat-style prompt: the image followed by the OCR instruction.
                messages = [{
                    "role": "user",
                    "content": [
                        {"type": "image", "image": image},
                        {"type": "text", "text": "Text Recognition:"}
                    ],
                }]

                inputs = processor.apply_chat_template(
                    messages, tokenize=True, add_generation_prompt=True,
                    return_dict=True, return_tensors="pt"
                ).to(model.device)

                # Some processors emit token_type_ids that generate() rejects.
                inputs.pop("token_type_ids", None)

                with torch.no_grad():
                    generated_ids = model.generate(**inputs, max_new_tokens=2048)

                # Decode only the newly generated tokens (skip the prompt).
                output_text = processor.decode(
                    generated_ids[0][inputs["input_ids"].shape[1]:],
                    skip_special_tokens=True,
                )

                st.success("Text extraction completed!")
                st.text_area("Extracted Text", value=output_text, height=300)

            except Exception as e:
                # Boundary handler: show the failure inline in the UI.
                st.error(f"Error processing image: {str(e)}")

st.markdown("---")
st.markdown("Powered by GLM-OCR from [ZAI](https://huggingface.co/zai-org)")