twoimo committed on
Commit
cf548cf
·
verified ·
1 Parent(s): cdf6029

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -0
app.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# --- Third-party dependencies ---
import gradio as gr
import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForImageTextToText

# Hub identifier of the OCR checkpoint served by this demo.
MODEL_PATH = "zai-org/GLM-OCR"

# Load the processor/model pair eagerly at import time. On any failure both
# globals end up as None so the UI can still start and report the problem
# instead of crashing the whole app.
processor = None
model = None
try:
    processor = AutoProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True)
    model = AutoModelForImageTextToText.from_pretrained(
        MODEL_PATH,
        torch_dtype="auto",     # pick the checkpoint's native precision
        device_map="auto",      # place weights on whatever accelerator exists
        trust_remote_code=True,
    )
except Exception as e:
    print(f"Error loading model: {e}")
    model = None
    processor = None
21
+
22
def process_image(image):
    """Run OCR on *image* and return the recognized text.

    Args:
        image: PIL image supplied by the Gradio ``Image`` component, or
            ``None`` when the user submits without uploading anything.

    Returns:
        The decoded text on success, otherwise a human-readable error
        message (this function never raises to the UI layer).
    """
    if model is None or processor is None:
        return "Error: Model not loaded. Please check your connection or try again later."

    # Fix: Gradio passes None when no image was uploaded; previously this
    # fell through to the generic handler with a confusing error message.
    if image is None:
        return "Error: No image provided. Please upload an image first."

    try:
        # Chat-style message pairing the image with the OCR instruction
        # prompt expected by GLM-OCR.
        messages = [{
            "role": "user",
            "content": [
                {"type": "image", "image": image},
                {"type": "text", "text": "Text Recognition:"}
            ],
        }]

        # Tokenize the conversation and move all tensors to the model's device.
        inputs = processor.apply_chat_template(
            messages, tokenize=True, add_generation_prompt=True,
            return_dict=True, return_tensors="pt"
        ).to(model.device)

        # Some processors emit token_type_ids that generate() does not accept.
        inputs.pop("token_type_ids", None)

        # Inference only — no autograd bookkeeping needed.
        with torch.no_grad():
            generated_ids = model.generate(**inputs, max_new_tokens=2048)

        # Slice off the prompt tokens and decode only the generated tail.
        output_text = processor.decode(
            generated_ids[0][inputs["input_ids"].shape[1]:],
            skip_special_tokens=True,
        )

        return output_text

    except Exception as e:
        return f"Error processing image: {str(e)}"
59
+
60
# --- Gradio interface ---
with gr.Blocks(title="GLM-OCR Demo") as demo:
    gr.Markdown("# GLM-OCR: Multimodal OCR Model")
    gr.Markdown("Upload an image to extract text using the GLM-OCR model.")

    # Side-by-side layout: image in, recognized text out.
    with gr.Row():
        input_image = gr.Image(type="pil", label="Input Image")
        output_text = gr.Textbox(label="Extracted Text", lines=10)

    submit_btn = gr.Button("Extract Text", variant="primary")
    submit_btn.click(fn=process_image, inputs=input_image, outputs=output_text)

    # NOTE(review): this example URL points at an ephemeral /tmp upload on
    # the Space and is unlikely to resolve — replace with a stable asset.
    gr.Examples(
        examples=[
            ["https://huggingface.co/spaces/twoimo/glm-ocr-demo/file=/tmp/tmpqr9q1h5g.png"],
        ],
        inputs=input_image,
        label="Example Images (if available)"
    )

# Launch the app only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()