Spaces:

HarryLovesCode
/

ocr-test

Runtime error

App Files Files Community

HarryLovesCode commited on 24 days ago

Commit

1d8fe68

1 Parent(s): cb6f32c

test

Browse files

Files changed (2) hide show

app.py +86 -4
requirements.txt +8 -0

app.py CHANGED Viewed

@@ -1,7 +1,89 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch()

 import gradio as gr
+from PIL import Image
+import numpy as np
+# Transformers imports are deferred to avoid requiring heavy packages when
+# NO_MODEL_LOAD is set. The module-level imports happen only if we actually
+# need to load the model. This makes tests and CI simpler.
+import tempfile
+import os
+import shutil
+# Allow delaying heavy model load if the environment variable NO_MODEL_LOAD is set
+if os.environ.get('NO_MODEL_LOAD'):
+    tokenizer = None
+    model = None
+else:
+    # Import heavy transformer classes lazily
+    from transformers import AutoModel, AutoTokenizer
+    tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
+    try:
+        model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
+        model = model.eval()
+    except Exception as e:
+        # If model fails to load (e.g. due to no network or heavy resources), keep a placeholder
+        model = None
+def process_image(image):
+    """Saves an uploaded image to a temporary file and runs `model.chat(tokenizer, image_file, ocr_type='ocr')`.
+    Returns the model output as a string. If the model is unavailable or an
+    exception occurs, returns an informative error string.
+    """
+    if image is None:
+        return "No image provided."
+    # Convert numpy arrays to PIL Image if needed
+    if isinstance(image, np.ndarray):
+        pil_img = Image.fromarray(image)
+    else:
+        pil_img = image
+    # Save the image to a temp file (model.chat expects a path)
+    tmpfile = None
+    try:
+        tmp = tempfile.NamedTemporaryFile(delete=False, suffix='.jpg')
+        tmpfile = tmp.name
+        pil_img.save(tmpfile, format='JPEG')
+        tmp.close()
+        if model is None or not hasattr(model, 'chat'):
+            return "Model not available or does not implement `chat`."
+        # Call the model.chat method using an image file path (as requested)
+        res = model.chat(tokenizer, tmpfile, ocr_type='ocr')
+        # Try to give a human-readable string
+        try:
+            return str(res)
+        except Exception:
+            return f"Model returned an object of type {type(res)}: {res}"
+    except Exception as e:
+        return f"Error processing image: {repr(e)}"
+    finally:
+        # Clean up temp file
+        if tmpfile and os.path.exists(tmpfile):
+            try:
+                os.remove(tmpfile)
+            except Exception:
+                pass
+def _launch_demo():
+    """Create a Gradio Blocks UI and launch it. The interface contains an image
+    uploader, a 'Process' button, and a text output box which displays the
+    OCR/chat results from the loaded model.
+    """
+    with gr.Blocks(title="OCR Processing Demo") as demo:
+        gr.Markdown("## OCR Processing Demo\nUpload an image and press **Process** to run the OCR model.")
+        with gr.Row():
+            image_input = gr.Image(type='pil', label='Upload Image')
+            output_text = gr.Textbox(label='Detected text / model output', lines=8)
+        process_btn = gr.Button('Process')
+        process_btn.click(fn=process_image, inputs=image_input, outputs=output_text)
+    return demo
+if __name__ == "__main__":
+    demo = _launch_demo()
+    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+gradio
+transformers
+torch
+Pillow
+numpy
+safetensorsgradio
+pillow
+numpy