HarryLovesCode committed on
Commit
1d8fe68
·
1 Parent(s): cb6f32c
Files changed (2) hide show
  1. app.py +86 -4
  2. requirements.txt +8 -0
app.py CHANGED
@@ -1,7 +1,89 @@
1
  import gradio as gr
 
 
 
 
 
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ from PIL import Image
3
+ import numpy as np
4
+ # Transformers imports are deferred to avoid requiring heavy packages when
5
+ # NO_MODEL_LOAD is set. The module-level imports happen only if we actually
6
+ # need to load the model. This makes tests and CI simpler.
7
+ import tempfile
8
+ import os
9
+ import shutil
10
 
11
# Module-level OCR model state. Both names stay None when loading is skipped
# or fails; process_image() checks `model` and reports that it is unavailable.
#
# Setting the NO_MODEL_LOAD environment variable to any non-empty value skips
# the load entirely, which keeps tests and CI free of the heavy dependencies.
tokenizer = None
model = None

if not os.environ.get('NO_MODEL_LOAD'):
    import logging

    try:
        # Imported here so `transformers` is only required when the model is
        # actually loaded.
        from transformers import AutoModel, AutoTokenizer

        # NOTE(review): trust_remote_code=True executes Python code shipped in
        # the model repository; consider pinning a revision.
        tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
        model = AutoModel.from_pretrained(
            'ucaslcl/GOT-OCR2_0',
            trust_remote_code=True,
            low_cpu_mem_usage=True,
            use_safetensors=True,
            pad_token_id=tokenizer.eos_token_id,
        )
        model = model.eval()
    except Exception:
        # Loading can fail for many reasons (no network, missing packages,
        # not enough memory). Keep the app importable with a placeholder, but
        # record the cause instead of discarding it.
        logging.getLogger(__name__).exception(
            "Could not load 'ucaslcl/GOT-OCR2_0'; OCR will be unavailable."
        )
        model = None
25
 
26
+
27
def process_image(image):
    """Run OCR on an uploaded image and return the result as text.

    The image is written to a temporary JPEG file because ``model.chat`` is
    called with a file path; the file is removed again before returning.

    Args:
        image: A PIL image, a NumPy array (converted with
            ``Image.fromarray``), or ``None`` when nothing was uploaded.

    Returns:
        The model output converted with ``str``. The function does not raise:
        it returns a message string when no image was given, when the model
        is unavailable, or when any processing step fails.
    """
    if image is None:
        return "No image provided."

    # Check the model first so no temporary file is written when there is
    # nothing to run the image through.
    if model is None or not hasattr(model, 'chat'):
        return "Model not available or does not implement `chat`."

    tmpfile = None
    try:
        # Converted inside the try block so a malformed array is reported as
        # an error string rather than an uncaught exception.
        pil_img = Image.fromarray(image) if isinstance(image, np.ndarray) else image

        # JPEG cannot store alpha or palette data (e.g. RGBA or P images).
        if pil_img.mode not in ('RGB', 'L'):
            pil_img = pil_img.convert('RGB')

        # Only the path is needed. The handle is closed right away so it is
        # not leaked if saving fails.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as tmp:
            tmpfile = tmp.name
        pil_img.save(tmpfile, format='JPEG')

        res = model.chat(tokenizer, tmpfile, ocr_type='ocr')
        return str(res)
    except Exception as e:
        # UI boundary: show the failure in the output box instead of raising.
        return f"Error processing image: {repr(e)}"
    finally:
        if tmpfile is not None:
            try:
                os.remove(tmpfile)
            except OSError:
                # Best-effort cleanup; a leftover temp file must not mask the
                # result being returned.
                pass
70
+
71
+
72
def _launch_demo():
    """Build the OCR demo interface and hand it back to the caller.

    The interface has a heading, an image uploader, a text box for the
    result, and a 'Process' button that calls `process_image` with the
    uploaded image. The Blocks object is returned without being launched.
    """
    heading = (
        "## OCR Processing Demo\n"
        "Upload an image and press **Process** to run the OCR model."
    )
    with gr.Blocks(title="OCR Processing Demo") as ui:
        gr.Markdown(heading)
        # NOTE(review): assumes the row holds the uploader and the text box,
        # with the button below it - confirm against the intended layout.
        with gr.Row():
            uploader = gr.Image(type='pil', label='Upload Image')
            result_box = gr.Textbox(label='Detected text / model output', lines=8)
        run_button = gr.Button('Process')
        run_button.click(fn=process_image, inputs=uploader, outputs=result_box)
    return ui
85
+
86
+
87
if __name__ == "__main__":
    # Build the interface and start serving it only when this file is run
    # directly, so importing the module has no UI side effects.
    demo = _launch_demo()
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ transformers
3
+ torch
4
+ Pillow
5
+ numpy
6
+ safetensors
7
+ pillow
8
+ numpy