Spaces:
Build error
Build error
Vaibhav Gaikwad committed on
Commit ·
cf220c1
1
Parent(s): 374083e
optimise: ocr on cpu, reduce tts gpu reservation
Browse files
app.py
CHANGED
|
@@ -64,9 +64,9 @@ dit_model = AutoModelForImageClassification.from_pretrained('microsoft/dit-b
|
|
| 64 |
dit_model.eval()
|
| 65 |
print('dit-base loaded.')
|
| 66 |
|
| 67 |
-
# -- ocr: easyocr (lazy-init
|
| 68 |
ocr_reader = None
|
| 69 |
-
print('easyocr will lazy-init on first ocr request.')
|
| 70 |
|
| 71 |
# -- tts: kokoro --
|
| 72 |
import soundfile as sf
|
|
@@ -87,9 +87,9 @@ def pil_to_cv2(pil_image):
|
|
| 87 |
return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
|
| 88 |
|
| 89 |
|
| 90 |
-
#
|
| 91 |
-
#
|
| 92 |
-
#
|
| 93 |
|
| 94 |
@spaces.GPU
|
| 95 |
def classify_fn(image):
|
|
@@ -122,17 +122,17 @@ def classify_fn(image):
|
|
| 122 |
return {'error': str(e)}
|
| 123 |
|
| 124 |
|
| 125 |
-
@spaces.GPU
|
| 126 |
def ocr_gpu(clean_image):
|
| 127 |
"""
|
| 128 |
runs easyocr on a preprocessed image.
|
| 129 |
-
|
|
|
|
| 130 |
"""
|
| 131 |
global ocr_reader
|
| 132 |
if ocr_reader is None:
|
| 133 |
import easyocr
|
| 134 |
-
ocr_reader = easyocr.Reader(['en'], gpu=
|
| 135 |
-
print('easyocr initialised on
|
| 136 |
|
| 137 |
results = ocr_reader.readtext(clean_image, detail=0)
|
| 138 |
return ' '.join(results)
|
|
@@ -143,8 +143,8 @@ def ocr_fn(image):
|
|
| 143 |
extracts text from a document image.
|
| 144 |
called via gradio api: /call/ocr
|
| 145 |
|
| 146 |
-
preprocessing
|
| 147 |
-
|
| 148 |
|
| 149 |
input: pil image (gradio Image component with type="pil")
|
| 150 |
output: extracted text string
|
|
@@ -159,7 +159,7 @@ def ocr_fn(image):
|
|
| 159 |
# preprocessing runs on cpu — outside the gpu function
|
| 160 |
clean = preprocess(cv2_image)
|
| 161 |
|
| 162 |
-
# ocr inference on
|
| 163 |
text = ocr_gpu(clean)
|
| 164 |
return text
|
| 165 |
|
|
@@ -167,7 +167,7 @@ def ocr_fn(image):
|
|
| 167 |
return f'error: {str(e)}'
|
| 168 |
|
| 169 |
|
| 170 |
-
@spaces.GPU(duration=
|
| 171 |
def speak_fn(text, voice):
|
| 172 |
"""
|
| 173 |
converts text to speech using kokoro.
|
|
|
|
| 64 |
dit_model.eval()
|
| 65 |
print('dit-base loaded.')
|
| 66 |
|
| 67 |
+
# -- ocr: easyocr (lazy-init on first call, runs on cpu to save gpu quota) --
|
| 68 |
ocr_reader = None
|
| 69 |
+
print('easyocr will lazy-init on first ocr request (cpu).')
|
| 70 |
|
| 71 |
# -- tts: kokoro --
|
| 72 |
import soundfile as sf
|
|
|
|
| 87 |
return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
|
| 88 |
|
| 89 |
|
| 90 |
+
# -- endpoint section: ocr --
|
| 91 |
+
# preprocesses the image then runs easyocr — both on cpu.
|
| 92 |
+
# saves gpu quota for classify and tts only.
|
| 93 |
|
| 94 |
@spaces.GPU
|
| 95 |
def classify_fn(image):
|
|
|
|
| 122 |
return {'error': str(e)}
|
| 123 |
|
| 124 |
|
|
|
|
| 125 |
def ocr_gpu(clean_image):
    """
    extracts text from an already-preprocessed image with easyocr.

    the shared module-level reader is created on the first call
    (english only, gpu disabled, verbose output off) and reused after that.

    input:  preprocessed image, passed straight through to readtext
    output: the values returned by readtext(detail=0), joined by single spaces
    """
    global ocr_reader

    # lazy-init: the easyocr import and reader construction only happen
    # the first time ocr is actually requested.
    if ocr_reader is None:
        import easyocr

        ocr_reader = easyocr.Reader(['en'], gpu=False, verbose=False)
        print('easyocr initialised on cpu.')

    fragments = ocr_reader.readtext(clean_image, detail=0)
    return ' '.join(fragments)
|
|
|
|
| 143 |
extracts text from a document image.
|
| 144 |
called via gradio api: /call/ocr
|
| 145 |
|
| 146 |
+
both preprocessing and ocr run on cpu to save gpu quota.
|
| 147 |
+
easyocr is fast enough on cpu for document-sized images.
|
| 148 |
|
| 149 |
input: pil image (gradio Image component with type="pil")
|
| 150 |
output: extracted text string
|
|
|
|
| 159 |
# preprocessing runs on cpu — outside the gpu function
|
| 160 |
clean = preprocess(cv2_image)
|
| 161 |
|
| 162 |
+
# ocr inference on cpu
|
| 163 |
text = ocr_gpu(clean)
|
| 164 |
return text
|
| 165 |
|
|
|
|
| 167 |
return f'error: {str(e)}'
|
| 168 |
|
| 169 |
|
| 170 |
+
@spaces.GPU(duration=15)
|
| 171 |
def speak_fn(text, voice):
|
| 172 |
"""
|
| 173 |
converts text to speech using kokoro.
|