Vaibhav Gaikwad committed on
Commit
cf220c1
·
1 Parent(s): 374083e

optimise: ocr on cpu, reduce tts gpu reservation

Browse files
Files changed (1) hide show
  1. app.py +13 -13
app.py CHANGED
@@ -64,9 +64,9 @@ dit_model = AutoModelForImageClassification.from_pretrained('microsoft/dit-b
64
  dit_model.eval()
65
  print('dit-base loaded.')
66
 
67
- # -- ocr: easyocr (lazy-init inside gpu function so it binds to cuda) --
68
  ocr_reader = None
69
- print('easyocr will lazy-init on first ocr request.')
70
 
71
  # -- tts: kokoro --
72
  import soundfile as sf
@@ -87,9 +87,9 @@ def pil_to_cv2(pil_image):
87
  return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
88
 
89
 
90
- # ============================================================
91
- # -- gpu functions --
92
- # ============================================================
93
 
94
  @spaces.GPU
95
  def classify_fn(image):
@@ -122,17 +122,17 @@ def classify_fn(image):
122
  return {'error': str(e)}
123
 
124
 
125
- @spaces.GPU
126
  def ocr_gpu(clean_image):
127
  """
128
  runs easyocr on a preprocessed image.
129
- easyocr lazy-inits on first call so it binds to cuda.
 
130
  """
131
  global ocr_reader
132
  if ocr_reader is None:
133
  import easyocr
134
- ocr_reader = easyocr.Reader(['en'], gpu=True, verbose=False)
135
- print('easyocr initialised on gpu.')
136
 
137
  results = ocr_reader.readtext(clean_image, detail=0)
138
  return ' '.join(results)
@@ -143,8 +143,8 @@ def ocr_fn(image):
143
  extracts text from a document image.
144
  called via gradio api: /call/ocr
145
 
146
- preprocessing (deskew, denoise, contrast, binarise) runs on cpu
147
- before the gpu function is called for ocr inference.
148
 
149
  input: pil image (gradio Image component with type="pil")
150
  output: extracted text string
@@ -159,7 +159,7 @@ def ocr_fn(image):
159
  # preprocessing runs on cpu — outside the gpu function
160
  clean = preprocess(cv2_image)
161
 
162
- # ocr inference on gpu
163
  text = ocr_gpu(clean)
164
  return text
165
 
@@ -167,7 +167,7 @@ def ocr_fn(image):
167
  return f'error: {str(e)}'
168
 
169
 
170
- @spaces.GPU(duration=30)
171
  def speak_fn(text, voice):
172
  """
173
  converts text to speech using kokoro.
 
64
  dit_model.eval()
65
  print('dit-base loaded.')
66
 
67
+ # -- ocr: easyocr (lazy-init on first call, runs on cpu to save gpu quota) --
68
  ocr_reader = None
69
+ print('easyocr will lazy-init on first ocr request (cpu).')
70
 
71
  # -- tts: kokoro --
72
  import soundfile as sf
 
87
  return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
88
 
89
 
90
+ # -- endpoint section: ocr --
91
+ # preprocesses the image then runs easyocr — both on cpu.
92
+ # saves gpu quota for classify and tts only.
93
 
94
  @spaces.GPU
95
  def classify_fn(image):
 
122
  return {'error': str(e)}
123
 
124
 
 
125
  def ocr_gpu(clean_image):
126
  """
127
  runs easyocr on a preprocessed image.
128
+ runs on cpu to save gpu quota; easyocr is fast enough on cpu.
129
+ lazy-inits on first call.
130
  """
131
  global ocr_reader
132
  if ocr_reader is None:
133
  import easyocr
134
+ ocr_reader = easyocr.Reader(['en'], gpu=False, verbose=False)
135
+ print('easyocr initialised on cpu.')
136
 
137
  results = ocr_reader.readtext(clean_image, detail=0)
138
  return ' '.join(results)
 
143
  extracts text from a document image.
144
  called via gradio api: /call/ocr
145
 
146
+ both preprocessing and ocr run on cpu to save gpu quota.
147
+ easyocr is fast enough on cpu for document-sized images.
148
 
149
  input: pil image (gradio Image component with type="pil")
150
  output: extracted text string
 
159
  # preprocessing runs on cpu — outside the gpu function
160
  clean = preprocess(cv2_image)
161
 
162
+ # ocr inference on cpu
163
  text = ocr_gpu(clean)
164
  return text
165
 
 
167
  return f'error: {str(e)}'
168
 
169
 
170
+ @spaces.GPU(duration=15)
171
  def speak_fn(text, voice):
172
  """
173
  converts text to speech using kokoro.