Spaces:

vaibreact
/

audiolens-backend

Build error

App Files Files Community

Vaibhav Gaikwad committed on Mar 23

Commit

0e8ef8a

1 Parent(s): 021b753

gpu to cpu fallback when the quota limit is exceeded

Browse files

Files changed (1) hide show

app.py +64 -15

app.py CHANGED Viewed

@@ -3,15 +3,20 @@ audiolens — app.py
 huggingface space backend (zerogpu + gradio native api)
 api endpoints (via gradio):
-    /call/classify   — document type classification (dit-base)
-    /call/ocr        — text extraction (easyocr)
-    /call/speak      — text to speech (kokoro)
-    /call/health     — check if space is warm
 the pwa calls these using the gradio js client (@gradio/client)
 or via gradio's rest api. each function decorated with @spaces.GPU
 gets a gpu allocation only for the duration of that call.
 llm extraction (gemini) is called directly from the pwa — not here.
 """
@@ -30,6 +35,17 @@ import gradio as gr
 from j2_preprocess import preprocess
 # ============================================================
 # -- dit class mapping --
 # ============================================================
@@ -104,8 +120,9 @@ def classify_fn(image):
         return {'error': 'no image provided'}
     try:
-        dit_model.to('cuda')
-        inputs = dit_processor(images=image, return_tensors='pt').to('cuda')
         with torch.no_grad():
             logits = dit_model(**inputs).logits
@@ -122,6 +139,34 @@ def classify_fn(image):
         return {'error': str(e)}
 def ocr_gpu(clean_image):
     """
     runs easyocr on a preprocessed image.
@@ -153,21 +198,14 @@ def ocr_fn(image):
         return 'error: no image provided'
     try:
-        # convert pil to cv2 for preprocessing
         cv2_image = pil_to_cv2(image)
-        # # preprocessing runs on cpu — outside the gpu function
-        # clean = preprocess(cv2_image)
-        # # ocr inference on cpu
-        # text = ocr_gpu(clean)
-        # trusting easyOCR for test preprocess
         # clean = preprocess(cv2_image)
         # ocr inference on cpu
         text = ocr_gpu(cv2_image)
         return text
     except Exception as e:
@@ -274,6 +312,17 @@ with gr.Blocks(title='AudioLens API') as demo:
         api_name='health',
     )
     gr.Markdown("""
     ---
     **API endpoints** (use via [@gradio/client](https://www.gradio.app/guides/getting-started-with-the-js-client)):

 huggingface space backend (zerogpu + gradio native api)
 api endpoints (via gradio):
+    /call/classify       — document type classification (dit-base, gpu)
+    /call/classify_cpu   — same but cpu-only fallback (no gpu quota needed)
+    /call/ocr            — text extraction (easyocr, cpu)
+    /call/speak          — text to speech (kokoro, gpu)
+    /call/health         — check if space is warm
 the pwa calls these using the gradio js client (@gradio/client)
 or via gradio's rest api. each function decorated with @spaces.GPU
 gets a gpu allocation only for the duration of that call.
+when gpu quota is exceeded, the pwa falls back to:
+  - /call/classify_cpu for classification (slower but works)
+  - browser Web Speech API for tts (no server needed)
 llm extraction (gemini) is called directly from the pwa — not here.
 """
 from j2_preprocess import preprocess
+def get_device():
+    """picks the best available torch device at call time.
+    checked in priority order: cuda first, then mps, then cpu.
+    on hf, cuda is only available inside @spaces.GPU functions.
+    on mac, mps is always available. falls back to cpu.
+    returns one of the strings 'cuda', 'mps' or 'cpu'."""
+    # availability is queried on every call (nothing is cached), so the
+    # answer reflects whatever backend is visible at the moment of the call
+    if torch.cuda.is_available():
+        return 'cuda'
+    if torch.backends.mps.is_available():
+        return 'mps'
+    # neither accelerator backend reported itself as available
+    return 'cpu'
 # ============================================================
 # -- dit class mapping --
 # ============================================================
         return {'error': 'no image provided'}
     try:
+        device = get_device()
+        dit_model.to(device)
+        inputs = dit_processor(images=image, return_tensors='pt').to(device)
         with torch.no_grad():
             logits = dit_model(**inputs).logits
         return {'error': str(e)}
+def classify_cpu_fn(image):
+    """
+    cpu-only fallback for classification.
+    called when gpu quota is exceeded.
+    same logic as classify_fn but runs entirely on cpu — slower but no quota.
+    called via gradio api: /call/classify_cpu
+    args:
+        image: the input image (wired to a gr.Image(type='pil') component),
+               or None when nothing was supplied.
+    returns:
+        {'doc_type': <label>, 'confidence': <float rounded to 4 places>}
+        on success, or {'error': <message>} when image is None or any
+        exception is raised during inference.
+    """
+    if image is None:
+        return {'error': 'no image provided'}
+    try:
+        # classify_fn moves this same dit_model object to whatever device it
+        # picked, so it is explicitly placed on cpu before running here
+        dit_model.to('cpu')
+        inputs = dit_processor(images=image, return_tensors='pt').to('cpu')
+        # inference only — no gradients needed
+        with torch.no_grad():
+            logits = dit_model(**inputs).logits
+        # keep only the logits at the SELECTED_RVL_IDX positions (first item
+        # of the batch); pred_idx below indexes into this subset
+        selected_logits = logits[0, SELECTED_RVL_IDX]
+        pred_idx        = selected_logits.argmax().item()
+        # softmax is taken over the selected subset only, so confidence is
+        # relative to the selected classes rather than all model outputs
+        confidence      = torch.softmax(selected_logits, dim=0)[pred_idx].item()
+        # map the subset position back to the original class index, then to its label
+        doc_type        = DIT_CLASS_MAP[SELECTED_RVL_IDX[pred_idx]]
+        return {'doc_type': doc_type, 'confidence': round(confidence, 4)}
+    except Exception as e:
+        # errors are reported in the response payload instead of being raised
+        return {'error': str(e)}
 def ocr_gpu(clean_image):
     """
     runs easyocr on a preprocessed image.
         return 'error: no image provided'
     try:
+        # convert pil to cv2
         cv2_image = pil_to_cv2(image)
+        # preprocessing disabled for now — testing whether easyocr's own internal preprocessing is sufficient
         # clean = preprocess(cv2_image)
         # ocr inference on cpu
         text = ocr_gpu(cv2_image)
         return text
     except Exception as e:
         api_name='health',
     )
+    # -- cpu fallbacks (hidden, api only — used when gpu quota is exceeded) --
+    classify_cpu_img = gr.Image(type='pil', visible=False)
+    classify_cpu_out = gr.JSON(visible=False)
+    classify_cpu_btn = gr.Button('classify_cpu', visible=False)
+    classify_cpu_btn.click(
+        fn=classify_cpu_fn,
+        inputs=classify_cpu_img,
+        outputs=classify_cpu_out,
+        api_name='classify_cpu',
+    )
     gr.Markdown("""
     ---
     **API endpoints** (use via [@gradio/client](https://www.gradio.app/guides/getting-started-with-the-js-client)):