Spaces:

vaibreact
/

audiolens-backend

Build error

App Files Files Community

Vaibhav Gaikwad committed on Mar 23

Commit

5de371d

1 Parent(s): 0e8ef8a

Revert "gpu to cpu fallback when limit quota exceeds"

Browse files

This reverts commit 0e8ef8a6fd94c23c977a6412ed027f70d54bfb4b.

Files changed (1) hide show

app.py +15 -64

app.py CHANGED Viewed

@@ -3,20 +3,15 @@ audiolens — app.py
 huggingface space backend (zerogpu + gradio native api)
 api endpoints (via gradio):
-    /call/classify       — document type classification (dit-base, gpu)
-    /call/classify_cpu   — same but cpu-only fallback (no gpu quota needed)
-    /call/ocr            — text extraction (easyocr, cpu)
-    /call/speak          — text to speech (kokoro, gpu)
-    /call/health         — check if space is warm
 the pwa calls these using the gradio js client (@gradio/client)
 or via gradio's rest api. each function decorated with @spaces.GPU
 gets a gpu allocation only for the duration of that call.
-when gpu quota is exceeded, the pwa falls back to:
-  - /call/classify_cpu for classification (slower but works)
-  - browser Web Speech API for tts (no server needed)
 llm extraction (gemini) is called directly from the pwa — not here.
 """
@@ -35,17 +30,6 @@ import gradio as gr
 from j2_preprocess import preprocess
-def get_device():
-    """picks the best available device at call time.
-    on hf, cuda is only available inside @spaces.GPU functions.
-    on mac, mps is always available. falls back to cpu."""
-    if torch.cuda.is_available():
-        return 'cuda'
-    if torch.backends.mps.is_available():
-        return 'mps'
-    return 'cpu'
 # ============================================================
 # -- dit class mapping --
 # ============================================================
@@ -120,9 +104,8 @@ def classify_fn(image):
         return {'error': 'no image provided'}
     try:
-        device = get_device()
-        dit_model.to(device)
-        inputs = dit_processor(images=image, return_tensors='pt').to(device)
         with torch.no_grad():
             logits = dit_model(**inputs).logits
@@ -139,34 +122,6 @@ def classify_fn(image):
         return {'error': str(e)}
-def classify_cpu_fn(image):
-    """
-    cpu-only fallback for classification.
-    called when gpu quota is exceeded.
-    same logic as classify_fn but runs entirely on cpu — slower but no quota.
-    called via gradio api: /call/classify_cpu
-    """
-    if image is None:
-        return {'error': 'no image provided'}
-    try:
-        dit_model.to('cpu')
-        inputs = dit_processor(images=image, return_tensors='pt').to('cpu')
-        with torch.no_grad():
-            logits = dit_model(**inputs).logits
-        selected_logits = logits[0, SELECTED_RVL_IDX]
-        pred_idx        = selected_logits.argmax().item()
-        confidence      = torch.softmax(selected_logits, dim=0)[pred_idx].item()
-        doc_type        = DIT_CLASS_MAP[SELECTED_RVL_IDX[pred_idx]]
-        return {'doc_type': doc_type, 'confidence': round(confidence, 4)}
-    except Exception as e:
-        return {'error': str(e)}
 def ocr_gpu(clean_image):
     """
     runs easyocr on a preprocessed image.
@@ -198,14 +153,21 @@ def ocr_fn(image):
         return 'error: no image provided'
     try:
-        # convert pil to cv2
         cv2_image = pil_to_cv2(image)
-        # preprocessing — easyocr handles its own internally test
         # clean = preprocess(cv2_image)
         # ocr inference on cpu
         text = ocr_gpu(cv2_image)
         return text
     except Exception as e:
@@ -312,17 +274,6 @@ with gr.Blocks(title='AudioLens API') as demo:
         api_name='health',
     )
-    # -- cpu fallbacks (hidden, api only — used when gpu quota is exceeded) --
-    classify_cpu_img = gr.Image(type='pil', visible=False)
-    classify_cpu_out = gr.JSON(visible=False)
-    classify_cpu_btn = gr.Button('classify_cpu', visible=False)
-    classify_cpu_btn.click(
-        fn=classify_cpu_fn,
-        inputs=classify_cpu_img,
-        outputs=classify_cpu_out,
-        api_name='classify_cpu',
-    )
     gr.Markdown("""
     ---
     **API endpoints** (use via [@gradio/client](https://www.gradio.app/guides/getting-started-with-the-js-client)):

 huggingface space backend (zerogpu + gradio native api)
 api endpoints (via gradio):
+    /call/classify   — document type classification (dit-base)
+    /call/ocr        — text extraction (easyocr)
+    /call/speak      — text to speech (kokoro)
+    /call/health     — check if space is warm
 the pwa calls these using the gradio js client (@gradio/client)
 or via gradio's rest api. each function decorated with @spaces.GPU
 gets a gpu allocation only for the duration of that call.
 llm extraction (gemini) is called directly from the pwa — not here.
 """
 from j2_preprocess import preprocess
 # ============================================================
 # -- dit class mapping --
 # ============================================================
         return {'error': 'no image provided'}
     try:
+        dit_model.to('cuda')
+        inputs = dit_processor(images=image, return_tensors='pt').to('cuda')
         with torch.no_grad():
             logits = dit_model(**inputs).logits
         return {'error': str(e)}
 def ocr_gpu(clean_image):
     """
     runs easyocr on a preprocessed image.
         return 'error: no image provided'
     try:
+        # convert pil to cv2 for preprocessing
         cv2_image = pil_to_cv2(image)
+        # # preprocessing runs on cpu — outside the gpu function
+        # clean = preprocess(cv2_image)
+        # # ocr inference on cpu
+        # text = ocr_gpu(clean)
+        # test: skip preprocess() and rely on easyocr's own internal preprocessing
         # clean = preprocess(cv2_image)
         # ocr inference on cpu
         text = ocr_gpu(cv2_image)
         return text
     except Exception as e:
         api_name='health',
     )
     gr.Markdown("""
     ---
     **API endpoints** (use via [@gradio/client](https://www.gradio.app/guides/getting-started-with-the-js-client)):