Spaces:
Sleeping
Sleeping
Commit
·
557b80a
1
Parent(s):
1e5dfc2
fix error 503
Browse files
- app/api/transcribe.py +0 -1
- app/services/nlp_postprocess.py +42 -6
app/api/transcribe.py
CHANGED
|
@@ -10,7 +10,6 @@ import time
|
|
| 10 |
from app.core.audio_utils import save_upload_file, get_audio_info, ensure_wav_16k_mono, make_temp_path, download_file_from_url
|
| 11 |
from app.core.asr_engine import load_model, transcribe_file, transcribe_file_chunks
|
| 12 |
from app.config import settings
|
| 13 |
-
from app.services.text_normalizer import normalize_text
|
| 14 |
from app.services.nlp_postprocess import normalize_and_extract
|
| 15 |
# Summary and mindmap generation moved to Note Service; do not import here
|
| 16 |
from app.services.note_client import NoteServiceClient
|
|
|
|
| 10 |
from app.core.audio_utils import save_upload_file, get_audio_info, ensure_wav_16k_mono, make_temp_path, download_file_from_url
|
| 11 |
from app.core.asr_engine import load_model, transcribe_file, transcribe_file_chunks
|
| 12 |
from app.config import settings
|
|
|
|
| 13 |
from app.services.nlp_postprocess import normalize_and_extract
|
| 14 |
# Summary and mindmap generation moved to Note Service; do not import here
|
| 15 |
from app.services.note_client import NoteServiceClient
|
app/services/nlp_postprocess.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
| 1 |
import asyncio
|
| 2 |
import json
|
| 3 |
import logging
|
|
|
|
|
|
|
| 4 |
|
| 5 |
from app.infra.redis_client import redis_client
|
| 6 |
from app.utils.hashing import sha256
|
|
@@ -17,6 +19,9 @@ except Exception:
|
|
| 17 |
pass
|
| 18 |
|
| 19 |
CACHE_TTL = 60 * 60 * 24 * 3 # 3 days
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
# Tạo client Gemini nếu có API key
|
| 22 |
_gemini_client = None
|
|
@@ -89,8 +94,43 @@ Cấu trúc JSON bắt buộc (chỉ trả JSON, không giải thích thêm):
|
|
| 89 |
# resp.text là chuỗi model trả (có thể chứa code block)
|
| 90 |
return resp.text
|
| 91 |
|
| 92 |
-
|
| 93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
if text:
|
| 95 |
# clean JSON
|
| 96 |
start = text.find("{")
|
|
@@ -106,10 +146,6 @@ Cấu trúc JSON bắt buộc (chỉ trả JSON, không giải thích thêm):
|
|
| 106 |
logging.warning(f"[nlp_postprocess] Failed to parse Gemini JSON, fallback to raw_text: {e}")
|
| 107 |
else:
|
| 108 |
logging.warning("[nlp_postprocess] Gemini response has no JSON block, fallback to raw_text")
|
| 109 |
-
except GoogleAPIError as e:
|
| 110 |
-
logging.error(f"[nlp_postprocess] Gemini API error: {e}")
|
| 111 |
-
except Exception as e:
|
| 112 |
-
logging.exception(f"[nlp_postprocess] Gemini call failed, fallback to raw_text: {e}")
|
| 113 |
|
| 114 |
# 4) Try write back to Redis (best effort)
|
| 115 |
try:
|
|
|
|
| 1 |
import asyncio
|
| 2 |
import json
|
| 3 |
import logging
|
| 4 |
+
import random
|
| 5 |
+
import time
|
| 6 |
|
| 7 |
from app.infra.redis_client import redis_client
|
| 8 |
from app.utils.hashing import sha256
|
|
|
|
| 19 |
pass
|
| 20 |
|
| 21 |
CACHE_TTL = 60 * 60 * 24 * 3 # 3 days
|
| 22 |
+
# Retry settings for transient model errors (503 / UNAVAILABLE)
|
| 23 |
+
RETRY_MAX_ATTEMPTS = 3
|
| 24 |
+
RETRY_BASE_BACKOFF = 1.0
|
| 25 |
|
| 26 |
# Tạo client Gemini nếu có API key
|
| 27 |
_gemini_client = None
|
|
|
|
| 94 |
# resp.text là chuỗi model trả (có thể chứa code block)
|
| 95 |
return resp.text
|
| 96 |
|
| 97 |
+
# Try with a small exponential backoff for transient server errors
|
| 98 |
+
text = None
|
| 99 |
+
attempt = 0
|
| 100 |
+
while attempt < RETRY_MAX_ATTEMPTS:
|
| 101 |
+
attempt += 1
|
| 102 |
+
try:
|
| 103 |
+
text = await loop.run_in_executor(None, call)
|
| 104 |
+
break
|
| 105 |
+
except Exception as e:
|
| 106 |
+
# Try to detect transient server-side/genai errors (503 / UNAVAILABLE)
|
| 107 |
+
is_transient = False
|
| 108 |
+
try:
|
| 109 |
+
# try to import genai-specific ServerError if available
|
| 110 |
+
from google.genai import errors as _genai_errors # type: ignore
|
| 111 |
+
ServerError = getattr(_genai_errors, "ServerError", None)
|
| 112 |
+
except Exception:
|
| 113 |
+
ServerError = None
|
| 114 |
+
|
| 115 |
+
if ServerError is not None and isinstance(e, ServerError):
|
| 116 |
+
is_transient = True
|
| 117 |
+
else:
|
| 118 |
+
msg = str(e)
|
| 119 |
+
if "503" in msg or "UNAVAILABLE" in msg.upper() or "model is overloaded" in msg.lower():
|
| 120 |
+
is_transient = True
|
| 121 |
+
|
| 122 |
+
if is_transient and attempt < RETRY_MAX_ATTEMPTS:
|
| 123 |
+
backoff = RETRY_BASE_BACKOFF * (2 ** (attempt - 1)) + random.uniform(0, 0.5)
|
| 124 |
+
logging.warning(f"[nlp_postprocess] Gemini transient error (attempt {attempt}): {e}; retrying in {backoff:.1f}s")
|
| 125 |
+
# use asyncio.sleep to not block event loop
|
| 126 |
+
await asyncio.sleep(backoff)
|
| 127 |
+
continue
|
| 128 |
+
else:
|
| 129 |
+
logging.exception(f"[nlp_postprocess] Gemini call failed, fallback to raw_text: {e}")
|
| 130 |
+
text = None
|
| 131 |
+
break
|
| 132 |
+
|
| 133 |
+
if text:
|
| 134 |
if text:
|
| 135 |
# clean JSON
|
| 136 |
start = text.find("{")
|
|
|
|
| 146 |
logging.warning(f"[nlp_postprocess] Failed to parse Gemini JSON, fallback to raw_text: {e}")
|
| 147 |
else:
|
| 148 |
logging.warning("[nlp_postprocess] Gemini response has no JSON block, fallback to raw_text")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
|
| 150 |
# 4) Try write back to Redis (best effort)
|
| 151 |
try:
|