Kumaria committed on
Commit
28752c0
·
verified ·
1 Parent(s): 6ad043b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -38
app.py CHANGED
@@ -5,11 +5,8 @@ import requests
5
 
6
  HF_TOKEN = os.environ.get("HF_TOKEN")
7
 
8
- MODELS = {
9
- "whisper-large-v3": "https://router.huggingface.co/hf-inference/models/openai/whisper-large-v3",
10
- "whisper-medium": "https://router.huggingface.co/hf-inference/models/openai/whisper-medium",
11
- "whisper-base": "https://router.huggingface.co/hf-inference/models/openai/whisper-base",
12
- }
13
 
14
  CONTENT_TYPES = {
15
  ".wav": "audio/wav",
@@ -22,7 +19,7 @@ CONTENT_TYPES = {
22
  ".amr": "audio/AMR",
23
  }
24
 
25
- def transcribe(audio_file, model_choice):
26
  if audio_file is None:
27
  return "Please upload or record an audio file."
28
 
@@ -31,31 +28,26 @@ def transcribe(audio_file, model_choice):
31
 
32
  ext = os.path.splitext(audio_file)[-1].lower()
33
  content_type = CONTENT_TYPES.get(ext, "audio/wav")
34
- api_url = MODELS[model_choice]
35
 
36
  with open(audio_file, "rb") as f:
37
  audio_bytes = f.read()
38
 
39
  print(f"File: {audio_file} | Ext: {ext} | Content-Type: {content_type} | Size: {len(audio_bytes)} bytes")
40
 
41
- max_retries = 5
42
- retry_delay = 20 # seconds between retries
43
-
44
- for attempt in range(1, max_retries + 1):
45
  try:
46
- print(f"Attempt {attempt}/{max_retries}...")
47
-
48
  response = requests.post(
49
- api_url,
50
  headers={
51
  "Authorization": f"Bearer {HF_TOKEN}",
52
  "Content-Type": content_type,
53
  },
54
  data=audio_bytes,
55
- timeout=120, # 2 min timeout per request
56
  )
57
 
58
- print(f"Status: {response.status_code}")
59
 
60
  if response.status_code == 200:
61
  result = response.json()
@@ -64,42 +56,34 @@ def transcribe(audio_file, model_choice):
64
  return str(result)
65
 
66
  elif response.status_code in (503, 504):
67
- # Model loading or gateway timeout β€” wait and retry
68
- if attempt < max_retries:
69
- print(f"Model not ready (HTTP {response.status_code}), retrying in {retry_delay}s...")
70
- time.sleep(retry_delay)
71
- continue
72
  else:
73
- return f"⏳ Model is still loading after {max_retries} attempts. Please try again in a minute."
 
 
 
74
 
75
  else:
76
  return f"❌ Error {response.status_code}: {response.text[:300]}"
77
 
78
  except requests.exceptions.Timeout:
79
- if attempt < max_retries:
80
- print(f"Request timed out, retrying in {retry_delay}s...")
81
- time.sleep(retry_delay)
82
  else:
83
- return "❌ Request timed out after multiple attempts. Try a smaller model like whisper-base."
84
 
85
  except Exception as e:
86
  return f"❌ {type(e).__name__}: {str(e)}"
87
 
88
- return "❌ All retry attempts failed."
89
 
90
 
91
  with gr.Blocks() as demo:
92
- with gr.Sidebar():
93
- gr.Markdown("### βš™οΈ Settings")
94
- gr.Markdown("πŸ’‘ **Tip:** Use `whisper-base` for faster cold starts.")
95
- model_choice = gr.Dropdown(
96
- choices=list(MODELS.keys()),
97
- value="whisper-base", # fastest to cold-start
98
- label="Whisper Model",
99
- )
100
-
101
  gr.Markdown("# 🎀 Whisper Audio Transcription")
102
- gr.Markdown("Upload or record audio to get an instant transcript. First request may take ~1 min to warm up.")
103
 
104
  with gr.Row():
105
  with gr.Column():
@@ -119,7 +103,7 @@ with gr.Blocks() as demo:
119
 
120
  transcribe_btn.click(
121
  fn=transcribe,
122
- inputs=[audio_input, model_choice],
123
  outputs=transcript_output,
124
  )
125
 
 
5
 
6
  HF_TOKEN = os.environ.get("HF_TOKEN")
7
 
8
+ # Only confirmed working free model on hf-inference router
9
+ API_URL = "https://router.huggingface.co/hf-inference/models/openai/whisper-large-v3"
 
 
 
10
 
11
  CONTENT_TYPES = {
12
  ".wav": "audio/wav",
 
19
  ".amr": "audio/AMR",
20
  }
21
 
22
+ def transcribe(audio_file):
23
  if audio_file is None:
24
  return "Please upload or record an audio file."
25
 
 
28
 
29
  ext = os.path.splitext(audio_file)[-1].lower()
30
  content_type = CONTENT_TYPES.get(ext, "audio/wav")
 
31
 
32
  with open(audio_file, "rb") as f:
33
  audio_bytes = f.read()
34
 
35
  print(f"File: {audio_file} | Ext: {ext} | Content-Type: {content_type} | Size: {len(audio_bytes)} bytes")
36
 
37
+ for attempt in range(1, 6):
 
 
 
38
  try:
39
+ print(f"Attempt {attempt}/5...")
 
40
  response = requests.post(
41
+ API_URL,
42
  headers={
43
  "Authorization": f"Bearer {HF_TOKEN}",
44
  "Content-Type": content_type,
45
  },
46
  data=audio_bytes,
47
+ timeout=120,
48
  )
49
 
50
+ print(f"Status: {response.status_code} | Body: {response.text[:200]}")
51
 
52
  if response.status_code == 200:
53
  result = response.json()
 
56
  return str(result)
57
 
58
  elif response.status_code in (503, 504):
59
+ if attempt < 5:
60
+ print(f"Model warming up, retrying in 20s...")
61
+ time.sleep(20)
 
 
62
  else:
63
+ return "⏳ Model still loading after 5 attempts. Please try again in a minute."
64
+
65
+ elif response.status_code == 429:
66
+ return "⚠️ Rate limit hit. Please wait a moment and try again."
67
 
68
  else:
69
  return f"❌ Error {response.status_code}: {response.text[:300]}"
70
 
71
  except requests.exceptions.Timeout:
72
+ if attempt < 5:
73
+ print("Timeout, retrying in 20s...")
74
+ time.sleep(20)
75
  else:
76
+ return "❌ Request timed out repeatedly. Try a shorter audio clip."
77
 
78
  except Exception as e:
79
  return f"❌ {type(e).__name__}: {str(e)}"
80
 
81
+ return "❌ All attempts failed."
82
 
83
 
84
  with gr.Blocks() as demo:
 
 
 
 
 
 
 
 
 
85
  gr.Markdown("# 🎀 Whisper Audio Transcription")
86
+ gr.Markdown("Using `openai/whisper-large-v3` via HF free inference. First request may take ~30s to warm up.")
87
 
88
  with gr.Row():
89
  with gr.Column():
 
103
 
104
  transcribe_btn.click(
105
  fn=transcribe,
106
+ inputs=[audio_input],
107
  outputs=transcript_output,
108
  )
109