Audio2Text

Paused

App Files Files Community

Ricky01anjay committed on Mar 1

Commit

94561ca

verified ·

1 Parent(s): 41a36e7

Update app.py

Browse files

Files changed (1) hide show

app.py +56 -66

app.py CHANGED Viewed

@@ -1,80 +1,70 @@
-"""
-Whisper Audio-to-Text – ZeroGPU edition
-Runs on 🤗 Spaces with ZeroGPU (A100) accelerator
-"""
 import os
 import tempfile
-import gradio as gr
-import whisper
-import numpy as np
-from huggingface_hub import hf_hub_download
-# ------------------------------------------------------------------
-# 1.  ZeroGPU decorator
-# ------------------------------------------------------------------
-import spaces                       # pip install huggingface-hub>=0.16
-# ------------------------------------------------------------------
-# 2.  Load model once per GPU worker
-# ------------------------------------------------------------------
-MODEL_ID = "openai/whisper-base"    # HF Hub repo id used for the download below
-MODEL = None                        # module-level cache, filled on first _load_model() call
-def _load_model():
-    """Return the cached Whisper model, loading it on the first call."""
-    global MODEL
-    if MODEL is None:
-        # Download weights from the HF Hub (cached).
-        # NOTE(review): `ckpt` is never used after this assignment.
-        ckpt = hf_hub_download(repo_id=MODEL_ID, filename="pytorch_model.bin")
-        # NOTE(review): the model is loaded by name, not from `ckpt`; whether
-        # this resolves to the same weights is not shown here - confirm.
-        MODEL = whisper.load_model("base")
-    return MODEL
-# ------------------------------------------------------------------
-# 3.  GPU-decorated transcription
-# ------------------------------------------------------------------
-@spaces.GPU
-def transcribe(audio):
     """
-    Transcribe one audio input with Whisper and return the text.
-
-    audio: filepath (upload) or (sr, data) tuple (mic)
-    returns: transcribed text, or a human-readable message when there is
-    no input, no speech detected, or an error
     """
-    if audio is None:
-        return "⚠️  No audio received."
-    # ---- handle microphone ----
-    if isinstance(audio, tuple):
-        sr, data = audio
-        data = data.astype(np.float32)
-        # Rescale by the peak only when samples fall outside [-1, 1].
-        if np.abs(data).max() > 1.0:
-            data /= np.abs(data).max()
-        # delete=False: the file is read by path below and removed in `finally`.
-        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
-            import soundfile as sf
-            sf.write(tmp.name, data, sr)
-            audio_path = tmp.name
-    else:
-        audio_path = audio
-    # ---- run Whisper on GPU ----
-    try:
-        model = _load_model()
-        result = model.transcribe(audio_path, fp16=True)   # fp16 OK on GPU
-        text = result["text"].strip()
-        return text if text else "🤷‍♂️ No speech detected."
     except Exception as e:
-        # Errors are reported as the returned text rather than raised.
-        return f"❌ Error: {e}"
     finally:
-        # Delete only the temp file created above, never the caller's own file.
-        if audio_path != audio and os.path.exists(audio_path):
-            os.unlink(audio_path)
-# ------------------------------------------------------------------
-# 4.  Gradio UI (unchanged)
-# ------------------------------------------------------------------
-# Single-function Gradio UI: one audio input (upload or microphone) is
-# passed to transcribe() and the result is shown in a 6-line textbox.
-# NOTE(review): type="filepath" presumably means transcribe() receives a
-# path string, which would leave its tuple branch unused - confirm.
-demo = gr.Interface(
-    fn=transcribe,
-    inputs=gr.Audio(sources=["upload", "microphone"], type="filepath"),
-    outputs=gr.Textbox(label="Transcription", lines=6),
-    title="🎙️ Whisper Audio-to-Text (ZeroGPU)",
-    description="Upload or record audio → instant transcription on A100.",
-)
 if __name__ == "__main__":
-    # Launch the UI only when this file is run as a script.
-    demo.launch()

 import os
+import requests
 import tempfile
+import uvicorn
+from fastapi import FastAPI, HTTPException, Query
+from faster_whisper import WhisperModel
+# 1. Initialize FastAPI
+app = FastAPI(title="Open Whisper API (No Limits)")
+# 2. Initialize the model (base is accurate and fast enough)
+# Use device="cuda" if the server has an NVIDIA GPU
+# The model is created once at import time and reused by every request.
+model_size = "base"
+print(f"Loading Whisper model '{model_size}'...")
+model = WhisperModel(model_size, device="cpu", compute_type="int8")
+@app.get("/")
+def home():
+    """Return a static status payload with a usage hint for /generate."""
+    return {"status": "API Active", "usage": "/generate?url=YOUR_AUDIO_URL"}
+@app.get("/generate")
+async def generate_transcription(url: str = Query(..., description="URL file audio (mp3, wav, m4a, dll)")):
     """
+    Download the audio at `url` and return its transcription as JSON.
+
+    Args:
+        url: Required query parameter; fetched with `requests.get`.
+
+    Returns:
+        A dict with `success`, `language`, `language_probability`,
+        `text` and `url_processed`.
+
+    Raises:
+        HTTPException: 400 when `requests` raises a RequestException,
+            500 for any other exception.
     """
+    # NOTE(review): requests.get and model.transcribe below look like blocking
+    # calls inside an `async def` - confirm this should not be a plain `def`.
+    tmp_path = None
+    try:
+        # 3. Download the file from the URL into a temp file
+        suffix = ".wav" # Default suffix
+        # Use the text after the last "." (minus any "?..." tail) as the
+        # extension, but only when it is at most 5 characters with the dot.
+        # NOTE(review): the candidate is not checked for "/", so a URL such as
+        # "http://a.io/x" yields the suffix ".io/x" - confirm this is handled.
+        if "." in url:
+            potential_ext = "." + url.split(".")[-1].split("?")[0]
+            if len(potential_ext) <= 5:
+                suffix = potential_ext
+        # delete=False: the file is read by path after this `with` block and
+        # removed in `finally`.
+        # NOTE(review): tmp_path is only set after the download loop; if the
+        # request or the loop raises, tmp_path is still None and the `finally`
+        # block leaves the temp file on disk.
+        # NOTE(review): the streamed response is never explicitly closed.
+        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
+            response = requests.get(url, stream=True, timeout=30)
+            response.raise_for_status() # Check whether the download failed
+            for chunk in response.iter_content(chunk_size=8192):
+                tmp.write(chunk)
+            tmp_path = tmp.name
+        # 4. Run the transcription
+        print(f"Processing: {url}")
+        segments, info = model.transcribe(tmp_path, beam_size=5)
+        # Join the text of every segment with single spaces.
+        full_text = " ".join([segment.text for segment in segments]).strip()
+        # 5. Return the JSON response
+        return {
+            "success": True,
+            "language": info.language,
+            "language_probability": info.language_probability,
+            "text": full_text,
+            "url_processed": url
+        }
+    except requests.exceptions.RequestException as e:
+        # Download problems are reported to the client as HTTP 400.
+        raise HTTPException(status_code=400, detail=f"Gagal mendownload file: {str(e)}")
     except Exception as e:
+        # Anything else is reported as HTTP 500 with the exception text.
+        raise HTTPException(status_code=500, detail=f"Internal Error: {str(e)}")
     finally:
+        # 6. Clean up the temporary file
+        if tmp_path and os.path.exists(tmp_path):
+            os.remove(tmp_path)
 if __name__ == "__main__":
+    # Start the server on all interfaces, port 7860
+    uvicorn.run(app, host="0.0.0.0", port=7860)