Update app.py
app.py CHANGED

@@ -7,8 +7,8 @@ import os
 
 app = FastAPI()
 
-# ✅ …
-# …
+# ✅ Multilingual model (better Hindi-English support than tiny)
+# You can switch to "openai/whisper-small" for even better accuracy if your container allows.
 asr = pipeline(
     "automatic-speech-recognition",
     model="openai/whisper-base",
@@ -17,14 +17,14 @@ asr = pipeline(
 
 @app.post("/predict")
 async def predict(file: UploadFile = File(...)):
-    input_path = "/tmp/…
-    wav_path = "/tmp/…
+    input_path = "/tmp/input_audio.webm"
+    wav_path = "/tmp/input_audio.wav"
 
     # Save uploaded file
     with open(input_path, "wb") as f:
         f.write(await file.read())
 
-    # Convert …
+    # Convert to 16 kHz mono WAV → ensures consistency
     subprocess.run([
         "ffmpeg", "-y", "-i", input_path,
         "-ac", "1", "-ar", "16000", wav_path
@@ -34,19 +34,22 @@ async def predict(file: UploadFile = File(...)):
     waveform, sr = torchaudio.load(wav_path)
     waveform = waveform.to(torch.float32)
 
-    # ✅ …
+    # ✅ Transcribe with automatic language detection
+    # The 'task': 'transcribe' ensures Whisper writes what it hears, no translation.
     result = asr(
         {"array": waveform[0].numpy(), "sampling_rate": sr},
-        generate_kwargs={…
+        generate_kwargs={
+            "task": "transcribe",  # disables translation
+            "language": None       # auto-detect language
+        }
     )
 
-    # Cleanup
+    # Cleanup temp files
     os.remove(input_path)
     os.remove(wav_path)
 
-    # ✅ Return text + detected language
     return {
-        "text": result["text"],
-        "language": result.get("language", "auto…
-        "note": "Auto language detection enabled…
+        "text": result["text"].strip(),
+        "language": result.get("language", "auto"),
+        "note": "Auto language detection enabled. Optimized for Hindi + English speech."
     }
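For a quick end-to-end check of the updated endpoint, a minimal client sketch follows. The URL, port, and sample filename are illustrative, not part of this commit; the multipart field name "file" and the response keys "text" and "language" come from the handler above.

import requests

# Hypothetical URL; point this at wherever the app is actually served.
url = "http://localhost:7860/predict"

# The handler reads the multipart field named "file" (UploadFile above).
with open("sample.webm", "rb") as f:  # illustrative browser recording
    resp = requests.post(url, files={"file": f})

resp.raise_for_status()
data = resp.json()
print(data["text"])      # transcription
print(data["language"])  # detected language, or "auto" if the key is absent

As a design note, converting uploads to 16 kHz mono before inference matches the sample rate Whisper models were trained on, so the ffmpeg step keeps browser-recorded .webm input consistent with what the pipeline expects.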