Spaces:

adiharel30
/

HebrewTranscriber

Runtime error

adiharel30 committed on Nov 12, 2024

Commit

cec7db1

verified ·

1 Parent(s): 99636e7

Create app.py

Files changed (1) hide show

app.py ADDED Viewed

+import asyncio
+import os
+import shutil
+import tempfile
+
+import aiofiles
+import faster_whisper
+from fastapi import FastAPI, Request
+from fastapi.responses import JSONResponse
+from pydub import AudioSegment
+# Initialize the FastAPI app
+app = FastAPI()
+# Initialize the model with GPU support
+model = faster_whisper.WhisperModel('ivrit-ai/faster-whisper-v2-d4', device="cuda", compute_type="float32")
+# Define file paths
+TEMP_FILE_PATH = "temp_audio_file.m4a"
+WAV_FILE_PATH = "temp_audio_file.wav"
+@app.post("/transcribe")
+async def transcribe(request: Request):
+    # Stream the file directly to a temporary file on disk
+    async with aiofiles.open(TEMP_FILE_PATH, 'wb') as out_file:
+        async for chunk in request.stream():
+            await out_file.write(chunk)
+    print("File saved successfully.")
+    # Convert M4A to WAV
+    try:
+        audio = AudioSegment.from_file(TEMP_FILE_PATH, format="m4a")
+        audio.export(WAV_FILE_PATH, format="wav")
+        print("Conversion to WAV successful.")
+    except Exception as e:
+        print("Error during conversion:", e)
+        return JSONResponse({"detail": "Error in audio conversion"}, status_code=400)
+    # Transcribe the WAV audio file
+    segments, _ = model.transcribe(WAV_FILE_PATH, language='he')
+    transcribed_text = ' '.join([s.text for s in segments])
+    # Clean up temporary files
+    os.remove(TEMP_FILE_PATH)
+    os.remove(WAV_FILE_PATH)
+    return JSONResponse({"transcribed_text": transcribed_text})