Spaces:

sae8d
/

bayan-ai

Sleeping

App Files Files Community

sae8d committed on Feb 11

Commit

4495edf

verified ·

1 Parent(s): 6bd8d23

Upload 2 files

Browse files

Files changed (2) hide show

main.py +42 -7
requirements.txt +3 -1

main.py CHANGED Viewed

@@ -8,6 +8,8 @@ import os
 from difflib import SequenceMatcher
 from typing import Dict, Any, Optional
 import tempfile
 app = FastAPI(
     title="Bayan AI بيان",
@@ -287,21 +289,54 @@ def root():
 @app.post("/recognize")
 async def recognize(file: UploadFile = File(...)):
-    if not file.content_type or not file.content_type.startswith("audio/"):
-        raise HTTPException(status_code=400, detail="File must be an audio file")
-    # Save to temp file (pipeline accepts file path directly)
     contents = await file.read()
-    with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(file.filename)[1] or ".wav") as tmp:
         tmp.write(contents)
-        tmp_path = tmp.name
     try:
-        transcription = pipe(tmp_path)["text"]
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Transcription error: {str(e)}")
     finally:
-        os.unlink(tmp_path)
     result = find_best_verse(transcription)
     result["transcription"] = transcription

 from difflib import SequenceMatcher
 from typing import Dict, Any, Optional
 import tempfile
+import subprocess
+import shutil
 app = FastAPI(
     title="Bayan AI بيان",
 @app.post("/recognize")
 async def recognize(file: UploadFile = File(...)):
+    # Allow both audio and video
+    is_video = file.content_type and file.content_type.startswith("video/")
+    is_audio = file.content_type and file.content_type.startswith("audio/")
+    if not is_audio and not is_video:
+        raise HTTPException(status_code=400, detail="File must be an audio or video file")
+    # Save to temp file
     contents = await file.read()
+    file_extension = os.path.splitext(file.filename)[1] or (".mp4" if is_video else ".wav")
+    with tempfile.NamedTemporaryFile(delete=False, suffix=file_extension) as tmp:
         tmp.write(contents)
+        input_path = tmp.name
+    audio_path = input_path
+    temp_audio_path = None
     try:
+        if is_video:
+            # Check if ffmpeg is installed
+            if not shutil.which("ffmpeg"):
+                raise HTTPException(status_code=500, detail="ffmpeg not found on server")
+            temp_audio_path = input_path + "_converted.wav"
+            # Extract audio quickly and silently
+            # -vn: no video, -acodec pcm_s16le: wav format, -ar 16000: whisper preferred sample rate
+            # -y: overwrite, -loglevel error: be silent
+            cmd = [
+                "ffmpeg", "-y", "-i", input_path,
+                "-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1",
+                "-loglevel", "error",
+                temp_audio_path
+            ]
+            subprocess.run(cmd, check=True)
+            audio_path = temp_audio_path
+        transcription = pipe(audio_path)["text"]
+    except subprocess.CalledProcessError as e:
+        raise HTTPException(status_code=500, detail=f"Video conversion error: {str(e)}")
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Transcription error: {str(e)}")
     finally:
+        # Clean up all temp files
+        if os.path.exists(input_path):
+            os.unlink(input_path)
+        if temp_audio_path and os.path.exists(temp_audio_path):
+            os.unlink(temp_audio_path)
     result = find_best_verse(transcription)
     result["transcription"] = transcription

requirements.txt CHANGED Viewed

@@ -3,4 +3,6 @@ uvicorn
 python-multipart
 torch
 transformers
-scipy

 python-multipart
 torch
 transformers
+scipy
+librosa
+accelerate