sae8d committed on
Commit
4495edf
·
verified ·
1 Parent(s): 6bd8d23

Upload 2 files

Browse files
Files changed (2) hide show
  1. main.py +42 -7
  2. requirements.txt +3 -1
main.py CHANGED
@@ -8,6 +8,8 @@ import os
8
  from difflib import SequenceMatcher
9
  from typing import Dict, Any, Optional
10
  import tempfile
 
 
11
 
12
  app = FastAPI(
13
  title="Bayan AI بيان",
@@ -287,21 +289,54 @@ def root():
287
 
288
  @app.post("/recognize")
289
  async def recognize(file: UploadFile = File(...)):
290
- if not file.content_type or not file.content_type.startswith("audio/"):
291
- raise HTTPException(status_code=400, detail="File must be an audio file")
 
292
 
293
- # Save to temp file (pipeline accepts file path directly)
 
 
 
294
  contents = await file.read()
295
- with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(file.filename)[1] or ".wav") as tmp:
 
 
296
  tmp.write(contents)
297
- tmp_path = tmp.name
 
 
 
298
 
299
  try:
300
- transcription = pipe(tmp_path)["text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
301
  except Exception as e:
302
  raise HTTPException(status_code=500, detail=f"Transcription error: {str(e)}")
303
  finally:
304
- os.unlink(tmp_path)
 
 
 
 
305
 
306
  result = find_best_verse(transcription)
307
  result["transcription"] = transcription
 
8
  from difflib import SequenceMatcher
9
  from typing import Dict, Any, Optional
10
  import tempfile
11
+ import subprocess
12
+ import shutil
13
 
14
  app = FastAPI(
15
  title="Bayan AI بيان",
 
289
 
290
  @app.post("/recognize")
291
  async def recognize(file: UploadFile = File(...)):
292
+ # Allow both audio and video
293
+ is_video = file.content_type and file.content_type.startswith("video/")
294
+ is_audio = file.content_type and file.content_type.startswith("audio/")
295
 
296
+ if not is_audio and not is_video:
297
+ raise HTTPException(status_code=400, detail="File must be an audio or video file")
298
+
299
+ # Save to temp file
300
  contents = await file.read()
301
+ file_extension = os.path.splitext(file.filename)[1] or (".mp4" if is_video else ".wav")
302
+
303
+ with tempfile.NamedTemporaryFile(delete=False, suffix=file_extension) as tmp:
304
  tmp.write(contents)
305
+ input_path = tmp.name
306
+
307
+ audio_path = input_path
308
+ temp_audio_path = None
309
 
310
  try:
311
+ if is_video:
312
+ # Check if ffmpeg is installed
313
+ if not shutil.which("ffmpeg"):
314
+ raise HTTPException(status_code=500, detail="ffmpeg not found on server")
315
+
316
+ temp_audio_path = input_path + "_converted.wav"
317
+ # Extract audio quickly and silently
318
+ # -vn: no video, -acodec pcm_s16le: wav format, -ar 16000: whisper preferred sample rate
319
+ # -y: overwrite, -loglevel error: be silent
320
+ cmd = [
321
+ "ffmpeg", "-y", "-i", input_path,
322
+ "-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1",
323
+ "-loglevel", "error",
324
+ temp_audio_path
325
+ ]
326
+ subprocess.run(cmd, check=True)
327
+ audio_path = temp_audio_path
328
+
329
+ transcription = pipe(audio_path)["text"]
330
+ except subprocess.CalledProcessError as e:
331
+ raise HTTPException(status_code=500, detail=f"Video conversion error: {str(e)}")
332
  except Exception as e:
333
  raise HTTPException(status_code=500, detail=f"Transcription error: {str(e)}")
334
  finally:
335
+ # Clean up all temp files
336
+ if os.path.exists(input_path):
337
+ os.unlink(input_path)
338
+ if temp_audio_path and os.path.exists(temp_audio_path):
339
+ os.unlink(temp_audio_path)
340
 
341
  result = find_best_verse(transcription)
342
  result["transcription"] = transcription
requirements.txt CHANGED
@@ -3,4 +3,6 @@ uvicorn
3
  python-multipart
4
  torch
5
  transformers
6
- scipy
 
 
 
3
  python-multipart
4
  torch
5
  transformers
6
+ scipy
7
+ librosa
8
+ accelerate