Upload 2 files
Browse files
- main.py +42 -7
- requirements.txt +3 -1
main.py
CHANGED
|
@@ -8,6 +8,8 @@ import os
|
|
| 8 |
from difflib import SequenceMatcher
|
| 9 |
from typing import Dict, Any, Optional
|
| 10 |
import tempfile
|
|
|
|
|
|
|
| 11 |
|
| 12 |
app = FastAPI(
|
| 13 |
title="Bayan AI بيان",
|
|
@@ -287,21 +289,54 @@ def root():
|
|
| 287 |
|
| 288 |
@app.post("/recognize")
|
| 289 |
async def recognize(file: UploadFile = File(...)):
|
| 290 |
-
|
| 291 |
-
|
|
|
|
| 292 |
|
| 293 |
-
|
|
|
|
|
|
|
|
|
|
| 294 |
contents = await file.read()
|
| 295 |
-
|
|
|
|
|
|
|
| 296 |
tmp.write(contents)
|
| 297 |
-
|
|
|
|
|
|
|
|
|
|
| 298 |
|
| 299 |
try:
|
| 300 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 301 |
except Exception as e:
|
| 302 |
raise HTTPException(status_code=500, detail=f"Transcription error: {str(e)}")
|
| 303 |
finally:
|
| 304 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 305 |
|
| 306 |
result = find_best_verse(transcription)
|
| 307 |
result["transcription"] = transcription
|
|
|
|
| 8 |
from difflib import SequenceMatcher
|
| 9 |
from typing import Dict, Any, Optional
|
| 10 |
import tempfile
|
| 11 |
+
import subprocess
|
| 12 |
+
import shutil
|
| 13 |
|
| 14 |
app = FastAPI(
|
| 15 |
title="Bayan AI بيان",
|
|
|
|
| 289 |
|
| 290 |
@app.post("/recognize")
|
| 291 |
async def recognize(file: UploadFile = File(...)):
|
| 292 |
+
# Allow both audio and video
|
| 293 |
+
is_video = file.content_type and file.content_type.startswith("video/")
|
| 294 |
+
is_audio = file.content_type and file.content_type.startswith("audio/")
|
| 295 |
|
| 296 |
+
if not is_audio and not is_video:
|
| 297 |
+
raise HTTPException(status_code=400, detail="File must be an audio or video file")
|
| 298 |
+
|
| 299 |
+
# Save to temp file
|
| 300 |
contents = await file.read()
|
| 301 |
+
file_extension = os.path.splitext(file.filename)[1] or (".mp4" if is_video else ".wav")
|
| 302 |
+
|
| 303 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=file_extension) as tmp:
|
| 304 |
tmp.write(contents)
|
| 305 |
+
input_path = tmp.name
|
| 306 |
+
|
| 307 |
+
audio_path = input_path
|
| 308 |
+
temp_audio_path = None
|
| 309 |
|
| 310 |
try:
|
| 311 |
+
if is_video:
|
| 312 |
+
# Check if ffmpeg is installed
|
| 313 |
+
if not shutil.which("ffmpeg"):
|
| 314 |
+
raise HTTPException(status_code=500, detail="ffmpeg not found on server")
|
| 315 |
+
|
| 316 |
+
temp_audio_path = input_path + "_converted.wav"
|
| 317 |
+
# Extract audio quickly and silently
|
| 318 |
+
# -vn: no video, -acodec pcm_s16le: 16-bit PCM audio (WAV), -ar 16000: whisper preferred sample rate, -ac 1: mono
|
| 319 |
+
# -y: overwrite the output file, -loglevel error: only log errors
|
| 320 |
+
cmd = [
|
| 321 |
+
"ffmpeg", "-y", "-i", input_path,
|
| 322 |
+
"-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1",
|
| 323 |
+
"-loglevel", "error",
|
| 324 |
+
temp_audio_path
|
| 325 |
+
]
|
| 326 |
+
subprocess.run(cmd, check=True)
|
| 327 |
+
audio_path = temp_audio_path
|
| 328 |
+
|
| 329 |
+
transcription = pipe(audio_path)["text"]
|
| 330 |
+
except subprocess.CalledProcessError as e:
|
| 331 |
+
raise HTTPException(status_code=500, detail=f"Video conversion error: {str(e)}")
|
| 332 |
except Exception as e:
|
| 333 |
raise HTTPException(status_code=500, detail=f"Transcription error: {str(e)}")
|
| 334 |
finally:
|
| 335 |
+
# Clean up all temp files
|
| 336 |
+
if os.path.exists(input_path):
|
| 337 |
+
os.unlink(input_path)
|
| 338 |
+
if temp_audio_path and os.path.exists(temp_audio_path):
|
| 339 |
+
os.unlink(temp_audio_path)
|
| 340 |
|
| 341 |
result = find_best_verse(transcription)
|
| 342 |
result["transcription"] = transcription
|
requirements.txt
CHANGED
|
@@ -3,4 +3,6 @@ uvicorn
|
|
| 3 |
python-multipart
|
| 4 |
torch
|
| 5 |
transformers
|
| 6 |
-
scipy
|
|
|
|
|
|
|
|
|
| 3 |
python-multipart
|
| 4 |
torch
|
| 5 |
transformers
|
| 6 |
+
scipy
|
| 7 |
+
librosa
|
| 8 |
+
accelerate
|