Gaoussin committed on
Commit
315d5df
·
verified ·
1 Parent(s): b3db7f9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -17
app.py CHANGED
@@ -2,6 +2,10 @@ import os
2
  import io
3
  import torch
4
  import librosa
 
 
 
 
5
  from fastapi import FastAPI, File, UploadFile, HTTPException
6
  from fastapi.middleware.cors import CORSMiddleware
7
  from transformers import Wav2Vec2ForCTC, AutoProcessor
@@ -35,35 +39,57 @@ model.load_adapter("bam")
35
  print("Bambara adapter loaded. System Ready.")
36
 
37
 
 
 
38
  @app.post("/transcribe")
39
  async def transcribe(audio_file: UploadFile = File(...)):
40
  try:
41
- # 1. Read the file into memory
42
  content = await audio_file.read()
43
  if not content:
44
- return {"text": "Error: Empty audio file"}
 
 
 
 
45
 
46
- # 2. Convert to a file-like object
47
- audio_fp = io.BytesIO(content)
48
 
49
- # 3. Load & Resample
50
- # By not specifying 'format', librosa uses ffmpeg to 'sniff' the file.
51
- # This works for WebM, Ogg, WAV, etc., IF ffmpeg is in packages.txt
52
- audio_data, _ = librosa.load(audio_fp, sr=16000)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
- # 4. Prepare inputs for the model
55
- inputs = processor(audio_data, sampling_rate=16000, return_tensors="pt").to(device)
56
-
57
- # 5. Run the model
58
  with torch.inference_mode():
59
  logits = model(**inputs).logits
60
 
61
- # 6. Decode output
62
  predicted_ids = torch.argmax(logits, dim=-1)
63
- transcription = processor.batch_decode(predicted_ids)[0]
 
 
64
 
65
- return {"text": transcription}
 
66
 
67
  except Exception as e:
68
- print(f"Server Error: {e}")
69
- return {"text": f"Error: {str(e)}"}
 
2
  import io
3
  import torch
4
  import librosa
5
+ import subprocess
6
+ import tempfile
7
+ import soundfile as sf
8
+ import numpy as np
9
  from fastapi import FastAPI, File, UploadFile, HTTPException
10
  from fastapi.middleware.cors import CORSMiddleware
11
  from transformers import Wav2Vec2ForCTC, AutoProcessor
 
39
  print("Bambara adapter loaded. System Ready.")
40
 
41
 
42
+
43
+
44
  @app.post("/transcribe")
45
  async def transcribe(audio_file: UploadFile = File(...)):
46
  try:
 
47
  content = await audio_file.read()
48
  if not content:
49
+ return {"text": "Empty audio"}
50
+
51
+ # Write WebM to temp file
52
+ with tempfile.NamedTemporaryFile(suffix=".webm") as f_webm, \
53
+ tempfile.NamedTemporaryFile(suffix=".wav") as f_wav:
54
 
55
+ f_webm.write(content)
56
+ f_webm.flush()
57
 
58
+ # Convert WebM to WAV (mono, 16kHz)
59
+ subprocess.run(
60
+ [
61
+ "ffmpeg", "-y",
62
+ "-i", f_webm.name,
63
+ "-ac", "1",
64
+ "-ar", "16000",
65
+ f_wav.name
66
+ ],
67
+ stdout=subprocess.DEVNULL,
68
+ stderr=subprocess.DEVNULL,
69
+ check=True
70
+ )
71
+
72
+ # Read WAV
73
+ audio_data, sr = sf.read(f_wav.name)
74
+
75
+ # ASR inference
76
+ inputs = processor(
77
+ audio_data,
78
+ sampling_rate=16000,
79
+ return_tensors="pt"
80
+ ).to(device)
81
 
 
 
 
 
82
  with torch.inference_mode():
83
  logits = model(**inputs).logits
84
 
 
85
  predicted_ids = torch.argmax(logits, dim=-1)
86
+ text = processor.batch_decode(predicted_ids)[0]
87
+
88
+ return {"text": text}
89
 
90
+ except subprocess.CalledProcessError:
91
+ return {"text": "FFmpeg conversion failed"}
92
 
93
  except Exception as e:
94
+ print("Server Error:", e)
95
+ return {"text": str(e)}