narendraa commited on
Commit
9dcdc97
·
verified ·
1 Parent(s): 02a9c21

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -12
app.py CHANGED
@@ -7,8 +7,8 @@ import os
7
 
8
  app = FastAPI()
9
 
10
- # ✅ Use multilingual Whisper model with auto language detection
11
- # (You can try "small" if performance allows — more accurate for Indian languages)
12
  asr = pipeline(
13
  "automatic-speech-recognition",
14
  model="openai/whisper-base",
@@ -17,14 +17,14 @@ asr = pipeline(
17
 
18
  @app.post("/predict")
19
  async def predict(file: UploadFile = File(...)):
20
- input_path = "/tmp/input.webm"
21
- wav_path = "/tmp/input.wav"
22
 
23
  # Save uploaded file
24
  with open(input_path, "wb") as f:
25
  f.write(await file.read())
26
 
27
- # Convert WebM → WAV
28
  subprocess.run([
29
  "ffmpeg", "-y", "-i", input_path,
30
  "-ac", "1", "-ar", "16000", wav_path
@@ -34,19 +34,22 @@ async def predict(file: UploadFile = File(...)):
34
  waveform, sr = torchaudio.load(wav_path)
35
  waveform = waveform.to(torch.float32)
36
 
37
- # ✅ Auto-detect language and transcribe (no translation)
 
38
  result = asr(
39
  {"array": waveform[0].numpy(), "sampling_rate": sr},
40
- generate_kwargs={"task": "transcribe"} # key for multilingual auto-detection
 
 
 
41
  )
42
 
43
- # Cleanup
44
  os.remove(input_path)
45
  os.remove(wav_path)
46
 
47
- # ✅ Return text + detected language
48
  return {
49
- "text": result["text"],
50
- "language": result.get("language", "auto-detected"),
51
- "note": "Auto language detection enabled — supports Hindi, Tamil, Bengali, etc."
52
  }
 
7
 
8
  app = FastAPI()
9
 
10
+ # ✅ Multilingual model (better Hindi-English support than tiny)
11
+ # You can switch to "openai/whisper-small" for even better accuracy if your container allows.
12
  asr = pipeline(
13
  "automatic-speech-recognition",
14
  model="openai/whisper-base",
 
17
 
18
  @app.post("/predict")
19
  async def predict(file: UploadFile = File(...)):
20
+ input_path = "/tmp/input_audio.webm"
21
+ wav_path = "/tmp/input_audio.wav"
22
 
23
  # Save uploaded file
24
  with open(input_path, "wb") as f:
25
  f.write(await file.read())
26
 
27
+ # Convert to 16 kHz mono WAV — ensures consistency
28
  subprocess.run([
29
  "ffmpeg", "-y", "-i", input_path,
30
  "-ac", "1", "-ar", "16000", wav_path
 
34
  waveform, sr = torchaudio.load(wav_path)
35
  waveform = waveform.to(torch.float32)
36
 
37
+ # ✅ Transcribe with automatic language detection
38
+ # The 'task': 'transcribe' ensures Whisper writes what it hears, no translation.
39
  result = asr(
40
  {"array": waveform[0].numpy(), "sampling_rate": sr},
41
+ generate_kwargs={
42
+ "task": "transcribe", # disables translation
43
+ "language": None # auto-detect language
44
+ }
45
  )
46
 
47
+ # Cleanup temp files
48
  os.remove(input_path)
49
  os.remove(wav_path)
50
 
 
51
  return {
52
+ "text": result["text"].strip(),
53
+ "language": result.get("language", "auto"),
54
+ "note": "Auto language detection enabled. Optimized for Hindi + English speech."
55
  }