# Auralyn — app.py (Whisper-based speech-to-text API for a Hugging Face Space)
import os
import subprocess
import tempfile

import torch
import torchaudio
from fastapi import FastAPI, UploadFile, File
from transformers import pipeline
app = FastAPI()

# ✅ Multilingual model (better Hindi-English support than tiny)
# You can switch to "openai/whisper-small" for even better accuracy if your
# container allows. Built once at import time; every request reuses it.
asr = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base",
    device="cpu",
)
@app.post("/predict")
async def predict(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file via Whisper with auto language detection.

    The upload (any ffmpeg-readable container; the browser typically sends
    WebM) is converted to 16 kHz mono WAV, loaded with torchaudio, and fed to
    the shared ASR pipeline.

    Returns:
        dict with "text" (stripped transcript), "language" (detected language,
        or "auto" when the pipeline does not report one), and a fixed "note".

    Raises:
        subprocess.CalledProcessError: if ffmpeg fails to convert the upload.
    """
    # Unique temp paths so concurrent requests cannot clobber each other —
    # fixed /tmp filenames raced when two uploads arrived at once.
    in_fd, input_path = tempfile.mkstemp(suffix=".webm")
    os.close(in_fd)
    wav_fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(wav_fd)

    try:
        # Save uploaded bytes to disk so ffmpeg can read them.
        with open(input_path, "wb") as f:
            f.write(await file.read())

        # Convert to 16 kHz mono WAV — ensures consistency.
        # check=True surfaces conversion failures here instead of a confusing
        # downstream error from torchaudio on a missing/empty WAV.
        subprocess.run(
            [
                "ffmpeg", "-y", "-i", input_path,
                "-ac", "1", "-ar", "16000", wav_path,
            ],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=True,
        )

        # Load waveform; channel 0 only — the conversion above forced mono.
        waveform, sr = torchaudio.load(wav_path)
        waveform = waveform.to(torch.float32)

        # ✅ Transcribe with automatic language detection.
        # task="transcribe" ensures Whisper writes what it hears (no
        # translation); language=None lets the model auto-detect.
        result = asr(
            {"array": waveform[0].numpy(), "sampling_rate": sr},
            generate_kwargs={
                "task": "transcribe",  # disables translation
                "language": None,      # auto-detect language
            },
        )
    finally:
        # Always clean up temp files, even when conversion or transcription
        # raises — the originals leaked on any failure path.
        for path in (input_path, wav_path):
            if os.path.exists(path):
                os.remove(path)

    return {
        "text": result["text"].strip(),
        "language": result.get("language", "auto"),
        "note": "Auto language detection enabled. Optimized for Hindi + English speech.",
    }