MoulSot / app.py
abdeljalilELmajjodi's picture
Update app.py
e1b40db verified
raw
history blame
2.06 kB
import os
import tempfile

import gradio as gr
import torchaudio
from transformers import pipeline
# Load only the Moul-Sout-100 model (downloads weights from the HF Hub on
# first run; kept at module level so Spaces loads it once at startup).
asr_pipeline = pipeline("automatic-speech-recognition", model="01Yassine/moulsot_v0.2_1000")
# Migrate the decoder-prompt config: copy forced_decoder_ids into input_ids,
# then clear forced_decoder_ids. NOTE(review): presumably this works around
# transformers deprecating `forced_decoder_ids` for Whisper-style models —
# the copy MUST happen before the field is set to None, so keep this order.
asr_pipeline.model.generation_config.input_ids = asr_pipeline.model.generation_config.forced_decoder_ids
asr_pipeline.model.generation_config.forced_decoder_ids = None
def ensure_mono_16k(audio_path):
    """Load audio, convert it to mono 16 kHz, and save a temporary WAV copy.

    Args:
        audio_path: Path to the input audio file (any format torchaudio reads).

    Returns:
        Path to a newly created 16 kHz mono WAV file. The caller owns the
        file; it is not deleted here.
    """
    waveform, sr = torchaudio.load(audio_path)
    # Downmix multi-channel audio to mono by averaging the channels.
    if waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0, keepdim=True)
    # Resample to the 16 kHz rate the ASR model expects.
    if sr != 16000:
        resampler = torchaudio.transforms.Resample(sr, 16000)
        waveform = resampler(waveform)
        sr = 16000
    # Use a unique temp file per call: the previous hard-coded
    # "/tmp/processed_16k.wav" raced between concurrent requests (one user's
    # audio could overwrite another's) and broke on non-POSIX systems.
    fd, tmp_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)  # torchaudio.save opens the path itself
    torchaudio.save(tmp_path, waveform, sr)
    return tmp_path
def transcribe(audio):
    """Run ASR over the given audio file and return the transcript.

    Args:
        audio: Filesystem path to the recorded/uploaded audio, or None when
            the user submitted nothing.

    Returns:
        The transcription string, or a prompt message if no audio was given.
    """
    if audio is None:
        return "Please record or upload an audio file."
    # Normalize to 16 kHz mono, then let the pipeline do the decoding.
    return asr_pipeline(ensure_mono_16k(audio))["text"]
# UI copy rendered in the page header (Markdown).
title = "πŸŽ™οΈ Moul-Sout ASR πŸ‡²πŸ‡¦"
description = """
**Moul-Sout** model for Darija ASR πŸ‡²πŸ‡¦.
You can record or upload an audio sample (it will be automatically resampled to 16 kHz mono),
and view the transcription result below.
"""
# Assemble the Gradio interface: an audio input and a trigger button on one
# row, with the transcript textbox underneath.
with gr.Blocks(title=title) as demo:
    gr.Markdown(f"# {title}\n{description}")
    with gr.Row():
        audio_in = gr.Audio(
            sources=["microphone", "upload"],
            type="filepath",
            label="🎀 Record or Upload Audio (auto 16 kHz mono)",
        )
        run_button = gr.Button("πŸš€ Transcribe")
    transcript_box = gr.Textbox(label="🟩 Transcription Output")
    # Wire the button to the ASR function.
    run_button.click(fn=transcribe, inputs=[audio_in], outputs=[transcript_box])
# Start the Gradio server only when this file is executed directly
# (a hosting runtime may instead import `demo` — TODO confirm deployment).
if __name__ == "__main__":
    demo.launch()