# MedicalASR / app.py — Hugging Face Space application
# Author: HaoVuong; commit 828ed6a ("title change")
import torch
import gradio as gr
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
# --- Inference configuration ---
# Force CPU execution; float32 because CPUs generally lack fast half-precision
# kernels, so float16 would be slower or unsupported here.
device = "cpu"
torch_dtype = torch.float32
# Fine-tuned Whisper-small weights live in a subfolder of the MultiMed-ST
# model repository on the Hugging Face Hub.
fine_tuned_model_id = "leduckhai/MultiMed-ST"
fine_tuned_subfolder = "asr/whisper-small-english/checkpoint"
print("Loading model on CPU... this may take a moment.")
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    fine_tuned_model_id,
    subfolder=fine_tuned_subfolder,
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,  # stream weights during load to cut peak RAM
    use_safetensors=True
).to(device)
# Tokenizer + feature extractor come from the base openai/whisper-small model;
# NOTE(review): assumes the fine-tuned checkpoint reuses them unchanged — confirm.
processor = AutoProcessor.from_pretrained("openai/whisper-small")
# Chunked long-form transcription: 30 s windows, batched, with timestamps.
asr_pipeline = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,       # cap on generated tokens per chunk
    chunk_length_s=30,        # split long audio into 30-second chunks
    batch_size=16,            # chunks decoded per forward pass
    return_timestamps=True,
    torch_dtype=torch_dtype,
    device=device
)
def transcribe_audio(audio_path):
    """Transcribe a recorded or uploaded audio file to English text.

    audio_path: filesystem path handed over by the Gradio Audio component
        (``type="filepath"``), or ``None`` when no audio was provided.
    Returns the transcription string, or a placeholder message for no input.
    """
    if audio_path is None:
        return "No audio found."
    print(f"Transcribing: {audio_path}")
    output = asr_pipeline(
        audio_path,
        generate_kwargs={"language": "en", "task": "transcribe"},
    )
    return output['text']
# Gradio UI: users either record via microphone or upload a file; the
# component passes the handler a temporary file path (type="filepath").
demo = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
    outputs="text",
    title="Capstone Medical ASR",
    description="Running on CPU. Processing might take a few seconds."
)
# Start the Gradio server when executed as a script (standard Space entry point).
if __name__ == "__main__":
    demo.launch()