# app.py — Hugging Face Space by 16pramodh (commit 3e38aa7)
import os

# Set the Hugging Face cache directory BEFORE importing transformers/huggingface_hub:
# the hub library reads HUGGINGFACE_HUB_CACHE at import time, so setting it after the
# import (as the original did) has no effect. This keeps downloaded weights inside the
# Space's writable filesystem.
os.environ['HUGGINGFACE_HUB_CACHE'] = '/app/.cache/huggingface/hub'

import torch
import gradio as gr
from transformers import pipeline, WhisperProcessor

# The fine-tuned Whisper checkpoint to serve.
model_name = "16pramodh/ASR_YAP"

# Reuse the tokenizer/feature extractor from the base checkpoint the model was
# fine-tuned from (the fine-tuned repo may not ship its own processor files).
processor = WhisperProcessor.from_pretrained("openai/whisper-small.en")

# Load the ASR pipeline.
# NOTE(fix): the original hard-coded device=0, which raises on CPU-only hardware
# rather than falling back — choose the device explicitly instead.
pipe = pipeline(
    "automatic-speech-recognition",
    model=model_name,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    device=0 if torch.cuda.is_available() else -1,  # GPU if present, else CPU
)
# Transcription endpoint exposed through the Gradio interface / API.
def transcribe_audio(audio_file_path):
    """Transcribe the audio file at *audio_file_path* and return its text.

    Returns a fixed message when no file was supplied (Gradio passes None
    when the audio input is empty).
    """
    if audio_file_path is None:
        return "No audio file provided."
    # The ASR pipeline accepts a filesystem path directly.
    result = pipe(audio_file_path)
    return result["text"]
# Wire up the Gradio interface: a single audio input (delivered to the
# handler as a file path) mapped to a plain-text output.
audio_input = gr.Audio(type="filepath")

iface = gr.Interface(
    fn=transcribe_audio,
    inputs=audio_input,
    outputs="text",
    title="Whisper Indian Accent ASR API",
    description="An API endpoint for a fine-tuned Whisper model.",
)

# Start the web server only when executed as a script (not on import).
if __name__ == "__main__":
    iface.launch()