# app.py — Hugging Face Space by 16pramodh (commit 3e38aa7)
import os

# Set the Hugging Face cache directory BEFORE importing transformers/huggingface_hub:
# the hub library reads HUGGINGFACE_HUB_CACHE at import time, so setting it after the
# import (as the original did) has no effect. This keeps downloaded weights inside the
# Space's writable filesystem.
os.environ['HUGGINGFACE_HUB_CACHE'] = '/app/.cache/huggingface/hub'

import torch
import gradio as gr
from transformers import pipeline, WhisperProcessor

# The fine-tuned Whisper checkpoint to serve.
model_name = "16pramodh/ASR_YAP"

# Reuse the tokenizer/feature extractor from the base checkpoint the model was
# fine-tuned from (the fine-tuned repo may not ship its own processor files).
processor = WhisperProcessor.from_pretrained("openai/whisper-small.en")

# Load the ASR pipeline.
# NOTE(fix): the original hard-coded device=0, which raises on CPU-only hardware
# rather than falling back — choose the device explicitly instead.
pipe = pipeline(
    "automatic-speech-recognition",
    model=model_name,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    device=0 if torch.cuda.is_available() else -1,  # GPU if present, else CPU
)
# Transcription endpoint exposed through the Gradio interface / API.
def transcribe_audio(audio_file_path):
    """Transcribe the audio file at *audio_file_path* and return its text.

    Returns a fixed message when no file was supplied (Gradio passes None
    when the audio input is empty).
    """
    if audio_file_path is None:
        return "No audio file provided."
    # The ASR pipeline accepts a filesystem path directly.
    result = pipe(audio_file_path)
    return result["text"]
# Wire up the Gradio interface: a single audio input (delivered to the
# handler as a file path) mapped to a plain-text output.
audio_input = gr.Audio(type="filepath")

iface = gr.Interface(
    fn=transcribe_audio,
    inputs=audio_input,
    outputs="text",
    title="Whisper Indian Accent ASR API",
    description="An API endpoint for a fine-tuned Whisper model.",
)

# Start the web server only when executed as a script (not on import).
if __name__ == "__main__":
    iface.launch()