# audiototext / app.py — Hugging Face Space
# Author: Abhinay0806 — commit fe3c9b9 (verified): "Update app.py"
# import gradio as gr
# from transformers import pipeline
# # Load an audio classification pipeline (you can replace it with your desired model)
# audio_classification_pipeline = pipeline(task="automatic-speech-recognition", model="openai/whisper-small")
# def predict(input_audio):
# # Use the audio classification pipeline to predict
# transcription = audio_classification_pipeline(input_audio)["text"]
# return transcription
# # Gradio interface setup for audio classification
# gradio_app = gr.Interface(
# predict,
# inputs=gr.Audio(label="Upload or Record Audio", type="numpy"), # Audio input
# outputs=gr.Textbox(label="Transcription Result"), # Output is transcription text
# title="Audio Classification: Speech-to-Text",
# description="Upload or record an audio file to transcribe it using the Whisper model."
# )
# # Launch the app
# if __name__ == "__main__":
# gradio_app.launch()
import gradio as gr
import numpy as np
from transformers import pipeline
# Build the Whisper small ASR pipeline once at import time so every
# request reuses the already-loaded model weights.
audio_classification_pipeline = pipeline(
    model="openai/whisper-small",
    task="automatic-speech-recognition",
)
def predict(input_audio):
    """Transcribe uploaded/recorded audio with the Whisper pipeline.

    Parameters
    ----------
    input_audio : tuple[int, np.ndarray] | None
        Gradio ``type="numpy"`` audio, i.e. ``(sample_rate, data)``.
        ``data`` is raw PCM — typically int16, mono ``(samples,)`` or
        stereo ``(samples, channels)``. ``None`` when nothing was given.

    Returns
    -------
    str
        The transcription text, or a prompt asking for an audio file.
    """
    if input_audio is None:
        return "Please upload an audio file."
    # BUG FIX: Gradio's numpy audio tuple is (sample_rate, data) —
    # the previous code unpacked it the other way around, feeding the
    # sample-rate integer to Whisper as the waveform.
    sample_rate, audio_data = input_audio
    audio = np.asarray(audio_data)
    # Whisper's feature extractor expects float32 samples in [-1, 1];
    # Gradio delivers raw PCM integers (usually int16), so rescale.
    if np.issubdtype(audio.dtype, np.integer):
        audio = audio.astype(np.float32) / np.iinfo(audio_data.dtype).max
    else:
        audio = audio.astype(np.float32, copy=False)
    # Downmix stereo/multi-channel (samples, channels) to mono.
    if audio.ndim > 1:
        audio = audio.mean(axis=1)
    # The ASR pipeline accepts a dict with 'array'/'raw' + 'sampling_rate'.
    transcription = audio_classification_pipeline(
        {"array": audio, "sampling_rate": sample_rate}
    )["text"]
    return transcription
# Assemble the Gradio UI: one audio input, one text output.
_audio_input = gr.Audio(label="Upload or Record Audio", type="numpy")
_text_output = gr.Textbox(label="Transcription Result")
gradio_app = gr.Interface(
    fn=predict,
    inputs=_audio_input,
    outputs=_text_output,
    title="Audio Classification: Speech-to-Text",
    description="Upload or record an audio file to transcribe it using the Whisper model.",
)
# Launch the app
# Start the Gradio server only when this file is executed directly
# (not when imported, e.g. by a Spaces runtime that serves `gradio_app`).
if __name__ == "__main__":
    gradio_app.launch()