# faster-whisper / app.py
# soamikapadia's picture
# Increased concurrency
# 5385eaf
import json
import logging
import threading
from time import time

import gradio as gr
from faster_whisper import WhisperModel
# Logging setup: install the default root handler, then turn on debug-level
# output for the "faster_whisper" logger specifically.
logging.basicConfig()
logging.getLogger("faster_whisper").setLevel(logging.DEBUG)

# Model names offered in the UI dropdown, in display order.
CHOICES = [
    "tiny",
    "tiny.en",
    "base",
    "base.en",
    "small",
    "small.en",
    "medium",
    "medium.en",
    "large-v1",
    "large-v2",
    "large-v3",
    "large",
]
def load_model(model):
    """Create a ``WhisperModel`` for the requested model name.

    Args:
        model: Model name passed straight through to ``WhisperModel``
            (for example one of the entries in ``CHOICES``).

    Returns:
        A ``WhisperModel`` built with ``device="auto"`` and
        ``compute_type="int8"``, using ``"int8"`` as its ``download_root``.
    """
    return WhisperModel(
        model,
        device="auto",
        compute_type="int8",
        download_root="int8",  # Adjust path as needed for Hugging Face Spaces
    )
# Current model (default to small).
# The loaded model's name is tracked separately: ``WhisperModel.model`` holds
# the underlying CTranslate2 model object, not the name string, so comparing
# it with the dropdown value never matches and would force a reload on every
# request.
current_model_name = "small"
current_model = load_model(current_model_name)
# Guards model switches, since the interface is configured to serve more than
# one request at a time.
_model_lock = threading.Lock()


def _get_model(model):
    """Return the model called ``model``, loading it only if it is not current."""
    global current_model, current_model_name
    with _model_lock:
        if current_model_name != model:
            # Record the name only after a successful load so a failed load
            # leaves the previous model/name pair consistent.
            current_model = load_model(model)
            current_model_name = model
        return current_model


def transcribe(audio_file, model):
    """Transcribe an audio file and return the result as a JSON string.

    Args:
        audio_file: Path of the uploaded audio file, or ``None`` when nothing
            was uploaded.
        model: Name of the Whisper model to use; the shared model is swapped
            only when this differs from the one currently loaded.

    Returns:
        A JSON document (4-space indented string) with the keys ``language``,
        ``language_probability`` and ``transcript``; ``transcript`` is a list
        of ``{"start", "end", "text"}`` objects, one per segment.

    Raises:
        gr.Error: If no audio file was provided.
    """
    if audio_file is None:
        raise gr.Error("Please upload an audio file before transcribing.")

    # Keep a local reference so this request continues with the model it
    # started with even if another request switches the shared one.
    whisper_model = _get_model(model)

    start = time()
    segments, info = whisper_model.transcribe(
        audio_file,
        vad_filter=True,
        vad_parameters=dict(min_silence_duration_ms=500),
    )
    # Prepare JSON output. ``segments`` is consumed here, inside the timed
    # region, so the reported duration covers the iteration as well.
    transcript = [
        {"start": segment.start, "end": segment.end, "text": segment.text}
        for segment in segments
    ]
    print(f"Time Taken to transcribe: {time() - start}")

    output = {
        "language": info.language,
        "language_probability": info.language_probability,
        "transcript": transcript,
    }
    return json.dumps(output, indent=4)
# Gradio UI: an audio upload and a model picker as inputs, JSON as output.
audio_input = gr.Audio(type="filepath", label="Upload MP3 Audio File")
model_input = gr.Dropdown(choices=CHOICES, value="small", label="Model")
json_output = gr.JSON(label="Transcription with Timestamps")

iface = gr.Interface(
    fn=transcribe,
    inputs=[audio_input, model_input],
    outputs=json_output,
    title="Whisper Transcription Service",
    description=(
        "Upload an MP3 audio file to transcribe. Select the model. "
        "The output includes the transcription with timestamps."
    ),
    concurrency_limit=2,
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()