# NOTE(review): the following lines are web-scrape residue from the Hugging Face
# Spaces file listing (status text, file size, commit hashes, column numbers).
# They are not Python and would raise a SyntaxError; kept here as comments only.
#   Spaces: Runtime error | File size: 1,694 Bytes
import gradio as gr
import os
import zipfile
import tempfile
import spaces
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
from huggingface_hub import hf_hub_download
from datasets import load_dataset
from whisper import load_model, transcribe
from faster_whisper import WhisperModel
@spaces.GPU
def transcribe_audio(zip_file, progress=gr.Progress(track_tqdm=True)):
    """Transcribe every audio file inside an uploaded ZIP archive.

    Extracts the archive to a temporary directory, runs faster-whisper
    ("large-v3") on each top-level .wav/.mp3/.flac file, and returns all
    transcriptions as one human-readable string (the Gradio output is a
    Textbox, so a plain string is the expected return type).

    Args:
        zip_file: Filepath to the uploaded ZIP archive (Gradio File input
            with type="filepath").
        progress: Gradio progress tracker (track_tqdm mirrors tqdm bars).

    Returns:
        A string with one section per audio file, each line formatted as
        "[start -> end] text"; a notice string if the archive contains no
        supported audio files.
    """
    model = WhisperModel("large-v3")
    sections = []
    # Temporary directory is cleaned up automatically when the block exits.
    with tempfile.TemporaryDirectory() as temp_dir:
        with zipfile.ZipFile(zip_file, 'r') as zip_ref:
            zip_ref.extractall(temp_dir)
        # Only top-level files are scanned; archives with subdirectories
        # would need os.walk — TODO confirm expected archive layout.
        audio_files = [
            os.path.join(temp_dir, f)
            for f in os.listdir(temp_dir)
            if f.endswith(('.wav', '.mp3', '.flac'))
        ]
        # Transcription must happen while temp_dir still exists.
        for audio_file in audio_files:
            # BUG FIX: the original returned from inside the segment loop,
            # so only the first segment of the first file was ever produced,
            # and it referenced an undefined `transcription` variable.
            segments, info = model.transcribe(audio_file)
            lines = [
                "[%.2fs -> %.2fs] %s" % (seg.start, seg.end, seg.text)
                for seg in segments
            ]
            sections.append(
                "%s:\n%s" % (os.path.basename(audio_file), "\n".join(lines))
            )
    if not sections:
        return "No supported audio files (.wav, .mp3, .flac) found in the ZIP."
    return "\n\n".join(sections)
# Build the Gradio interface: a single ZIP upload in, plain text out.
zip_input = gr.File(label="Upload ZIP file", type="filepath", file_types=[".zip"])
text_output = gr.Textbox(label="Transcriptions")

interface = gr.Interface(
    fn=transcribe_audio,
    inputs=zip_input,
    outputs=text_output,
    title="Audio Transcription with Whisper (Portuguese)",
    description="Upload a ZIP file containing Portuguese audio files, and this tool will transcribe them to Portuguese.",
)

# Start the app server.
interface.launch()
# (end of file — stray "|" scrape artifact removed)