# Standard library
import os
import tempfile
import zipfile

# Third-party (several imports below are currently unused by this script but
# are kept to avoid breaking any external tooling that introspects them)
import gradio as gr
import spaces
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
from huggingface_hub import hf_hub_download
from datasets import load_dataset
from whisper import load_model, transcribe
from faster_whisper import WhisperModel

# Audio extensions accepted inside the uploaded ZIP (matched case-insensitively).
AUDIO_EXTENSIONS = (".wav", ".mp3", ".flac")


@spaces.GPU
def transcribe_audio(zip_file, progress=gr.Progress(track_tqdm=True)):
    """Transcribe every audio file contained in an uploaded ZIP archive.

    Args:
        zip_file: Filesystem path to the uploaded ZIP archive (Gradio
            ``gr.File(type="filepath")`` value).
        progress: Gradio progress tracker; ``track_tqdm=True`` mirrors any
            tqdm bars emitted during transcription.

    Returns:
        dict: Mapping of audio file basename -> transcription text, where
        each transcription is one "[start -> end] text" line per segment.
        (Gradio stringifies the dict for display in the output Textbox.)
    """
    model = WhisperModel("large-v3")

    transcriptions = {}
    # Extract into a temporary directory that is cleaned up automatically.
    with tempfile.TemporaryDirectory() as temp_dir:
        with zipfile.ZipFile(zip_file, "r") as zip_ref:
            zip_ref.extractall(temp_dir)

        # Walk recursively so audio files nested in sub-folders of the ZIP
        # are found too; match extensions case-insensitively.
        audio_files = [
            os.path.join(root, name)
            for root, _dirs, names in os.walk(temp_dir)
            for name in names
            if name.lower().endswith(AUDIO_EXTENSIONS)
        ]

        for audio_file in audio_files:
            # faster-whisper returns a lazy generator of segments; consume it
            # fully and join every segment line (the previous version
            # returned after the first segment of the first file).
            segments, info = model.transcribe(audio_file)
            transcription = "\n".join(
                "[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text)
                for segment in segments
            )
            transcriptions[os.path.basename(audio_file)] = transcription

    return transcriptions


# Define the Gradio interface
interface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.File(label="Upload ZIP file", type="filepath", file_types=[".zip"]),
    outputs=gr.Textbox(label="Transcriptions"),
    title="Audio Transcription with Whisper (Portuguese)",
    description="Upload a ZIP file containing Portuguese audio files, and this tool will transcribe them to Portuguese.",
)

# Launch the Gradio app
interface.launch()