# tts-maker / app.py
# rafaaa2105's picture
# Update app.py
# 73d5ca1 verified
import gradio as gr
import os
import zipfile
import tempfile
import spaces
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
from huggingface_hub import hf_hub_download
from datasets import load_dataset
from whisper import load_model, transcribe
from faster_whisper import WhisperModel
@spaces.GPU
def transcribe_audio(zip_file, progress=gr.Progress(track_tqdm=True)):
    """Transcribe every audio file contained in an uploaded ZIP archive.

    The archive is extracted into a temporary directory, every ``.wav``,
    ``.mp3`` or ``.flac`` file found in it (at any depth, extension matched
    case-insensitively) is run through the faster-whisper ``large-v3`` model,
    and all timestamped segments of every file are collected.

    Args:
        zip_file: Filesystem path of the uploaded ZIP archive.
        progress: Gradio progress tracker. It is not used directly in the
            body; it is declared in the signature so Gradio can attach
            progress reporting to the call.

    Returns:
        A string with one section per audio file, separated by blank lines.
        Each section starts with the file's path relative to the archive
        root, followed by one ``[start -> end] text`` line per segment.
        If the archive contains no supported audio file, an explanatory
        message is returned instead.

    Raises:
        gr.Error: If no file was uploaded.
    """
    if not zip_file:
        raise gr.Error("Please upload a ZIP file.")

    audio_extensions = (".wav", ".mp3", ".flac")
    model = WhisperModel("large-v3")

    # Create a temporary directory to extract the ZIP file; it is removed
    # automatically once all files have been transcribed.
    with tempfile.TemporaryDirectory() as temp_dir:
        with zipfile.ZipFile(zip_file, "r") as zip_ref:
            zip_ref.extractall(temp_dir)

        # Walk the whole tree: archives created by zipping a folder keep the
        # audio files inside a sub-directory, not at the archive root.
        audio_files = []
        for root, dirs, files in os.walk(temp_dir):
            # Skip macOS metadata folders; they hold "._name.wav" resource
            # forks that look like audio files but are not decodable.
            dirs[:] = [d for d in dirs if d != "__MACOSX"]
            for name in files:
                if name.startswith("."):
                    continue
                if name.lower().endswith(audio_extensions):
                    audio_files.append(os.path.join(root, name))
        # Sort so the output order does not depend on filesystem order.
        audio_files.sort()

        transcriptions = {}
        for audio_file in audio_files:
            # ``segments`` is consumed lazily, so it must be fully iterated
            # here, while the extracted file still exists.
            segments, _info = model.transcribe(audio_file)
            lines = [
                "[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text)
                for segment in segments
            ]
            # Key by path relative to the archive root so files with the same
            # name in different folders do not overwrite each other.
            relative_name = os.path.relpath(audio_file, temp_dir)
            transcriptions[relative_name] = "\n".join(lines)

    if not transcriptions:
        return "No audio files (.wav, .mp3, .flac) found in the ZIP archive."

    return "\n\n".join(
        f"{name}\n{text}" for name, text in transcriptions.items()
    )
# Build the Gradio UI: a single ZIP upload goes in, the transcription text
# comes out in a textbox.
_zip_upload = gr.File(
    file_types=[".zip"],
    type="filepath",
    label="Upload ZIP file",
)
_transcript_box = gr.Textbox(label="Transcriptions")

interface = gr.Interface(
    title="Audio Transcription with Whisper (Portuguese)",
    description="Upload a ZIP file containing Portuguese audio files, and this tool will transcribe them to Portuguese.",
    fn=transcribe_audio,
    inputs=_zip_upload,
    outputs=_transcript_box,
)

# Start serving the app.
interface.launch()