File size: 1,694 Bytes
cfe6ce8
 
 
 
9ca487b
e41bca8
 
2b9af99
e41bca8
2b9af99
f61e9e0
7f4aeba
cfe6ce8
9ca487b
f61e9e0
 
6118644
7f4aeba
cfe6ce8
 
 
 
 
 
 
 
 
 
 
 
3409be8
bcdce3f
 
cfe6ce8
 
 
 
 
 
 
0e795e8
73d5ca1
fef90e0
 
cfe6ce8
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import gradio as gr
import os
import zipfile
import tempfile
import spaces
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
from huggingface_hub import hf_hub_download
from datasets import load_dataset
from whisper import load_model, transcribe
from faster_whisper import WhisperModel


@spaces.GPU
def transcribe_audio(zip_file, progress=gr.Progress(track_tqdm=True)):
    """Transcribe every audio file found at the top level of a ZIP archive.

    Args:
        zip_file: Filesystem path of the uploaded ZIP archive. A falsy value
            (nothing uploaded) yields an empty result.
        progress: Gradio progress tracker; declared so Gradio can mirror tqdm
            progress in the UI. It is not used directly in the body.

    Returns:
        A dict mapping each audio file's base name to its transcription.
        The transcription is one line per segment, formatted as
        ``[<start>s -> <end>s] <text>``; a file with no segments maps to
        an empty string.

    Raises:
        zipfile.BadZipFile: If ``zip_file`` is not a valid ZIP archive.
    """
    if not zip_file:
        return {}

    audio_suffixes = ('.wav', '.mp3', '.flac')
    model = WhisperModel("large-v3")

    # The extracted files only live as long as this temporary directory, so
    # all transcription work has to finish inside the ``with`` block.
    with tempfile.TemporaryDirectory() as temp_dir:
        with zipfile.ZipFile(zip_file, 'r') as zip_ref:
            zip_ref.extractall(temp_dir)

        # Sorted so the result order does not depend on directory listing
        # order; extension match is case-insensitive (e.g. "CLIP.WAV").
        audio_files = sorted(
            os.path.join(temp_dir, entry)
            for entry in os.listdir(temp_dir)
            if entry.lower().endswith(audio_suffixes)
        )

        transcriptions = {}
        for audio_file in audio_files:
            segments, _info = model.transcribe(audio_file)
            # ``segments`` is consumed fully here: every segment of every
            # file is collected instead of returning after the first one.
            segment_lines = [
                "[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text)
                for segment in segments
            ]
            transcriptions[os.path.basename(audio_file)] = "\n".join(segment_lines)

    return transcriptions

# Build the Gradio UI: a ZIP upload goes in, a text box of transcriptions comes out
zip_upload = gr.File(label="Upload ZIP file", type="filepath", file_types=[".zip"])
transcript_box = gr.Textbox(label="Transcriptions")

interface = gr.Interface(
    fn=transcribe_audio,
    inputs=zip_upload,
    outputs=transcript_box,
    title="Audio Transcription with Whisper (Portuguese)",
    description="Upload a ZIP file containing Portuguese audio files, and this tool will transcribe them to Portuguese.",
)

# Start serving the app
interface.launch()