File size: 2,319 Bytes
80b326d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import os
import subprocess
import sys

sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

def transcribe(project_folder="tmp"):
    def generate_whisperx(input_file, output_folder, model='large-v3'):
        output_file = os.path.join(output_folder, f"{os.path.splitext(os.path.basename(input_file))[0]}.srt")
        json_file = os.path.join(output_folder, f"{os.path.splitext(os.path.basename(input_file))[0]}.json")  # Define the JSON output file

        # Skip processing if the JSON file already exists
        if os.path.exists(json_file):
            print(f"Arquivo já existe, pulando: {json_file}")
            return

        command = [
            "whisperx",
            input_file,
            "--model", model,
            "--task", "transcribe",
            "--align_model", "WAV2VEC2_ASR_LARGE_LV60K_960H",
            "--chunk_size", "10",
            "--vad_onset", "0.4",
            "--vad_offset", "0.3",
            "--compute_type", "float32",
            "--batch_size", "10",
            "--output_dir", output_folder,
            "--output_format", "srt",
            "--output_format", "json",
        ]

        print(f"Transcrevendo: {input_file}...")
        result = subprocess.run(command, shell=True, text=True, capture_output=True)
        print(f"Comando executado: {command}")
        
        if result.returncode != 0:
            print("Erro durante a transcrição:")
            print(result.stderr)
        else:
            print(f"Transcrição concluída. Arquivo salvo em: {output_file} e {json_file}")
            # print(result.stdout) 

    # Define o diretório de entrada e o diretório de saída
    input_folder = os.path.join(project_folder, 'final')
    output_folder = os.path.join(project_folder, 'subs')
    os.makedirs(output_folder, exist_ok=True)

    if not os.path.exists(input_folder):
        print(f"Pasta de entrada não encontrada: {input_folder}")
        return

    # Itera sobre todos os arquivos na pasta de entrada
    for filename in os.listdir(input_folder):
        if filename.endswith('.mp4'):  # Filtra apenas arquivos .mp4
            input_file = os.path.join(input_folder, filename)
            generate_whisperx(input_file, output_folder)