ViralCutterPRO / scripts /transcribe_cuts.py
RafaG's picture
Upload 85 files
80b326d verified
import os
import subprocess
import sys
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
def transcribe(project_folder="tmp"):
def generate_whisperx(input_file, output_folder, model='large-v3'):
output_file = os.path.join(output_folder, f"{os.path.splitext(os.path.basename(input_file))[0]}.srt")
json_file = os.path.join(output_folder, f"{os.path.splitext(os.path.basename(input_file))[0]}.json") # Define the JSON output file
# Skip processing if the JSON file already exists
if os.path.exists(json_file):
print(f"Arquivo já existe, pulando: {json_file}")
return
command = [
"whisperx",
input_file,
"--model", model,
"--task", "transcribe",
"--align_model", "WAV2VEC2_ASR_LARGE_LV60K_960H",
"--chunk_size", "10",
"--vad_onset", "0.4",
"--vad_offset", "0.3",
"--compute_type", "float32",
"--batch_size", "10",
"--output_dir", output_folder,
"--output_format", "srt",
"--output_format", "json",
]
print(f"Transcrevendo: {input_file}...")
result = subprocess.run(command, shell=True, text=True, capture_output=True)
print(f"Comando executado: {command}")
if result.returncode != 0:
print("Erro durante a transcrição:")
print(result.stderr)
else:
print(f"Transcrição concluída. Arquivo salvo em: {output_file} e {json_file}")
# print(result.stdout)
# Define o diretório de entrada e o diretório de saída
input_folder = os.path.join(project_folder, 'final')
output_folder = os.path.join(project_folder, 'subs')
os.makedirs(output_folder, exist_ok=True)
if not os.path.exists(input_folder):
print(f"Pasta de entrada não encontrada: {input_folder}")
return
# Itera sobre todos os arquivos na pasta de entrada
for filename in os.listdir(input_folder):
if filename.endswith('.mp4'): # Filtra apenas arquivos .mp4
input_file = os.path.join(input_folder, filename)
generate_whisperx(input_file, output_folder)