ViralCutterPRO / scripts /cut_json.py
RafaG's picture
Upload 85 files
80b326d verified
import json
import os
def _rebase_interval(begin, finish, start_time, end_time):
    """Clamp ``(begin, finish)`` to the cut window and shift it so the window starts at 0."""
    return max(0, begin - start_time), min(end_time, finish) - start_time


def _retimed_copy(entry, begin, finish):
    """Return a shallow copy of ``entry`` with 'start'/'end' set to ``begin``/``finish``."""
    clone = entry.copy()
    clone.update(start=begin, end=finish)
    return clone


def process_segments(data, start_time, end_time):
    """Extract the part of a transcript that overlaps a time window.

    Every segment in ``data['segments']`` that intersects the window
    ``(start_time, end_time)`` is shallow-copied with its 'start'/'end'
    clamped to the window and re-expressed relative to ``start_time``.
    When a segment carries a 'words' list, the same treatment is applied
    to each word, and words outside the window are discarded.

    Missing 'start'/'end' keys (on segments or words) are read as 0.

    Args:
        data: Mapping with an optional 'segments' list of dict entries.
        start_time: Beginning of the cut, in the transcript's time base.
        end_time: End of the cut, in the transcript's time base.

    Returns:
        A new dict ``{'segments': [...]}``; the input is not modified.
    """
    trimmed_segments = []

    for segment in data.get('segments', []):
        first = segment.get('start', 0)
        last = segment.get('end', 0)

        # Segments that lie completely outside the window are ignored.
        if last <= start_time or first >= end_time:
            continue

        rel_start, rel_end = _rebase_interval(first, last, start_time, end_time)

        # Keep only the words that overlap the window, with shifted timestamps.
        has_words = 'words' in segment
        kept_words = []
        for word in (segment['words'] if has_words else ()):
            w_begin = word.get('start', 0)
            w_finish = word.get('end', 0)
            if w_finish > start_time and w_begin < end_time:
                kept_words.append(
                    _retimed_copy(
                        word,
                        *_rebase_interval(w_begin, w_finish, start_time, end_time),
                    )
                )

        # A segment survives if any word remains or it still spans a positive duration.
        if not (kept_words or rel_end > rel_start):
            continue

        trimmed = _retimed_copy(segment, rel_start, rel_end)
        if has_words:
            trimmed['words'] = kept_words
        trimmed_segments.append(trimmed)

    return {'segments': trimmed_segments}
def cut_json_transcript(input_json_path, output_json_path, start_time, end_time):
    """Write the slice of a transcript JSON that falls inside a time window.

    Loads the transcript at ``input_json_path`` (WhisperX output, per the
    original note), trims it with ``process_segments`` and stores the result
    at ``output_json_path`` as indented UTF-8 JSON with adjusted timestamps.

    This is best-effort: a missing input file or any error during
    reading, trimming or writing is reported with ``print`` and not raised.

    Args:
        input_json_path: Path of the transcript JSON to read.
        output_json_path: Path where the trimmed transcript is written.
        start_time: Beginning of the cut window.
        end_time: End of the cut window.

    Returns:
        None.
    """
    input_present = os.path.exists(input_json_path)
    if not input_present:
        print(f"Aviso: {input_json_path} não encontrado. Não foi possível gerar JSON do corte.")
        return

    try:
        with open(input_json_path, 'r', encoding='utf-8') as source:
            transcript = json.load(source)

        trimmed = process_segments(transcript, start_time, end_time)

        with open(output_json_path, 'w', encoding='utf-8') as target:
            json.dump(trimmed, target, indent=2, ensure_ascii=False)

        print(f"JSON de legenda gerado: {output_json_path}")
    except Exception as e:
        # Deliberately broad: a failed cut must not abort the caller.
        print(f"Erro ao cortar JSON: {e}")