Spaces:
Running
Running
| import json | |
| import os | |
def _timestamp(item, key):
    """Return ``item[key]``, treating a missing key or an explicit null as 0."""
    value = item.get(key)
    return 0 if value is None else value


def process_segments(data, start_time, end_time):
    """Clip transcript segments to a time window and rebase their timestamps.

    Segments (and, when present, their ``words``) that overlap the window
    ``start_time``..``end_time`` are shallow-copied, clamped to the window and
    shifted so that ``start_time`` becomes 0. The input ``data`` is not
    modified; every other key of a segment/word is carried over unchanged.

    Args:
        data: Mapping with an optional ``'segments'`` list. Each segment is a
            mapping with optional ``'start'``, ``'end'`` and ``'words'`` keys;
            each word is a mapping with optional ``'start'`` and ``'end'``.
        start_time: Window start, in the same unit as the timestamps.
        end_time: Window end, in the same unit as the timestamps.

    Returns:
        ``{'segments': [...]}`` holding the clipped copies. The list is empty
        when nothing overlaps or when the window is empty/inverted.
    """
    new_segments = []

    # An empty or inverted window cannot contain anything. Without this guard
    # an entry straddling the window would be emitted with end <= start.
    if end_time <= start_time:
        return {'segments': new_segments}

    # `or []` also covers an explicit JSON null for 'segments'.
    for segment in data.get('segments') or []:
        seg_start = _timestamp(segment, 'start')
        seg_end = _timestamp(segment, 'end')

        # Skip segments with no intersection with the window.
        if seg_end <= start_time or seg_start >= end_time:
            continue

        # Clamp to the window and make timestamps relative to the cut.
        new_seg_start = max(0, seg_start - start_time)
        new_seg_end = min(end_time, seg_end) - start_time

        # Keep only the words that overlap the window, if any are present.
        # NOTE(review): a word without 'start'/'end' is treated as being at
        # time 0, so it is dropped for any non-negative start_time -- confirm
        # this is the intended handling of untimed words.
        new_words = []
        for word in segment.get('words') or []:
            w_start = _timestamp(word, 'start')
            w_end = _timestamp(word, 'end')
            if w_end > start_time and w_start < end_time:
                word_copy = word.copy()
                word_copy['start'] = max(0, w_start - start_time)
                word_copy['end'] = min(end_time, w_end) - start_time
                new_words.append(word_copy)

        # Emit the segment if words survived or it still has positive duration
        # (degenerate zero-length segments without words are dropped).
        if new_words or new_seg_end > new_seg_start:
            new_segment = segment.copy()
            new_segment['start'] = new_seg_start
            new_segment['end'] = new_seg_end
            # Only segments that carried a 'words' key get one in the output.
            if 'words' in segment:
                new_segment['words'] = new_words
            new_segments.append(new_segment)

    return {'segments': new_segments}
def cut_json_transcript(input_json_path, output_json_path, start_time, end_time):
    """Write a clipped copy of a transcript JSON file.

    Loads ``input_json_path`` (described by the original author as WhisperX
    output), passes it through ``process_segments`` for the window
    ``start_time``..``end_time`` and dumps the result to ``output_json_path``
    as UTF-8, indented JSON with non-ASCII characters kept as-is.

    This is best-effort: a missing input file or any exception raised while
    reading, processing or writing is reported with ``print`` and the
    function returns ``None`` without raising.
    """
    if os.path.exists(input_json_path):
        try:
            with open(input_json_path, 'r', encoding='utf-8') as source:
                transcript = json.load(source)

            clipped = process_segments(transcript, start_time, end_time)

            # The output file is opened only after processing succeeded, so a
            # failure above never creates or truncates it.
            with open(output_json_path, 'w', encoding='utf-8') as target:
                json.dump(clipped, target, indent=2, ensure_ascii=False)

            print(f"JSON de legenda gerado: {output_json_path}")
        except Exception as err:
            print(f"Erro ao cortar JSON: {err}")
    else:
        print(f"Aviso: {input_json_path} não encontrado. Não foi possível gerar JSON do corte.")