| from pydub.silence import detect_nonsilent
|
| from pydub import AudioSegment
|
| import numpy as np
|
| import re
|
| import os
|
|
|
| from rvc.lib.utils import format_title
|
|
|
|
|
def process_audio(file_path, *, silence_thresh=-70, min_silence_len=750):
    """Split an audio file into its non-silent chunks and log where each starts.

    The file is loaded with pydub, its non-silent ranges are detected, and each
    range is exported as ``chunk<i>.wav`` into a directory created next to the
    input file. A ``<name>_timestamps.txt`` file is written next to the input,
    with one line per chunk in the form
    ``<chunk path> starts at <start> ms``.

    Args:
        file_path: Path of the audio file to split.
        silence_thresh: Value forwarded to ``detect_nonsilent`` as
            ``silence_thresh``. Defaults to -70, the value previously
            hard-coded here.
        min_silence_len: Value forwarded to ``detect_nonsilent`` as
            ``min_silence_len``. Defaults to 750, the value previously
            hard-coded here.

    Returns:
        ``("Finish", new_dir_path)`` on success, where ``new_dir_path`` is the
        directory holding the exported chunks, or ``("Error", None)`` if any
        step raised an exception (the exception is printed, not propagated).
    """
    try:
        song = AudioSegment.from_file(file_path)

        nonsilent_parts = detect_nonsilent(
            song, min_silence_len=min_silence_len, silence_thresh=silence_thresh
        )

        file_dir = os.path.dirname(file_path)
        # NOTE(review): this keeps only the text before the FIRST dot, so
        # "a.b.wav" becomes "a". Left unchanged on purpose: callers may rebuild
        # the directory / timestamps file name the same way -- confirm before
        # switching to os.path.splitext.
        file_name = os.path.basename(file_path).split(".")[0]
        file_name = format_title(file_name)
        new_dir_path = os.path.join(file_dir, file_name)
        os.makedirs(new_dir_path, exist_ok=True)

        timestamps_file = os.path.join(file_dir, f"{file_name}_timestamps.txt")

        segment_count = 0
        # Opening once in "w" mode truncates any stale file from a previous run
        # and avoids reopening the file for every chunk.
        with open(timestamps_file, "w", encoding="utf-8") as f:
            for i, (start_i, end_i) in enumerate(nonsilent_parts):
                chunk_file_path = os.path.join(new_dir_path, f"chunk{i}.wav")
                song[start_i:end_i].export(chunk_file_path, format="wav")

                print(f"Segment {i} created!")
                segment_count += 1

                f.write(f"{chunk_file_path} starts at {start_i} ms\n")

        print(f"Total segments created: {segment_count}")
        print(f"Split all chunks for {file_path} successfully!")

        return "Finish", new_dir_path

    except Exception as e:
        # Best-effort contract: report the failure through the status tuple
        # instead of raising.
        print(f"An error occurred: {e}")
        return "Error", None
|
|
|
|
|
def merge_audio(timestamps_file):
    """Rebuild one audio stream from chunk files listed in a timestamps file.

    Each line of ``timestamps_file`` that contains
    ``chunk<N>.wav starts at <T> ms`` contributes one chunk. The chunk is
    loaded from ``<dir of timestamps_file>/<prefix>/chunk<N>.wav``, where
    ``<prefix>`` is the timestamps file name with ``_timestamps.txt`` removed.
    Before each chunk, silence is inserted to cover the gap between the end of
    the previous chunk and this chunk's start time (never a negative amount).

    Args:
        timestamps_file: Path to the ``*_timestamps.txt`` file.

    Returns:
        ``(frame_rate, samples)`` where ``samples`` is a NumPy array built from
        the merged segment's sample array, or ``None`` if an error occurred or
        the file listed no chunks (the problem is printed, not raised).
    """
    try:
        prefix = os.path.basename(timestamps_file).replace("_timestamps.txt", "")
        timestamps_dir = os.path.dirname(timestamps_file)
        chunk_dir = os.path.join(timestamps_dir, prefix)

        # The dot is escaped so only a literal ".wav" extension matches; the
        # pattern is compiled once instead of being looked up per line.
        chunk_pattern = re.compile(r"(chunk\d+\.wav) starts at (\d+) ms")

        with open(timestamps_file, "r", encoding="utf-8") as f:
            lines = f.readlines()

        audio_segments = []
        last_end_time = 0

        print(f"Processing file: {timestamps_file}")

        for line in lines:
            match = chunk_pattern.search(line)
            if not match:
                continue

            filename, start_time = match.groups()
            start_time = int(start_time)
            chunk_file = os.path.join(chunk_dir, filename)

            # Pad with silence up to this chunk's start; clamp at zero in case
            # the previous chunk ran past it.
            silence_duration = max(start_time - last_end_time, 0)
            audio_segments.append(AudioSegment.silent(duration=silence_duration))

            audio = AudioSegment.from_wav(chunk_file)
            audio_segments.append(audio)

            last_end_time = start_time + len(audio)

            print(f"Processed chunk: {chunk_file}")

        if not audio_segments:
            # sum([]) would yield the int 0 and fail later with an unrelated
            # AttributeError; report the real problem instead.
            print(f"An error occurred: no chunk entries found in {timestamps_file}")
            return None

        merged_audio = sum(audio_segments)
        merged_audio_np = np.array(merged_audio.get_array_of_samples())

        return merged_audio.frame_rate, merged_audio_np

    except Exception as e:
        print(f"An error occurred: {e}")
        return None
|
|
|