File size: 2,689 Bytes
b5a064f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import os
from typing import *

import tqdm
from pydub import AudioSegment
from pydub.silence import split_on_silence


def _load_audio(path: str) -> "AudioSegment":
    """Decode *path* with pydub, choosing the decoder by file extension.

    The extension is compared case-insensitively, so ``.MP3`` and ``.mp3``
    are treated alike.

    Raises:
        ValueError: If the extension is not ``.mp3``, ``.wav`` or ``.flac``.
    """
    extension = os.path.splitext(path)[1].lower()
    if extension == ".mp3":
        return AudioSegment.from_mp3(path)
    if extension == ".wav":
        return AudioSegment.from_wav(path)
    if extension == ".flac":
        return AudioSegment.from_file(path, "flac")
    raise ValueError(
        "Invalid file format. Only MP3, WAV and FLAC files are supported."
    )


def _regroup_chunks(chunks, min_len, max_len, margin, padding):
    """Merge too-short chunks forward and split too-long chunks.

    Only ``len()``, slicing and ``+`` are used on the chunks, so lengths are
    in whatever unit ``len(chunk)`` reports (milliseconds for pydub's
    ``AudioSegment``, per the pydub documentation).

    Args:
        chunks: Iterable of chunks, in playback order.
        min_len: Chunks whose length is not greater than this are held back
            and prepended to the following chunk. ``None`` disables the check.
        max_len: Chunks longer than this are cut into pieces of
            ``max_len + margin``, starting every ``max_len``. ``None``
            disables splitting.
        margin: Extra length appended to each piece, so consecutive pieces
            overlap by this amount.
        padding: When the final piece of a split is shorter than ``min_len``,
            glue it to the previous piece instead of discarding it (only when
            the split produced more than two pieces).

    Returns:
        The list of regrouped chunks.
    """
    regrouped = []
    carry = None  # short audio waiting to be prepended to the next chunk

    for chunk in chunks:
        if carry is not None:
            chunk = carry + chunk
            carry = None

        if min_len is not None and len(chunk) <= min_len:
            # Still too short, even after absorbing any earlier carry.
            carry = chunk
            continue

        if max_len is None or len(chunk) <= max_len:
            regrouped.append(chunk)
            continue

        pieces = [
            chunk[start : start + max_len + margin]
            for start in range(0, len(chunk) - margin, max_len)
        ]

        # Without a minimum length there is no "too short" tail; the
        # original compared against None here and raised TypeError.
        if min_len is None or len(pieces[-1]) >= min_len:
            regrouped.extend(pieces)
        elif padding and len(pieces) > 2:
            regrouped.extend(pieces[:-2])
            regrouped.append(pieces[-2] + pieces[-1])
        else:
            # NOTE(review): with padding=True and exactly two pieces the short
            # tail is dropped rather than merged -- confirm whether the
            # threshold above should be ``>= 2``.
            regrouped.extend(pieces[:-1])

    # NOTE(review): a trailing ``carry`` (short audio at the very end of the
    # input) is discarded, as in the original implementation.
    return regrouped


def separate_audio(
    input: str,
    output: str,
    silence_thresh: int,
    min_silence_len: int = 1000,
    keep_silence: int = 100,
    margin: int = 0,
    padding: bool = False,
    min: Optional[int] = None,
    max: Optional[int] = None,
):
    """Split audio file(s) on silence and export the pieces as WAV files.

    Each input file is cut with pydub's ``split_on_silence``; the resulting
    chunks are then regrouped (short chunks merged into the next one, long
    chunks split) and written to *output* as ``<basename>_<index>.wav``.
    Files sharing a basename (e.g. ``a.mp3`` and ``a.wav``) therefore write
    to the same output names.

    Note that the parameter names ``input``, ``min`` and ``max`` shadow the
    builtins inside this function; they are kept for caller compatibility.

    Args:
        input: Path to a single audio file, or to a directory whose entries
            are all processed.
        output: Directory for the exported chunks; created if missing.
        silence_thresh: Forwarded to ``split_on_silence``.
        min_silence_len: Forwarded to ``split_on_silence``.
        keep_silence: Forwarded to ``split_on_silence``.
        margin: Overlap added to each piece when a long chunk is split.
        padding: Merge a too-short final piece into the previous piece
            instead of dropping it (see ``_regroup_chunks``).
        min: Minimum chunk length; shorter chunks are merged forward.
            ``None`` disables the check.
        max: Maximum chunk length before a chunk is split. ``None`` disables
            splitting.

    Raises:
        ValueError: If *input* is neither a file nor a directory, or if a
            file has an unsupported extension.
    """
    if os.path.isfile(input):
        files = [input]
    elif os.path.isdir(input):
        # Sorted so the processing order does not depend on the filesystem.
        files = [os.path.join(input, name) for name in sorted(os.listdir(input))]
    else:
        raise ValueError("input must be a file or directory")

    os.makedirs(output, exist_ok=True)

    for path in files:
        audio = _load_audio(path)

        chunks = split_on_silence(
            audio,
            min_silence_len=min_silence_len,
            silence_thresh=silence_thresh,
            keep_silence=keep_silence,
        )

        # tqdm only wraps the iterable to show progress per source file.
        output_chunks: List["AudioSegment"] = _regroup_chunks(
            tqdm.tqdm(chunks), min, max, margin, padding
        )

        basename = os.path.splitext(os.path.basename(path))[0]
        for index, chunk in enumerate(output_chunks):
            filepath = os.path.join(output, f"{basename}_{index}.wav")
            chunk.export(filepath, format="wav")