File size: 4,793 Bytes
b1e9205
 
a1a52f9
b1e9205
 
 
e015c08
b1e9205
 
218960f
 
 
 
 
 
 
 
 
 
 
 
 
b1e9205
 
 
 
 
 
 
 
 
 
 
 
 
 
218960f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b1e9205
 
e015c08
 
 
 
218960f
 
 
e015c08
218960f
 
e015c08
218960f
 
 
 
 
 
 
 
 
 
 
 
e015c08
 
b1e9205
 
 
e015c08
b1e9205
e015c08
b1e9205
e015c08
 
 
 
 
 
 
 
b1e9205
e015c08
2d3c912
a1a52f9
e015c08
b1e9205
e015c08
 
 
 
 
 
 
 
 
 
 
 
 
b1e9205
e015c08
 
 
 
 
 
 
 
 
b1e9205
e015c08
 
 
b1e9205
2d3c912
a1a52f9
 
e015c08
 
 
 
2d3c912
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import argparse
import os
import subprocess

from tqdm import tqdm

# Module-level settings.
# NOTE(review): START is not referenced in the visible code — confirm its purpose.
START = 0
# NOTE(review): presumably the default output directory for chunks — verify against caller.
FOLDER = "chunks"
# When true, slice_audio skips any chunk whose output file already exists.
DEBUG = True

def seconds_to_hms(seconds):
    """
    Convert seconds to hours, minutes, seconds format.

    The conversion is done with constant-time arithmetic, so very large
    inputs are handled without looping once per minute.

    Args:
        seconds (int | float): Total number of seconds to convert. Values
            below 60 (including negative values) are returned unchanged in
            the seconds slot with zero hours and minutes.

    Returns:
        tuple: A tuple containing (hours, minutes, seconds). Hours and
            minutes are always ints; seconds keeps the numeric type of the
            input (a float input yields a float remainder).

    Example:
        >>> seconds_to_hms(3665)
        (1, 1, 5)
    """
    # Written as "not >=" (rather than "<") so that NaN also takes this
    # early exit instead of reaching int() below.
    if not seconds >= 60:
        return 0, 0, seconds

    total_minutes, second = divmod(seconds, 60)
    # int() keeps hour/minute integral even when `seconds` is a float.
    hour, minute = divmod(int(total_minutes), 60)
    return hour, minute, second

def hms_to_seconds(hour, minute, second):
    """
    Collapse an (hours, minutes, seconds) triple into a single second count.

    Args:
        hour (int): Hours component
        minute (int): Minutes component
        second (int): Seconds component

    Returns:
        int: hour * 3600 + minute * 60 + second

    Example:
        >>> hms_to_seconds(1, 1, 5)
        3665
    """
    seconds_from_hours = hour * 3600
    seconds_from_minutes = minute * 60
    return seconds_from_hours + seconds_from_minutes + second

def _probe_duration_seconds(input_audio_path):
    """Return the duration of the audio file in seconds, as reported by ffprobe."""
    # Argument list (no shell) so paths with spaces or shell metacharacters
    # are passed to ffprobe verbatim.
    result = subprocess.run(
        [
            "ffprobe",
            "-v", "error",
            "-show_entries", "format=duration",
            "-of", "default=noprint_wrappers=1:nokey=1",
            input_audio_path,
        ],
        capture_output=True,
        text=True,
        check=False,
    )
    raw_duration = result.stdout.strip()
    try:
        return float(raw_duration)
    except ValueError as err:
        detail = result.stderr.strip() or repr(raw_duration)
        raise ValueError(
            f"Could not read duration of {input_audio_path!r} with ffprobe: {detail}"
        ) from err


def _format_hms(seconds):
    """Format a whole number of seconds as an HH:MM:SS timestamp for ffmpeg."""
    hour, minute, second = seconds_to_hms(seconds)
    return f"{hour:02d}:{minute:02d}:{second:02d}"


def slice_audio(input_audio_path, output_folder, chunks_seconds, chunk_overlap_seconds):
    """
    Slice audio into chunks with specified duration and overlap.

    The input duration is read with ffprobe, then ffmpeg is invoked once per
    chunk. Consecutive chunks start ``chunks_seconds - chunk_overlap_seconds``
    seconds apart; the last chunk runs to the end of the input. At least one
    chunk is always produced, even when the audio is shorter than the overlap.

    When the module-level ``DEBUG`` flag is true, a chunk whose output file
    already exists is not regenerated.

    Args:
        input_audio_path (str): Path to the input audio file
        output_folder (str): Directory where the chunks will be saved; it is
            created if it does not exist
        chunks_seconds (int): Duration of each chunk in seconds
        chunk_overlap_seconds (int): Amount of overlap between consecutive chunks in seconds

    Returns:
        None: Creates audio chunks in the specified output folder and writes
              ``output_files.txt`` there, listing one chunk path per line.
              Chunks for which ffmpeg reported a failure are not listed.

    Raises:
        ValueError: If chunk_overlap_seconds is greater than or equal to
            chunks_seconds, or if ffprobe does not return a parsable duration
        FileNotFoundError: If the ffprobe or ffmpeg executable cannot be found

    Example:
        >>> slice_audio("input.mp3", "chunks", 30, 5)  # doctest: +SKIP
    """
    # Validate the arguments before spending time on external tools.
    effective_chunk = chunks_seconds - chunk_overlap_seconds
    if effective_chunk <= 0:
        raise ValueError("Overlap duration must be less than chunk duration")

    name, extension = os.path.splitext(os.path.basename(input_audio_path))

    duration = _probe_duration_seconds(input_audio_path)
    hour, minute, second = seconds_to_hms(int(duration))
    print(f"\tDuration ({duration} seconds): {hour:02d}:{minute:02d}:{second:02d}")

    # Ceiling division via negated floor division. Clamp to one chunk so audio
    # shorter than the overlap still yields an output file.
    num_chunks = max(1, -(-int(duration - chunk_overlap_seconds) // effective_chunk))

    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    output_files = []
    # Context manager guarantees the progress bar is closed on errors too.
    with tqdm(total=num_chunks, desc="Slice audio into chunks progress") as progress_bar:
        for chunk in range(num_chunks):
            output = f"{output_folder}/{name}_chunk{chunk:03d}{extension}"

            # In DEBUG mode, reuse chunks left over from a previous run.
            if DEBUG and os.path.exists(output):
                output_files.append(output)
                progress_bar.update(1)
                continue

            start_time = chunk * effective_chunk

            # -y overwrites an existing output file without prompting.
            command = ["ffmpeg", "-y", "-i", input_audio_path, "-ss", _format_hms(start_time)]
            if chunk < num_chunks - 1:
                # Every chunk but the last gets an explicit length; the last
                # one simply runs to the end of the input.
                end_time = min(start_time + chunks_seconds, duration)
                command += ["-t", _format_hms(end_time - start_time)]
            command += ["-loglevel", "error", output]

            completed = subprocess.run(command, check=False)
            if completed.returncode == 0:
                output_files.append(output)
            else:
                # Keep going, but do not list a chunk that was not produced.
                tqdm.write(
                    f"ffmpeg failed (exit code {completed.returncode}) "
                    f"for chunk {chunk}; skipping {output}"
                )
            progress_bar.update(1)

    # Write output files to a txt file (with overwrite)
    with open(f"{output_folder}/output_files.txt", "w") as f:
        f.writelines(f"{output_file}\n" for output_file in output_files)