Upload 712 files

78c9d5b verified 3 months ago

11.5 kB

	import sys
	import math
	import array
	from .utils import (
	db_to_float,
	ratio_to_db,
	register_pydub_effect,
	make_chunks,
	audioop,
	get_min_max_value
	)
	from .silence import split_on_silence
	from .exceptions import TooManyMissingFrames, InvalidDuration

	# Python 3 has no ``xrange`` builtin; alias it to ``range`` so code in this
	# module that calls ``xrange`` runs on both major versions.
	if sys.version_info[0] >= 3:
		xrange = range


	@register_pydub_effect
	def apply_mono_filter_to_each_channel(seg, filter_fn):
		"""
		Run a mono-only filter over every channel of `seg` independently.

		seg - the segment to process; it is split with split_to_mono()
		filter_fn - callable that receives one mono segment and returns the
			filtered mono segment

		Returns a new segment spawned from `seg` in which sample `k` of channel
		`c` is stored at index `k * channels + c`.
		"""
		stride = seg.channels
		filtered_channels = [filter_fn(mono) for mono in seg.split_to_mono()]

		# Begin with the segment's own sample array and overwrite it in place,
		# one channel at a time.
		interleaved = seg.get_array_of_samples()
		for channel_no, mono in enumerate(filtered_channels):
			position = channel_no
			for value in mono.get_array_of_samples():
				interleaved[position] = value
				# the next sample of the same channel is one full frame further on
				position += stride

		return seg._spawn(interleaved)


	@register_pydub_effect
	def normalize(seg, headroom=0.1):
		"""
		Apply gain so that the peak of `seg` ends up `headroom` dB below the
		maximum possible amplitude.

		headroom - how close to the maximum volume to bring the signal
			(specified in dB)

		A segment whose peak is 0 is silent and cannot be normalized; it is
		returned as-is.
		"""
		peak = seg.max
		if peak == 0:
			return seg

		ceiling = seg.max_possible_amplitude * db_to_float(-headroom)
		return seg.apply_gain(ratio_to_db(ceiling / peak))


	@register_pydub_effect
	def speedup(seg, playback_speed=1.5, chunk_size=150, crossfade=25):
		"""
		Speed up `seg` by cutting it into chunks, dropping the tail of every
		chunk except the last, and joining what is left with a crossfade.

		playback_speed - speed multiplier; 1.25 keeps 80% of the audio
		chunk_size - length (in ms) of the kept part of each chunk when
			playback_speed < 2.0, or of the removed part otherwise
		crossfade - crossfade (in ms) used when joining chunks; it is capped
			at one less than the amount removed per chunk and never negative

		Raises Exception when the segment yields fewer than two chunks for
		these settings.

		When the computed amount to remove per chunk is zero or negative
		(playback_speed at, barely above, or below 1.0) no audio is removed.
		"""
		# by default we keep audio in 150ms chunks since one waveform at 20Hz
		# is 50ms long (20 Hz is the lowest frequency audible to humans)

		# portion of AUDIO TO KEEP. if playback speed is 1.25 we keep 80% (0.8)
		# and discard 20% (0.2)
		atk = 1.0 / playback_speed

		if playback_speed < 2.0:
			# throwing out less than half the audio - keep `chunk_size` ms of
			# each chunk and remove the proportional remainder
			ms_to_remove_per_chunk = int(chunk_size * (1 - atk) / atk)
		else:
			# throwing out at least half the audio - remove `chunk_size` ms of
			# each chunk and keep the proportional remainder
			ms_to_remove_per_chunk = int(chunk_size)
			chunk_size = int(atk * chunk_size / (1 - atk))

		# the crossfade cannot be longer than the amount of audio we're
		# removing, and must not go negative when there is nothing to remove
		crossfade = max(0, min(crossfade, ms_to_remove_per_chunk - 1))

		chunks = make_chunks(seg, chunk_size + ms_to_remove_per_chunk)
		if len(chunks) < 2:
			raise Exception("Could not speed up AudioSegment, it was too short {2:0.2f}s for the current settings:\n{0}ms chunks at {1:0.1f}x speedup".format(
				chunk_size, playback_speed, seg.duration_seconds))

		# we'll actually truncate a bit less than we calculated to make up for
		# the crossfade between chunks
		ms_to_remove_per_chunk -= crossfade

		# we don't want to truncate the last chunk since it is not guaranteed
		# to be the full chunk length
		last_chunk = chunks[-1]
		if ms_to_remove_per_chunk > 0:
			chunks = [chunk[:-ms_to_remove_per_chunk] for chunk in chunks[:-1]]
		else:
			# nothing to trim: chunk[:-0] would wrongly produce empty chunks
			chunks = chunks[:-1]

		out = chunks[0]
		for chunk in chunks[1:]:
			out = out.append(chunk, crossfade=crossfade)

		out += last_chunk
		return out


	@register_pydub_effect
	def strip_silence(seg, silence_len=1000, silence_thresh=-16, padding=100):
		"""
		Split `seg` with split_on_silence() and join the resulting pieces
		back together, crossfading neighbours over half of `padding`.

		silence_len, silence_thresh, padding - forwarded positionally, in that
			order, to split_on_silence() after `seg`

		Returns an empty slice of `seg` when the split yields no pieces.
		Raises InvalidDuration when `padding` is greater than `silence_len`.
		"""
		if padding > silence_len:
			raise InvalidDuration("padding cannot be longer than silence_len")

		pieces = split_on_silence(seg, silence_len, silence_thresh, padding)
		if len(pieces) == 0:
			return seg[0:0]

		overlap = padding / 2
		joined, remainder = pieces[0], pieces[1:]
		for piece in remainder:
			joined = joined.append(piece, crossfade=overlap)

		return joined


	@register_pydub_effect
	def compress_dynamic_range(seg, threshold=-20.0, ratio=4.0, attack=5.0, release=50.0):
		"""
		Keyword Arguments:

			threshold - default: -20.0
				Threshold in dBFS. default of -20.0 means -20dB relative to the
				maximum possible volume. 0dBFS is the maximum possible value so
				all values for this argument should be negative.

			ratio - default: 4.0
				Compression ratio. Audio louder than the threshold will be
				reduced to 1/ratio the volume. A ratio of 4.0 is equivalent to
				a setting of 4:1 in a pro-audio compressor like the Waves C1.

			attack - default: 5.0
				Attack in milliseconds. How long it should take for the compressor
				to kick in once the audio has exceeded the threshold.

			release - default: 50.0
				Release in milliseconds. How long it should take for the compressor
				to stop compressing after the audio has fallen below the threshold.


		For an overview of Dynamic Range Compression, and more detailed explanation
		of the related terminology, see:

			http://en.wikipedia.org/wiki/Dynamic_range_compression
		"""

		threshold_amplitude = seg.max_possible_amplitude * db_to_float(threshold)

		# the loudness at a frame is measured over the `attack` ms leading up to it
		window_frames = int(seg.frame_count(ms=attack))

		def windowed_rms(end_frame):
			window = seg.get_sample_slice(end_frame - window_frames, end_frame)
			return window.rms

		def excess_db(level):
			# dB by which `level` exceeds the threshold, never negative
			if level == 0:
				return 0.0
			return max(ratio_to_db(level / threshold_amplitude), 0)

		frames_out = []

		# amount to reduce the volume of the audio by (in dB)
		attenuation = 0.0

		attack_frames = seg.frame_count(ms=attack)
		release_frames = seg.frame_count(ms=release)
		total_frames = int(seg.frame_count())
		for frame_no in xrange(total_frames):
			level = windowed_rms(frame_no)

			# with a ratio of 4.0 this means the volume will exceed the
			# threshold by 1/4 the amount (of dB) that it would otherwise
			max_attenuation = (1 - (1.0 / ratio)) * excess_db(level)

			step_up = max_attenuation / attack_frames
			# NOTE(review): when `level` is at or below the threshold,
			# max_attenuation is 0 and so is this release step, so attenuation
			# does not decrease in that case - confirm this is intended.
			step_down = max_attenuation / release_frames

			if level > threshold_amplitude and attenuation <= max_attenuation:
				attenuation = min(attenuation + step_up, max_attenuation)
			else:
				attenuation = max(attenuation - step_down, 0)

			frame = seg.get_frame(frame_no)
			if attenuation != 0.0:
				frame = audioop.mul(frame, seg.sample_width, db_to_float(-attenuation))

			frames_out.append(frame)

		return seg._spawn(data=b''.join(frames_out))


	# Invert the phase of the signal.

	@register_pydub_effect
	def invert_phase(seg, channels=(1, 1)):
		"""
		Invert the phase of the whole segment or of a single stereo channel.

		channels - two flags (left, right) selecting which channel(s) to
			invert; any two-item sequence is accepted and each item is read as
			a boolean. (1, 1) inverts every sample of `seg` regardless of its
			channel count, (1, 0) inverts only the left channel, (0, 1) only
			the right channel, and (0, 0) inverts neither.

		Raises Exception when a single channel is selected and `seg` is not
		stereo; segments are never converted to stereo implicitly.
		"""
		# normalise to booleans so lists and other truthy/falsy flags behave
		# the same as the (1, 0) style tuples
		invert_left, invert_right = (bool(flag) for flag in channels)

		if invert_left and invert_right:
			inverted = audioop.mul(seg._data, seg.sample_width, -1.0)
			return seg._spawn(data=inverted)

		if seg.channels != 2:
			raise Exception("Can't implicitly convert an AudioSegment with " + str(seg.channels) + " channels to stereo.")

		left, right = seg.split_to_mono()
		if invert_left:
			left = left.invert_phase()
		if invert_right:
			right = right.invert_phase()

		return seg.from_mono_audiosegments(left, right)



	# High and low pass filters based on implementation found on Stack Overflow:
	# http://stackoverflow.com/questions/13882038/implementing-simple-high-and-low-pass-filters-in-c

	@register_pydub_effect
	def low_pass_filter(seg, cutoff):
		"""
		Apply a single-pole low pass filter to every channel of `seg`.

		cutoff - Frequency (in Hz) where higher frequency signal will begin to
			be reduced by 6dB per octave (doubling in frequency) above this point

		Returns a new segment spawned from `seg`. A segment with no frames is
		returned with no samples changed.
		"""
		RC = 1.0 / (cutoff * 2 * math.pi)
		dt = 1.0 / seg.frame_rate

		alpha = dt / (RC + dt)

		original = seg.get_array_of_samples()
		filteredArray = array.array(seg.array_type, original)

		n_channels = seg.channels
		frame_count = int(seg.frame_count())
		if frame_count == 0:
			# no first frame to seed the filter state from
			return seg._spawn(data=filteredArray)

		# the first frame passes through untouched and seeds each channel's
		# running value
		last_val = list(original[:n_channels])

		for i in range(1, frame_count):
			base = i * n_channels
			for j in range(n_channels):
				offset = base + j
				last_val[j] += alpha * (original[offset] - last_val[j])
				filteredArray[offset] = int(last_val[j])

		return seg._spawn(data=filteredArray)


	@register_pydub_effect
	def high_pass_filter(seg, cutoff):
		"""
		Apply a single-pole high pass filter to every channel of `seg`.

		cutoff - Frequency (in Hz) where lower frequency signal will begin to
			be reduced by 6dB per octave (doubling in frequency) below this point

		Returns a new segment spawned from `seg`. A segment with no frames is
		returned with no samples changed.
		"""
		RC = 1.0 / (cutoff * 2 * math.pi)
		dt = 1.0 / seg.frame_rate

		alpha = RC / (RC + dt)

		minval, maxval = get_min_max_value(seg.sample_width * 8)

		original = seg.get_array_of_samples()
		filteredArray = array.array(seg.array_type, original)

		n_channels = seg.channels
		frame_count = int(seg.frame_count())
		if frame_count == 0:
			# no first frame to seed the filter state from
			return seg._spawn(data=filteredArray)

		# the first frame passes through untouched and seeds each channel's
		# running value
		last_val = list(original[:n_channels])

		for i in range(1, frame_count):
			base = i * n_channels
			for j in range(n_channels):
				offset = base + j
				# same channel, previous frame
				offset_minus_1 = offset - n_channels

				last_val[j] = alpha * (last_val[j] + original[offset] - original[offset_minus_1])
				# clamp to the range returned by get_min_max_value before
				# storing; the running value itself is left unclamped
				filteredArray[offset] = int(min(max(last_val[j], minval), maxval))

		return seg._spawn(data=filteredArray)


	@register_pydub_effect
	def pan(seg, pan_amount):
		"""
		Shift the left/right balance of `seg`.

		pan_amount should be between -1.0 (100% left) and +1.0 (100% right)

		When pan_amount == 0.0 the left/right balance is not changed.

		Panning does not alter the perceived loudness, but since loudness
		is decreasing on one side, the other side needs to get louder to
		compensate. When panned hard left, the left channel will be 3dB louder.

		Raises ValueError when pan_amount is outside the -1.0 to +1.0 range.
		"""
		if not -1.0 <= pan_amount <= 1.0:
			raise ValueError("pan_amount should be between -1.0 (100% left) and +1.0 (100% right)")

		full_boost_db = ratio_to_db(2.0)
		louder_db = abs(pan_amount) * full_boost_db

		# whatever share of the doubled amplitude the louder side does not
		# take is what remains for the quieter side
		quieter_factor = db_to_float(full_boost_db) - db_to_float(louder_db)
		quieter_db = ratio_to_db(quieter_factor)

		# Cut boost in half (max boost == 3dB) - in reality 2 speakers
		# do not sum to a full 6 dB.
		louder_db = louder_db / 2.0

		if pan_amount < 0:
			left_db, right_db = louder_db, quieter_db
		else:
			left_db, right_db = quieter_db, louder_db
		return seg.apply_gain_stereo(left_db, right_db)

	@register_pydub_effect
	def apply_gain_stereo(seg, left_gain=0.0, right_gain=0.0):
		"""
		Apply separate gains to the left and right channels.

		left_gain - amount of gain to apply to the left channel (in dB)
		right_gain - amount of gain to apply to the right channel (in dB)

		note: mono audio segments will be converted to stereo

		Raises ValueError when `seg` is neither mono nor stereo.
		"""
		if seg.channels == 1:
			left = right = seg
		elif seg.channels == 2:
			left, right = seg.split_to_mono()
		else:
			# without this branch `left`/`right` would be unbound below
			raise ValueError("apply_gain_stereo only supports mono or stereo segments, got " + str(seg.channels) + " channels")

		l_mult_factor = db_to_float(left_gain)
		r_mult_factor = db_to_float(right_gain)

		# scale each side, then expand it to stereo with the other side's
		# factor set to 0 so the two buffers can simply be added together
		left_data = audioop.mul(left._data, left.sample_width, l_mult_factor)
		left_data = audioop.tostereo(left_data, left.sample_width, 1, 0)

		right_data = audioop.mul(right._data, right.sample_width, r_mult_factor)
		right_data = audioop.tostereo(right_data, right.sample_width, 0, 1)

		output = audioop.add(left_data, right_data, seg.sample_width)

		return seg._spawn(data=output,
			overrides={'channels': 2,
				'frame_width': 2 * seg.sample_width})