first commit

dfb7d74 verified 2 months ago

7.75 kB

	"""Enhance audio files using AI-Coustics API."""

	# /// script
	# requires-python = ">=3.12"
	# dependencies = [
	# "librosa",
	# "requests",
	# "soundfile",
	# "tqdm",
	# ]
	# ///
	import json
	import os
	import time
	from pathlib import Path
	from typing import List, Optional

	import librosa
	import requests
	import soundfile as sf
	import tqdm


	class AiCousticsEnhancer:
	"""Client for AI-Coustics audio enhancement API"""

	def __init__(self, api_key: str):
	"""
	Initialize the AI-Coustics API client

	Args:
	api_key: Your AI-Coustics API key
	"""
	self.api_key = api_key
	self.base_url = "https://api.ai-coustics.io/v2"
	self.headers = {"X-API-Key": self.api_key}

	def upload_audio(
	self,
	file_path: str,
	enhancement_level: int = 100,
	enhancement_model: str = "LARK_V2",
	loudness_target: int = -19,
	true_peak: int = -1,
	transcode: str = "WAV",
	) -> dict:
	"""
	Upload an audio file for enhancement

	Args:
	file_path: Path to the audio file
	enhancement_level: Enhancement strength (0-100)
	enhancement_model: Model to use (LARK_V2 or FINCH_V2)
	loudness_target: Target loudness in LUFS
	true_peak: True peak level in dBFS
	transcode: Output format (WAV, MP3, etc.)

	Returns:
	Response dictionary with uid and metadata
	"""
	url = f"{self.base_url}/medias"

	# Prepare the enhancement parameters
	media_enhancement = {
	"loudness_target": loudness_target,
	"true_peak": true_peak,
	"enhancement_level": enhancement_level,
	"enhancement_model": enhancement_model,
	"transcode": transcode,
	}

	# Upload file
	with open(file_path, "rb") as f:
	files = {"file": f}
	data = {"media_enhancement": json.dumps(media_enhancement)}

	response = requests.post(url, headers=self.headers, files=files, data=data)
	response.raise_for_status()

	return response.json()

	def check_status(self, uid: str) -> dict:
	"""
	Check the processing status of an uploaded media file

	Args:
	uid: Unique identifier returned from upload

	Returns:
	Metadata dictionary with current status
	"""
	url = f"{self.base_url}/medias/{uid}/metadata"
	response = requests.get(url, headers=self.headers)
	response.raise_for_status()
	return response.json()

	def wait_for_completion(
	self, uid: str, poll_interval: int = 2, timeout: int = 300
	) -> dict:
	"""
	Poll the API until processing is complete

	Args:
	uid: Unique identifier returned from upload
	poll_interval: Seconds between status checks
	timeout: Maximum seconds to wait

	Returns:
	Final metadata dictionary
	"""
	start_time = time.time()

	while time.time() - start_time < timeout:
	metadata = self.check_status(uid)
	status = metadata.get("enhancement_status")

	print(f"Status: {status}")

	if status == "COMPLETED":
	return metadata
	elif status == "FAILED":
	raise Exception(f"Enhancement failed: {metadata}")

	time.sleep(poll_interval)

	raise TimeoutError(f"Processing did not complete within {timeout} seconds")

	def download_enhanced(self, uid: str, output_path: str):
	"""
	Download the enhanced audio file

	Args:
	uid: Unique identifier returned from upload
	output_path: Path where to save the enhanced file
	"""
	url = f"{self.base_url}/medias/{uid}/file"
	response = requests.get(url, headers=self.headers, stream=True)
	response.raise_for_status()

	# Save the file
	with open(output_path, "wb") as f:
	for chunk in response.iter_content(chunk_size=8192):
	f.write(chunk)

	print(f"Enhanced audio saved to: {output_path}")


	def get_enhanced_output_path(input_path: str) -> str:
	    """Return ``input_path`` with ``_enhanced`` appended to the file stem.

	    The parent directory and the final extension are kept, so
	    ``clip.wav`` becomes ``clip_enhanced.wav``.
	    """
	    source = Path(input_path)
	    enhanced_name = f"{source.stem}_enhanced{source.suffix}"

	    return str(source.with_name(enhanced_name))


	def enhance_audio_files(
	    input_paths: list[str],
	    api_key: str,
	    enhancement_level: int = 90,
	    enhancement_model: str = "LARK_V2",
	) -> list[str]:
	    """
	    Process a list of audio files through AI-Coustics API

	    Inputs that do not exist, or whose enhanced output already exists, are
	    skipped. A failure on one file is printed and does not stop the batch.

	    Args:
	        input_paths: List of paths to audio files
	        api_key: Your AI-Coustics API key
	        enhancement_level: Enhancement strength (0-100)
	        enhancement_model: Model to use (LARK_V2 or FINCH_V2)

	    Returns:
	        List of paths to enhanced audio files
	    """
	    # Initialize client
	    client = AiCousticsEnhancer(api_key)

	    enhanced_files = []
	    # Pre-filter so the progress bar only counts files that need work.
	    valid_input_paths = [
	        p
	        for p in input_paths
	        if os.path.exists(p) and not os.path.exists(get_enhanced_output_path(p))
	    ]

	    for input_path in tqdm.tqdm(valid_input_paths):
	        # Re-check: the file may have disappeared since the pre-filter ran.
	        if not os.path.exists(input_path):
	            print(f"Error: File not found: {input_path}")
	            continue
	        # Output sits next to the input, with '_enhanced' added to the stem.
	        output_path = get_enhanced_output_path(input_path)
	        if os.path.exists(output_path):
	            print(f"Found {output_path}, skipping")
	            continue
	        # dirname is "" for a bare filename, and os.makedirs("") raises
	        # FileNotFoundError, so only create a directory when one is named.
	        output_dir = os.path.dirname(output_path)
	        if output_dir:
	            os.makedirs(output_dir, exist_ok=True)

	        try:
	            # Upload for enhancement
	            print("Uploading to AI-Coustics...")
	            result = client.upload_audio(
	                input_path,
	                enhancement_level=enhancement_level,
	                enhancement_model=enhancement_model,
	            )

	            uid = result["uid"]
	            print(f"Uploaded successfully. UID: {uid}")

	            # Wait for processing
	            print("Waiting for enhancement to complete...")
	            client.wait_for_completion(uid)
	            print("Enhancement completed!")

	            # Download enhanced audio
	            client.download_enhanced(uid, output_path)
	            enhanced_files.append(output_path)

	        except Exception as e:
	            # Deliberate best-effort boundary: report and move on to the
	            # next file instead of aborting the whole batch.
	            print(f"Error processing file {input_path}: {e}")
	            continue

	    print(f"\n{'=' * 50}")
	    print(
	        f"Processing complete! Enhanced {len(enhanced_files)}/{len(input_paths)} files"
	    )
	    print(f"{'=' * 50}")

	    return enhanced_files


	# Example usage: enhance every audio file found in ./voice-donations
	if __name__ == "__main__":
	    # Set your API key (get it from https://developer.ai-coustics.com/)
	    API_KEY = os.environ["AICOUSTICS_API_KEY"]

	    input_files = librosa.util.find_files(Path(__file__).parent / "voice-donations")

	    # Skip outputs of earlier runs. Only the file stem is tested: matching
	    # "_enhanced" anywhere in the path would exclude every file whenever a
	    # parent directory name happens to contain that text.
	    input_files = [f for f in input_files if not Path(f).stem.endswith("_enhanced")]

	    # Process audio files
	    enhanced_files = enhance_audio_files(
	        input_paths=input_files,
	        api_key=API_KEY,
	        enhancement_level=100,
	        enhancement_model="LARK_V2",  # or "FINCH_V2" for voice isolation
	    )

	    print("\nEnhanced files:")
	    for file in enhanced_files:
	        print(f" - {file}")

	    # Read and verify the enhanced audio with soundfile
	    if enhanced_files:
	        print("\nReading first enhanced file with soundfile:")
	        audio_data, sample_rate = sf.read(enhanced_files[0])
	        print(f" Shape: {audio_data.shape}")
	        print(f" Sample rate: {sample_rate} Hz")
	        print(f" Duration: {len(audio_data) / sample_rate:.2f} seconds")