| """Enhance audio files using AI-Coustics API.""" |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| import json |
| import os |
| import time |
| from pathlib import Path |
| from typing import List, Optional |
|
|
| import librosa |
| import requests |
| import soundfile as sf |
| import tqdm |
|
|
|
|
class AiCousticsEnhancer:
    """Client for the AI-Coustics audio enhancement API.

    Covers the whole workflow for one file: upload it for processing, poll
    its status until the service reports a final state, then download the
    enhanced result.
    """

    def __init__(self, api_key: str, request_timeout: float = 60.0):
        """
        Initialize the AI-Coustics API client

        Args:
            api_key: Your AI-Coustics API key
            request_timeout: Seconds passed as ``timeout`` to every HTTP
                request, so a stalled connection raises instead of blocking
                forever
        """
        self.api_key = api_key
        self.base_url = "https://api.ai-coustics.io/v2"
        self.headers = {"X-API-Key": self.api_key}
        self.request_timeout = request_timeout

    def upload_audio(
        self,
        file_path: str,
        enhancement_level: int = 100,
        enhancement_model: str = "LARK_V2",
        loudness_target: int = -19,
        true_peak: int = -1,
        transcode: str = "WAV",
    ) -> dict:
        """
        Upload an audio file for enhancement

        Args:
            file_path: Path to the audio file
            enhancement_level: Enhancement strength (0-100)
            enhancement_model: Model to use (LARK_V2 or FINCH_V2)
            loudness_target: Target loudness in LUFS
            true_peak: True peak level in dBFS
            transcode: Output format (WAV, MP3, etc.)

        Returns:
            Response dictionary with uid and metadata

        Raises:
            OSError: If the file cannot be opened
            requests.HTTPError: If the API answers with an error status
        """
        url = f"{self.base_url}/medias"

        # The enhancement settings travel as a JSON string in a form field
        # next to the multipart file upload.
        media_enhancement = {
            "loudness_target": loudness_target,
            "true_peak": true_peak,
            "enhancement_level": enhancement_level,
            "enhancement_model": enhancement_model,
            "transcode": transcode,
        }
        data = {"media_enhancement": json.dumps(media_enhancement)}

        # The request must be sent while the file handle is still open.
        with open(file_path, "rb") as f:
            response = requests.post(
                url,
                headers=self.headers,
                files={"file": f},
                data=data,
                timeout=self.request_timeout,
            )
        response.raise_for_status()

        return response.json()

    def check_status(self, uid: str) -> dict:
        """
        Check the processing status of an uploaded media file

        Args:
            uid: Unique identifier returned from upload

        Returns:
            Metadata dictionary with current status

        Raises:
            requests.HTTPError: If the API answers with an error status
        """
        url = f"{self.base_url}/medias/{uid}/metadata"
        response = requests.get(
            url, headers=self.headers, timeout=self.request_timeout
        )
        response.raise_for_status()
        return response.json()

    def wait_for_completion(
        self, uid: str, poll_interval: int = 2, timeout: int = 300
    ) -> dict:
        """
        Poll the API until processing is complete

        Args:
            uid: Unique identifier returned from upload
            poll_interval: Seconds between status checks
            timeout: Maximum seconds to wait

        Returns:
            Final metadata dictionary

        Raises:
            RuntimeError: If the reported status is "FAILED"
            TimeoutError: If no final status is seen within ``timeout``
                seconds
        """
        # A monotonic clock keeps the elapsed-time measurement correct even
        # if the system wall clock is adjusted while we are waiting.
        start_time = time.monotonic()

        while time.monotonic() - start_time < timeout:
            metadata = self.check_status(uid)
            status = metadata.get("enhancement_status")

            print(f"Status: {status}")

            if status == "COMPLETED":
                return metadata
            if status == "FAILED":
                raise RuntimeError(f"Enhancement failed: {metadata}")

            time.sleep(poll_interval)

        raise TimeoutError(f"Processing did not complete within {timeout} seconds")

    def download_enhanced(self, uid: str, output_path: str) -> None:
        """
        Download the enhanced audio file

        The data is streamed into a temporary ``<output_path>.part`` file and
        moved into place only after the download finished, so a failed or
        interrupted download never leaves a truncated file at ``output_path``.

        Args:
            uid: Unique identifier returned from upload
            output_path: Path where to save the enhanced file

        Raises:
            requests.HTTPError: If the API answers with an error status
            OSError: If the file cannot be written
        """
        url = f"{self.base_url}/medias/{uid}/file"
        partial_path = f"{output_path}.part"

        try:
            # The context manager releases the streamed connection even when
            # writing the file fails part-way through.
            with requests.get(
                url,
                headers=self.headers,
                stream=True,
                timeout=self.request_timeout,
            ) as response:
                response.raise_for_status()
                with open(partial_path, "wb") as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:
                            f.write(chunk)
            os.replace(partial_path, output_path)
        finally:
            # After a successful replace the partial file no longer exists;
            # anything still present here is the leftover of a failure.
            if os.path.exists(partial_path):
                os.remove(partial_path)

        print(f"Enhanced audio saved to: {output_path}")
|
|
|
|
def get_enhanced_output_path(input_path: str) -> str:
    """Return ``input_path`` with '_enhanced' inserted before the file suffix.

    The directory part and the final suffix are kept; only the stem of the
    file name is extended.
    """
    source = Path(input_path)
    enhanced_name = f"{source.stem}_enhanced{source.suffix}"
    return str(source.with_name(enhanced_name))
|
|
|
|
def enhance_audio_files(
    input_paths: List[str],
    api_key: str,
    enhancement_level: int = 90,
    enhancement_model: str = "LARK_V2",
) -> List[str]:
    """
    Process a list of audio files through AI-Coustics API

    Inputs that do not exist, or whose enhanced output already exists, are
    reported and skipped. A failure while processing one file is printed and
    does not stop the remaining files.

    Args:
        input_paths: List of paths to audio files
        api_key: Your AI-Coustics API key
        enhancement_level: Enhancement strength (0-100)
        enhancement_model: Model to use (LARK_V2 or FINCH_V2)

    Returns:
        List of paths to the enhanced audio files written during this call
        (files skipped because their output already existed are not included)
    """
    client = AiCousticsEnhancer(api_key)

    # Decide up front which inputs need work, so the progress bar only counts
    # real uploads while every skipped input is still reported to the user.
    pending = []
    for input_path in input_paths:
        if not os.path.exists(input_path):
            print(f"Error: File not found: {input_path}")
            continue

        output_path = get_enhanced_output_path(input_path)
        if os.path.exists(output_path):
            print(f"Found {output_path}, skipping")
            continue

        pending.append((input_path, output_path))

    enhanced_files = []
    for input_path, output_path in tqdm.tqdm(pending):
        # Per-file boundary: any failure is reported and the batch goes on.
        try:
            # dirname is "" for a bare file name, and os.makedirs("") raises.
            output_dir = os.path.dirname(output_path)
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)

            print("Uploading to AI-Coustics...")
            result = client.upload_audio(
                input_path,
                enhancement_level=enhancement_level,
                enhancement_model=enhancement_model,
            )

            uid = result["uid"]
            print(f"Uploaded successfully. UID: {uid}")

            print("Waiting for enhancement to complete...")
            client.wait_for_completion(uid)
            print("Enhancement completed!")

            client.download_enhanced(uid, output_path)
            enhanced_files.append(output_path)

        except Exception as e:
            print(f"Error processing file {input_path}: {e}")
            continue

    print(f"\n{'=' * 50}")
    print(
        f"Processing complete! Enhanced {len(enhanced_files)}/{len(input_paths)} files"
    )
    print(f"{'=' * 50}")

    return enhanced_files
|
|
|
|
| |
if __name__ == "__main__":
    # The API key is read from the environment; a missing variable raises
    # KeyError before any work starts.
    API_KEY = os.environ["AICOUSTICS_API_KEY"]

    # Collect audio files from the "voice-donations" folder next to this
    # script, leaving out anything whose path already contains "_enhanced".
    donations_dir = Path(__file__).parent / "voice-donations"
    input_files = [
        candidate
        for candidate in librosa.util.find_files(donations_dir)
        if "_enhanced" not in candidate
    ]

    enhanced_files = enhance_audio_files(
        input_paths=input_files,
        api_key=API_KEY,
        enhancement_level=100,
        enhancement_model="LARK_V2",
    )

    print("\nEnhanced files:")
    for file in enhanced_files:
        print(f" - {file}")

    # Read the first result back and print its basic properties.
    if enhanced_files:
        print("\nReading first enhanced file with soundfile:")
        first_enhanced = enhanced_files[0]
        audio_data, sample_rate = sf.read(first_enhanced)
        print(f" Shape: {audio_data.shape}")
        print(f" Sample rate: {sample_rate} Hz")
        print(f" Duration: {len(audio_data) / sample_rate:.2f} seconds")
|
|