from threading import Thread from pathlib import Path import gradio as gr import subprocess import shutil import time import copy import glob import json import os import shlex import numpy as np import librosa from scipy.io import wavfile import openai from openai import OpenAI from dotenv import load_dotenv from PIL import Image import requests from io import BytesIO load_dotenv() OpenAI.api_key = os.getenv('OPENAI_API_KEY') client = OpenAI( ) # Paths according to your directory structure CURRENT_DIR = Path(__file__).resolve().parent PEOPLE_DIR = CURRENT_DIR / "people" SONGS_DIR = CURRENT_DIR / "songs" INFERENCE_OUTPUT_DIRNAME = CURRENT_DIR / "inference_output" COVER_IMAGE_PATH = SONGS_DIR / "cover.png" logo_image_path = os.path.abspath("Logo.png") # Ensure the inference output directory exists INFERENCE_OUTPUT_DIRNAME.mkdir(parents=True, exist_ok=True) def get_people(): """List the people available for voice conversion.""" return sorted([p.stem for p in PEOPLE_DIR.iterdir() if p.is_dir()]) def get_songs(): """List the song directories available for selection.""" # Use a set to avoid adding duplicate folder names folder_names = set() # Iterate through all directories and subdirectories in SONGS_DIR for root, dirs, _ in os.walk(SONGS_DIR): for dir in dirs: # Construct the relative path from SONGS_DIR to the current directory relative_path = os.path.relpath(os.path.join(root, dir), SONGS_DIR) # Add the relative path of directories to the set folder_names.add(relative_path) # Return a sorted list of unique folder names return sorted(folder_names) def run_inference(speaker, path_str, f0_method, transpose, noise_scale, cluster_ratio): # Ensure path is a Path object path = Path(path_str) model_path = speaker["model_path"] config_path = speaker["cfg_path"] cluster_path = speaker["cluster_path"] output_path = Path(INFERENCE_OUTPUT_DIRNAME, path.name) # Now path is guaranteed to be a Path object cluster_args = f"-k \"{cluster_path}\" -r {cluster_ratio}" if cluster_path and cluster_ratio > 0 else "" # Ensure output directory exists INFERENCE_OUTPUT_DIRNAME.mkdir(parents=True, exist_ok=True) inference_cmd = f"svc infer \"{path.absolute()}\" -m \"{model_path}\" -c \"{config_path}\" {cluster_args} -t {transpose} --f0-method crepe -n 0.4 -o \"{output_path}\" --no-auto-predict-f0" command = shlex.split(inference_cmd) result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) if result.returncode != 0: print(f"Command failed with return code {result.returncode}") print("STDERR:", result.stderr) return None, "⚠️ Error during inference." if "AttributeError" in result.stderr: return None, "⚠️ Modelo SVC incompatible." if not output_path.exists(): print("Expected output file not found:", output_path) return None, "⚠️ Error: Output file not found." return str(output_path), None def get_speaker_details(speaker_name): speakers = [] # This should ideally be a list of dictionaries # Assuming MODELOS is a directory containing subdirectories for each speaker/model for _, dirs, _ in os.walk(PEOPLE_DIR): for dir_name in dirs: if dir_name.lower() == speaker_name.lower(): speaker_path = PEOPLE_DIR / dir_name model_path = next(speaker_path.glob('G_*.pth'), None) description_path = speaker_path / "description.txt" image_path = next(speaker_path.glob('image.png'), None) cfg_path = next(speaker_path.glob('*.json'), None) if model_path and cfg_path: return { "model_path": model_path, "cfg_path": cfg_path, "description_path": description_path, "cluster_path": "", "image_path": image_path } return None def mix_audio(vocals_path, instrumentals_path, output_path): y_vocals, sr = librosa.load(vocals_path, sr=None) y_instrumentals, _ = librosa.load(instrumentals_path, sr=None) # Ensure both tracks are of the same length max_length = max(len(y_vocals), len(y_instrumentals)) y_vocals_padded = np.pad(y_vocals, (0, max_length - len(y_vocals)), mode='constant') y_instrumentals_padded = np.pad(y_instrumentals, (0, max_length - len(y_instrumentals)), mode='constant') # Mix the two audio files mixed_audio = 0.5 * y_vocals_padded + 0.5 * y_instrumentals_padded # Save the mixed audio to a new WAV file wavfile.write(output_path, sr, (mixed_audio * 32767).astype(np.int16)) def gen_caption_image(song_name, person): prompt = f"Create a creative and engaging Instagram caption for a post where {person} is performing the song '{song_name}'. Make it exciting and suitable for social media." response = client.chat.completions.create( model="gpt-3.5-turbo", messages=[ {"role": "user", "content": prompt} ] ) caption= response.choices[0].message.content image_prompt=f'draw an image for my instagram post having for the song {song_name} and the caption {caption}' responseimage = client.images.generate( model="dall-e-3", prompt=image_prompt, size="1024x1024", quality="standard", n=1, ) image_url = responseimage.data[0].url image_response = requests.get(image_url) img = Image.open(BytesIO(image_response.content)) return np.array(img) , caption def voice_conversion(person, song_selection): try: speaker_details = get_speaker_details(person) if not speaker_details: raise Exception("Speaker not found! Error: Speaker details not found.") # Extract the folder name from the song selection (ignoring the file part) folder_name = song_selection.split('\\')[0] vocals_path = SONGS_DIR / folder_name / "vocals.wav" instrumentals_path = SONGS_DIR / folder_name / "instrumentals.wav" if not vocals_path.exists() or not instrumentals_path.exists(): raise Exception(f"Vocals or instrumentals not found in {folder_name}! Error: Audio file not found.") converted_vocals_path, error = run_inference(speaker_details, str(vocals_path), 0, 0, 0.4, 0) if error: raise Exception(error) converted_mix_path = str(INFERENCE_OUTPUT_DIRNAME / f"{folder_name}_converted_mix.wav") mix_audio(converted_vocals_path, str(instrumentals_path), converted_mix_path) return converted_mix_path except Exception as e: return None, None, str(e), "" def update_person_details(selected_person): speaker_details = get_speaker_details(selected_person) image_path = str(speaker_details["image_path"]) # Return the image path in a list as expected by the Gallery component with open(speaker_details["description_path"], 'r') as file: person_description = file.read() return image_path, person_description def update_song_features(song_selection): cover_image_path = COVER_IMAGE_PATH folder_name = song_selection.split('\\')[0] vocals_path = SONGS_DIR / folder_name / "vocals.wav" instrumentals_path = SONGS_DIR / folder_name / "instrumentals.wav" if not vocals_path.exists() or not instrumentals_path.exists(): raise Exception(f"Vocals or instrumentals not found in {folder_name}! Error: Audio file not found.") original_mix_path = str(INFERENCE_OUTPUT_DIRNAME / f"{folder_name}_original_mix.wav") mix_audio(str(vocals_path), str(instrumentals_path), original_mix_path) return original_mix_path, cover_image_path # Updating the interface setup to include the event listener # CSS for artistic theme # CSS for artistic theme and custom positioning css = """ @import url('https://fonts.googleapis.com/css2?family=Pacifico&display=swap'); .center-container { display: flex; justify-content: center; align-items: center; } .right-container { display: flex; justify-content: flex-end; align-items: center; } /* Additional custom CSS for spacing and alignment */ .logo-img { max-width: 100%; height: auto; display: block; margin-left: auto; margin-right: auto; } .alchemy-img { float: right; max-height: 60px; /* Adjust based on your preference */ } """ header_markdown = """