Spaces:

Wismut
/

StyleTTS2_Studio

Running

App Files Files Community

StyleTTS2_Studio / pca /generate_pca.py

Wismut

initial commit

0af9841 about 1 year ago

raw

history blame contribute delete

4.58 kB

	import numpy as np
	import json
	from sklearn.decomposition import PCA
	import joblib

	# Input/output locations, all relative to the current working directory.
	# voices.json is indexed by a key derived from each annotation's audio file
	# name; annotations.json is iterated as a sequence of rating records.
	VOICES_JSON_PATH = "voices.json"
	ANNOTATIONS_JSON_PATH = "annotations.json"
	PCA_MODEL_PATH = "pca_model.pkl"
	# Expected style-vector length. NOTE(review): nothing visible in this file
	# reads this constant, so the size is not actually enforced — confirm
	# whether a validation step was intended.
	VECTOR_DIMENSION = 256
	# Number of principal components kept when PCA is fitted on the style
	# vectors. It equals the number of rating fields collected per annotation.
	N_COMPONENTS = 6


	def load_json(file_path):
	    """Load and parse a JSON file, returning an empty dict on failure.

	    The file is always decoded as UTF-8 so the result does not depend on
	    the platform's locale encoding.

	    Args:
	        file_path: Path of the JSON file to read.

	    Returns:
	        The parsed JSON value, or ``{}`` when the file does not exist or
	        its content is not valid (UTF-8 encoded) JSON. An error line is
	        printed in those cases.
	    """
	    try:
	        with open(file_path, "r", encoding="utf-8") as f:
	            return json.load(f)
	    except FileNotFoundError:
	        print(f"Error: {file_path} not found.")
	        return {}
	    except (json.JSONDecodeError, UnicodeDecodeError):
	        # Bytes that are not UTF-8 cannot be valid JSON text either, so
	        # both failures are reported the same way.
	        print(f"Error: {file_path} is not valid JSON.")
	        return {}


	def extract_annotated_vectors():
	    """
	    Pair every usable annotation with its style vector.

	    Reads VOICES_JSON_PATH and ANNOTATIONS_JSON_PATH via load_json. For each
	    annotation the lookup key is the audio file name with everything up to
	    the last "-" and the ".wav" suffix removed. Annotations are skipped,
	    with a printed message, when no style vector exists for the key, when
	    the vector is not 1-D after squeezing, or when any rating is missing.

	    Returns:
	        tuple: ``(style_vectors, annotated_features)`` as two np.ndarray
	        objects with one row per retained annotation. The feature columns
	        are gender, tone, pacing, enunciation, quality, style, in that
	        order. Returns ``(None, None)`` when no annotation is usable.
	        Vector length is not validated here (presumably VECTOR_DIMENSION).
	    """
	    voices_data = load_json(VOICES_JSON_PATH)
	    annotations = load_json(ANNOTATIONS_JSON_PATH)

	    # The order here defines the column order of the feature matrix.
	    rating_fields = ("gender", "tone", "pacing", "enunciation", "quality", "style")

	    style_vectors = []
	    annotated_features = []

	    for item in annotations:
	        # Derive the voices.json key from the audio file name.
	        audio_path = item.get("audio", "")
	        key = audio_path.split("/")[-1].split("-")[-1].replace(".wav", "")

	        if key not in voices_data:
	            print(f"Warning: No style vector found for key '{key}'. Skipping.")
	            continue

	        # Squeeze away singleton axes so e.g. a (1, N) entry becomes (N,).
	        style_vector = np.array(voices_data[key], dtype=np.float32).squeeze()
	        if style_vector.ndim != 1:
	            print(f"Skipping vector with unexpected dimensions: {style_vector.shape}")
	            continue

	        # Each rating lives at item[field][0]["rating"]. A partially
	        # annotated item is skipped instead of aborting the whole run.
	        try:
	            features = [item[field][0]["rating"] for field in rating_fields]
	        except (KeyError, IndexError, TypeError):
	            print(f"Warning: Incomplete ratings for key '{key}'. Skipping.")
	            continue

	        style_vectors.append(style_vector)
	        annotated_features.append(features)

	    # Both lists grow in lockstep, so checking one is sufficient.
	    if not style_vectors:
	        print("Error: No valid style vectors or annotations found.")
	        return None, None

	    return np.array(style_vectors), np.array(annotated_features)


	def train_and_save_pca_model():
	    """
	    Fit PCA on the annotated style vectors and persist the results.

	    Obtains the data from extract_annotated_vectors, fits a PCA with
	    N_COMPONENTS components on the style vectors, writes the fitted model
	    to PCA_MODEL_PATH with joblib, and saves the annotated features to
	    "annotated_features.npy". Prints an error and returns without writing
	    anything when no data is available or when the data is too small for
	    the requested number of components.
	    """
	    style_vectors, annotated_features = extract_annotated_vectors()
	    if style_vectors is None or annotated_features is None:
	        print("Error: Unable to extract annotated data.")
	        return

	    print(f"Style vectors shape: {style_vectors.shape}")  # Expected (n_samples, vector_dim)
	    print(
	        f"Annotated features shape: {annotated_features.shape}"
	    )  # Expected (n_samples, 6): one column per rating field

	    # PCA cannot extract more components than min(n_samples, n_features).
	    # Report that here rather than failing inside scikit-learn.
	    if style_vectors.ndim != 2 or min(style_vectors.shape) < N_COMPONENTS:
	        print(
	            f"Error: PCA with {N_COMPONENTS} components needs a 2-D array with at "
	            f"least {N_COMPONENTS} samples and features; got shape {style_vectors.shape}."
	        )
	        return

	    print(f"Training PCA on {len(style_vectors)} style vectors...")
	    pca = PCA(n_components=N_COMPONENTS)
	    pca.fit(style_vectors)

	    joblib.dump(pca, PCA_MODEL_PATH)
	    print(f"PCA model saved to {PCA_MODEL_PATH}.")

	    # The ratings are not used for fitting; they are stored for later use.
	    np.save("annotated_features.npy", annotated_features)
	    print("Annotated features saved to 'annotated_features.npy'.")


	def load_pca_model():
	    """Return the PCA model stored at PCA_MODEL_PATH, or None if it is absent.

	    A missing file is reported with a printed error line rather than an
	    exception.
	    """
	    # NOTE: joblib.load unpickles the file; only load models you trust.
	    try:
	        model = joblib.load(PCA_MODEL_PATH)
	    except FileNotFoundError:
	        print(f"Error: {PCA_MODEL_PATH} not found.")
	        model = None
	    return model


	def reduce_to_pca_components(style_vector, pca):
	    """
	    Project a single style vector into PCA space.

	    Args:
	        style_vector (np.ndarray): One original style vector (presumably
	            VECTOR_DIMENSION long; the length is not checked here).
	        pca (PCA): Fitted PCA model exposing ``transform``.

	    Returns:
	        np.ndarray: The first (and only) row of ``pca.transform`` applied
	        to a one-row batch holding ``style_vector``.
	    """
	    # transform works on batches, so wrap the vector as a one-row batch.
	    single_row_batch = [style_vector]
	    projected_batch = pca.transform(single_row_batch)
	    return projected_batch[0]


	def reconstruct_from_pca_components(pca_vector, pca):
	    """
	    Map a single PCA-space vector back to the original style space.

	    Args:
	        pca_vector (np.ndarray): One vector in PCA space (one value per
	            component of ``pca``).
	        pca (PCA): Fitted PCA model exposing ``inverse_transform``.

	    Returns:
	        np.ndarray: The first (and only) row of ``pca.inverse_transform``
	        applied to a one-row batch holding ``pca_vector``.
	    """
	    # inverse_transform works on batches, so wrap the vector as one row.
	    single_row_batch = [pca_vector]
	    restored_batch = pca.inverse_transform(single_row_batch)
	    return restored_batch[0]


	if __name__ == "__main__":
	    # Run as a script: fit the PCA model and write it to disk. Importing
	    # this module only exposes the helper functions.
	    train_and_save_pca_model()