Spaces:
Running
Running
| import numpy as np | |
| import json | |
| from sklearn.decomposition import PCA | |
| import joblib | |
# Input and output file locations (relative paths).
VOICES_JSON_PATH = "voices.json"
ANNOTATIONS_JSON_PATH = "annotations.json"
PCA_MODEL_PATH = "pca_model.pkl"

# Length of a style vector; adjust based on your actual vector size.
VECTOR_DIMENSION = 256
# Number of PCA components for annotated features.
N_COMPONENTS = 6
def load_json(file_path):
    """Load and parse a JSON file.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The parsed JSON content. If the file does not exist or its content
        cannot be decoded/parsed, an error is printed and an empty dict is
        returned instead.
    """
    try:
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's default locale encoding.
        with open(file_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"Error: {file_path} not found.")
        return {}
    except (json.JSONDecodeError, UnicodeDecodeError):
        # Bytes that are not valid UTF-8 cannot be valid JSON text either,
        # so both failures are reported the same way.
        print(f"Error: {file_path} is not valid JSON.")
        return {}
def extract_annotated_vectors():
    """Pair each annotation with its style vector.

    Reads the voices file (key -> style vector) and the annotations file,
    derives a lookup key from each annotation's "audio" path, and collects
    the style vector together with the annotation's six ratings.

    Annotations are skipped (with a printed warning) when no style vector
    exists for the key, when the vector is not 1-D after squeezing, or when
    any of the rating fields is missing or empty.

    Returns:
        tuple: ``(style_vectors, annotated_features)`` as two ``np.ndarray``
        objects with one row per retained annotation, or ``(None, None)``
        when nothing usable was found.
    """
    # Order matters: it fixes the column order of the returned feature array.
    rating_fields = ("gender", "tone", "pacing", "enunciation", "quality", "style")

    voices_data = load_json(VOICES_JSON_PATH)
    annotations = load_json(ANNOTATIONS_JSON_PATH)

    style_vectors = []
    annotated_features = []

    for item in annotations:
        # The key is the last "-"-separated piece of the file name, with the
        # ".wav" extension text removed.
        audio_path = item.get("audio", "")
        key = audio_path.split("/")[-1].split("-")[-1].replace(".wav", "")

        if key not in voices_data:
            print(f"Warning: No style vector found for key '{key}'. Skipping.")
            continue

        # squeeze() drops singleton axes so e.g. a (1, N) entry becomes (N,).
        style_vector = np.array(voices_data[key], dtype=np.float32).squeeze()
        if style_vector.ndim != 1:
            print(f"Skipping vector with unexpected dimensions: {style_vector.shape}")
            continue

        # Each field is read as a list whose first entry carries a "rating".
        # An incomplete annotation is skipped instead of aborting the run,
        # matching how a missing style vector is handled above.
        try:
            features = [item[field][0]["rating"] for field in rating_fields]
        except (KeyError, IndexError, TypeError) as exc:
            print(
                f"Warning: Incomplete annotation for key '{key}' ({exc!r}). Skipping."
            )
            continue

        style_vectors.append(style_vector)
        annotated_features.append(features)

    if not style_vectors or not annotated_features:
        print("Error: No valid style vectors or annotations found.")
        return None, None

    return np.array(style_vectors), np.array(annotated_features)
def train_and_save_pca_model():
    """Fit a PCA model on the annotated style vectors and persist it.

    Steps:
        1. Collect style vectors and ratings via extract_annotated_vectors().
        2. Fit a PCA with N_COMPONENTS components on the style vectors.
        3. Dump the fitted model to PCA_MODEL_PATH with joblib.
        4. Save the ratings array to 'annotated_features.npy'.

    Prints an error and returns without writing anything when no data could
    be extracted or when the data is too small for N_COMPONENTS components.

    Returns:
        None
    """
    style_vectors, annotated_features = extract_annotated_vectors()
    if style_vectors is None or annotated_features is None:
        print("Error: Unable to extract annotated data.")
        return

    # Expected: (n_samples, vector_dim) and (n_samples, 6) respectively.
    print(f"Style vectors shape: {style_vectors.shape}")
    print(f"Annotated features shape: {annotated_features.shape}")

    # PCA cannot extract more components than min(n_samples, n_features);
    # report that up front instead of failing inside fit().
    max_components = min(style_vectors.shape)
    if N_COMPONENTS > max_components:
        print(
            f"Error: Cannot fit PCA with {N_COMPONENTS} components on data of "
            f"shape {style_vectors.shape}; at most {max_components} are possible."
        )
        return

    print(f"Training PCA on {len(style_vectors)} style vectors...")
    pca = PCA(n_components=N_COMPONENTS)
    pca.fit(style_vectors)

    joblib.dump(pca, PCA_MODEL_PATH)
    print(f"PCA model saved to {PCA_MODEL_PATH}.")

    # The ratings are not used for fitting; they are stored for downstream tasks.
    np.save("annotated_features.npy", annotated_features)
    print("Annotated features saved to 'annotated_features.npy'.")
def load_pca_model():
    """Return the PCA model stored at PCA_MODEL_PATH.

    Returns:
        The object loaded by joblib, or None (after printing an error)
        when the model file does not exist.
    """
    model = None
    try:
        model = joblib.load(PCA_MODEL_PATH)
    except FileNotFoundError:
        print(f"Error: {PCA_MODEL_PATH} not found.")
    return model
def reduce_to_pca_components(style_vector, pca):
    """Project a single style vector into PCA space.

    Args:
        style_vector (np.ndarray): Original 1-D style vector.
        pca (PCA): Trained PCA model.

    Returns:
        np.ndarray: The vector expressed in PCA components.
    """
    # transform() works on a batch of samples, so wrap the vector in a
    # one-element batch and unwrap the single resulting row.
    single_sample_batch = [style_vector]
    projected = pca.transform(single_sample_batch)
    return projected[0]
def reconstruct_from_pca_components(pca_vector, pca):
    """Map a single PCA-space vector back to the original style space.

    Args:
        pca_vector (np.ndarray): 1-D vector of PCA component values.
        pca (PCA): Trained PCA model.

    Returns:
        np.ndarray: The reconstructed style vector.
    """
    # inverse_transform() works on a batch of samples, so wrap the vector
    # in a one-element batch and unwrap the single resulting row.
    single_sample_batch = [pca_vector]
    restored = pca.inverse_transform(single_sample_batch)
    return restored[0]
# Script entry point: running this file directly trains and saves the PCA model.
if __name__ == "__main__":
    train_and_save_pca_model()