Spaces:
Running
Running
Fixed missing CUDA option
Browse files
Logo.ai
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Logo.png
ADDED
|
app.py
CHANGED
|
@@ -367,7 +367,7 @@ def create_combined_interface():
|
|
| 367 |
with gr.Column():
|
| 368 |
text_input = gr.Textbox(
|
| 369 |
label="Text to Synthesize",
|
| 370 |
-
value="
|
| 371 |
lines=3,
|
| 372 |
)
|
| 373 |
voice_dropdown = gr.Dropdown(
|
|
@@ -411,7 +411,7 @@ def create_combined_interface():
|
|
| 411 |
with gr.Column():
|
| 412 |
text_input_studio = gr.Textbox(
|
| 413 |
label="Text to Synthesize",
|
| 414 |
-
value="
|
| 415 |
lines=3,
|
| 416 |
)
|
| 417 |
voice_dropdown_studio = gr.Dropdown(
|
|
|
|
| 367 |
with gr.Column():
|
| 368 |
text_input = gr.Textbox(
|
| 369 |
label="Text to Synthesize",
|
| 370 |
+
value="Did you know that you can just do stuff?",
|
| 371 |
lines=3,
|
| 372 |
)
|
| 373 |
voice_dropdown = gr.Dropdown(
|
|
|
|
| 411 |
with gr.Column():
|
| 412 |
text_input_studio = gr.Textbox(
|
| 413 |
label="Text to Synthesize",
|
| 414 |
+
value="Use the sliders to customize a voice!",
|
| 415 |
lines=3,
|
| 416 |
)
|
| 417 |
voice_dropdown_studio = gr.Dropdown(
|
text2speech.py
CHANGED
|
@@ -18,6 +18,13 @@ from typing import Optional, Tuple, List
|
|
| 18 |
VOICES_JSON_PATH = "voices.json" # Contains your known style vectors
|
| 19 |
RANDOM_VOICES_JSON_PATH = "random_voices.json" # We'll store newly sampled vectors here
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
##############################################################################
|
| 23 |
# JSON LOAD/SAVE
|
|
@@ -131,7 +138,7 @@ def sample_random_style(mean: np.ndarray, cov: np.ndarray) -> torch.Tensor:
|
|
| 131 |
# Sample from multivariate normal distribution
|
| 132 |
z = np.random.multivariate_normal(mean, cov)
|
| 133 |
# Convert to torch tensor
|
| 134 |
-
style_tensor = torch.tensor(z, dtype=torch.float32)
|
| 135 |
# Unsqueeze to shape (1, D)
|
| 136 |
style_tensor = style_tensor.unsqueeze(0)
|
| 137 |
print(f"Sampled a new random style vector with shape {style_tensor.shape}.")
|
|
@@ -354,7 +361,9 @@ def get_or_compute_style_vector(key_or_path: str, voices_data: dict) -> torch.Te
|
|
| 354 |
"""
|
| 355 |
if key_or_path in voices_data:
|
| 356 |
print(f"Found style vector for '{key_or_path}' in '{VOICES_JSON_PATH}'.")
|
| 357 |
-
style_vec = torch.tensor(voices_data[key_or_path], dtype=torch.float32)
|
|
|
|
|
|
|
| 358 |
elif os.path.isfile(key_or_path):
|
| 359 |
print(
|
| 360 |
f"No existing style for '{key_or_path}'. Attempting to compute from audio..."
|
|
@@ -362,6 +371,7 @@ def get_or_compute_style_vector(key_or_path: str, voices_data: dict) -> torch.Te
|
|
| 362 |
style_vec = inference.compute_style(key_or_path)
|
| 363 |
if style_vec is None:
|
| 364 |
raise ValueError(f"Failed to compute style vector from '{key_or_path}'.")
|
|
|
|
| 365 |
voices_data[key_or_path] = style_vec.squeeze(0).tolist()
|
| 366 |
save_json(voices_data, VOICES_JSON_PATH)
|
| 367 |
print(
|
|
@@ -377,9 +387,10 @@ def get_or_compute_style_vector(key_or_path: str, voices_data: dict) -> torch.Te
|
|
| 377 |
# Ensure style_vec is 2D: (1, D)
|
| 378 |
if style_vec.dim() == 1:
|
| 379 |
style_vec = style_vec.unsqueeze(0)
|
|
|
|
| 380 |
print(f"Unsqueezed style vector to shape: {style_vec.shape}")
|
| 381 |
elif style_vec.dim() == 3:
|
| 382 |
-
style_vec = style_vec.squeeze(1)
|
| 383 |
print(f"Squeezed style vector to shape: {style_vec.shape}")
|
| 384 |
elif style_vec.dim() != 2:
|
| 385 |
raise ValueError(
|
|
@@ -495,9 +506,10 @@ def tts_with_style_vector(
|
|
| 495 |
# Ensure style_vec has shape (1, D)
|
| 496 |
if style_vec.dim() == 1:
|
| 497 |
style_vec = style_vec.unsqueeze(0) # e.g. (D,) -> (1, D)
|
|
|
|
| 498 |
print(f"Unsqueezed style vector to shape: {style_vec.shape}")
|
| 499 |
elif style_vec.dim() == 3:
|
| 500 |
-
style_vec = style_vec.squeeze(1)
|
| 501 |
print(f"Squeezed style vector to shape: {style_vec.shape}")
|
| 502 |
elif style_vec.dim() != 2:
|
| 503 |
print(f"Unexpected style vector shape: {style_vec.shape}. Expected 2D tensor.")
|
|
|
|
| 18 |
VOICES_JSON_PATH = "voices.json" # Contains your known style vectors
|
| 19 |
RANDOM_VOICES_JSON_PATH = "random_voices.json" # We'll store newly sampled vectors here
|
| 20 |
|
| 21 |
+
##############################################################################
|
| 22 |
+
# DEVICE CONFIGURATION
|
| 23 |
+
##############################################################################
|
| 24 |
+
# Detect if CUDA is available and set the device accordingly
|
| 25 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 26 |
+
print(f"Using device: {device}")
|
| 27 |
+
|
| 28 |
|
| 29 |
##############################################################################
|
| 30 |
# JSON LOAD/SAVE
|
|
|
|
| 138 |
# Sample from multivariate normal distribution
|
| 139 |
z = np.random.multivariate_normal(mean, cov)
|
| 140 |
# Convert to torch tensor
|
| 141 |
+
style_tensor = torch.tensor(z, dtype=torch.float32).to(device) # Move to device
|
| 142 |
# Unsqueeze to shape (1, D)
|
| 143 |
style_tensor = style_tensor.unsqueeze(0)
|
| 144 |
print(f"Sampled a new random style vector with shape {style_tensor.shape}.")
|
|
|
|
| 361 |
"""
|
| 362 |
if key_or_path in voices_data:
|
| 363 |
print(f"Found style vector for '{key_or_path}' in '{VOICES_JSON_PATH}'.")
|
| 364 |
+
style_vec = torch.tensor(voices_data[key_or_path], dtype=torch.float32).to(
|
| 365 |
+
device
|
| 366 |
+
) # Move to device
|
| 367 |
elif os.path.isfile(key_or_path):
|
| 368 |
print(
|
| 369 |
f"No existing style for '{key_or_path}'. Attempting to compute from audio..."
|
|
|
|
| 371 |
style_vec = inference.compute_style(key_or_path)
|
| 372 |
if style_vec is None:
|
| 373 |
raise ValueError(f"Failed to compute style vector from '{key_or_path}'.")
|
| 374 |
+
style_vec = style_vec.to(device) # Move to device
|
| 375 |
voices_data[key_or_path] = style_vec.squeeze(0).tolist()
|
| 376 |
save_json(voices_data, VOICES_JSON_PATH)
|
| 377 |
print(
|
|
|
|
| 387 |
# Ensure style_vec is 2D: (1, D)
|
| 388 |
if style_vec.dim() == 1:
|
| 389 |
style_vec = style_vec.unsqueeze(0)
|
| 390 |
+
style_vec = style_vec.to(device) # Ensure it's on the correct device
|
| 391 |
print(f"Unsqueezed style vector to shape: {style_vec.shape}")
|
| 392 |
elif style_vec.dim() == 3:
|
| 393 |
+
style_vec = style_vec.squeeze(1).to(device)
|
| 394 |
print(f"Squeezed style vector to shape: {style_vec.shape}")
|
| 395 |
elif style_vec.dim() != 2:
|
| 396 |
raise ValueError(
|
|
|
|
| 506 |
# Ensure style_vec has shape (1, D)
|
| 507 |
if style_vec.dim() == 1:
|
| 508 |
style_vec = style_vec.unsqueeze(0) # e.g. (D,) -> (1, D)
|
| 509 |
+
style_vec = style_vec.to(device) # Move to device
|
| 510 |
print(f"Unsqueezed style vector to shape: {style_vec.shape}")
|
| 511 |
elif style_vec.dim() == 3:
|
| 512 |
+
style_vec = style_vec.squeeze(1).to(device)
|
| 513 |
print(f"Squeezed style vector to shape: {style_vec.shape}")
|
| 514 |
elif style_vec.dim() != 2:
|
| 515 |
print(f"Unexpected style vector shape: {style_vec.shape}. Expected 2D tensor.")
|