Update app.py
Browse files
app.py
CHANGED
|
@@ -20,6 +20,7 @@ import threading
|
|
| 20 |
import time
|
| 21 |
from pathlib import Path
|
| 22 |
import tempfile
|
|
|
|
| 23 |
|
| 24 |
# Set environment variables for HF Spaces
|
| 25 |
os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
|
|
@@ -40,6 +41,24 @@ MAX_COMBINED_DURATION = 30 * 60 # 30 minutes in seconds
|
|
| 40 |
PAUSE_DURATION = 3.0 # 3 seconds pause between audios
|
| 41 |
|
| 42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
def download_models():
|
| 44 |
"""Download models if they don't exist"""
|
| 45 |
checkpoints_dir = "./checkpoints"
|
|
@@ -150,16 +169,15 @@ def create_combined_audios(audio_files_info):
|
|
| 150 |
# 1.5 Sekunden Intro-Stille vor der ersten Audio
|
| 151 |
silence_intro = np.zeros(int(sr * 1.5), dtype=np.int16)
|
| 152 |
audio_out.append(silence_intro)
|
| 153 |
-
|
| 154 |
for i, fp in enumerate(current_files):
|
| 155 |
-
data, _ = sf.read(fp, dtype=
|
| 156 |
audio_out.append(data)
|
| 157 |
-
|
| 158 |
# Zwischen Affirmationen 3 Sekunden Pause
|
| 159 |
if i < len(current_files) - 1:
|
| 160 |
audio_out.append(silence_3s)
|
| 161 |
|
| 162 |
-
|
| 163 |
final_audio = np.concatenate(audio_out)
|
| 164 |
sf.write(combined_name, final_audio, sr, subtype="PCM_16")
|
| 165 |
|
|
@@ -194,16 +212,15 @@ def create_combined_audios(audio_files_info):
|
|
| 194 |
# 1.5 Sekunden Intro-Stille vor der ersten Audio
|
| 195 |
silence_intro = np.zeros(int(sr * 1.5), dtype=np.int16)
|
| 196 |
audio_out.append(silence_intro)
|
| 197 |
-
|
| 198 |
for i, fp in enumerate(current_files):
|
| 199 |
-
data, _ = sf.read(fp, dtype=
|
| 200 |
audio_out.append(data)
|
| 201 |
-
|
| 202 |
# Zwischen Affirmationen 3 Sekunden Pause
|
| 203 |
if i < len(current_files) - 1:
|
| 204 |
audio_out.append(silence_3s)
|
| 205 |
|
| 206 |
-
|
| 207 |
final_audio = np.concatenate(audio_out)
|
| 208 |
sf.write(combined_name, final_audio, sr, subtype="PCM_16")
|
| 209 |
|
|
@@ -308,37 +325,48 @@ def auto_process_dataset():
|
|
| 308 |
current_status = f"No sentences found in {txt_name}"
|
| 309 |
continue
|
| 310 |
|
| 311 |
-
current_status = (
|
| 312 |
-
f"Found {len(sentences)} sentences in {txt_name}"
|
| 313 |
-
)
|
| 314 |
print(f"Processing sentences from {txt_name}:")
|
| 315 |
|
| 316 |
temp_files = []
|
| 317 |
-
audio_files_info = [] #
|
| 318 |
commit_operations = []
|
| 319 |
|
|
|
|
|
|
|
|
|
|
| 320 |
# Process each sentence
|
| 321 |
for idx, sentence in enumerate(sentences):
|
| 322 |
if not auto_process_running:
|
| 323 |
break
|
| 324 |
|
| 325 |
current_status = (
|
| 326 |
-
f"Processing {txt_name}: sentence "
|
| 327 |
-
f"{idx + 1}/{len(sentences)}"
|
| 328 |
)
|
| 329 |
|
| 330 |
try:
|
| 331 |
-
if not sentence:
|
| 332 |
continue
|
| 333 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 334 |
# Add a period at the end if missing (helps with TTS prosody)
|
| 335 |
-
|
| 336 |
-
|
|
|
|
| 337 |
|
| 338 |
-
print(f" Sentence {idx+1}: '{
|
| 339 |
|
| 340 |
# Generate audio using IndexTTS2
|
| 341 |
-
output_filename = f"temp_{
|
| 342 |
|
| 343 |
# Capture stdout to get audio duration
|
| 344 |
import io
|
|
@@ -348,9 +376,9 @@ def auto_process_dataset():
|
|
| 348 |
with redirect_stdout(buf):
|
| 349 |
tts_model.infer(
|
| 350 |
spk_audio_prompt=reference_voice_path,
|
| 351 |
-
text=
|
| 352 |
output_path=output_filename,
|
| 353 |
-
verbose=True,
|
| 354 |
)
|
| 355 |
|
| 356 |
# Parse duration from output
|
|
@@ -363,21 +391,16 @@ def auto_process_dataset():
|
|
| 363 |
break
|
| 364 |
|
| 365 |
if duration is None:
|
| 366 |
-
# Fallback: read the file to get duration
|
| 367 |
audio_data, sr = sf.read(output_filename)
|
| 368 |
duration = len(audio_data) / sr
|
| 369 |
|
| 370 |
print(f" Generated audio: {duration:.2f} seconds")
|
| 371 |
|
| 372 |
-
# Store file info for combined audio
|
| 373 |
audio_files_info.append((output_filename, duration))
|
| 374 |
temp_files.append(output_filename)
|
| 375 |
|
| 376 |
-
#
|
| 377 |
-
output_path =
|
| 378 |
-
f"Affirmations/{txt_name}/"
|
| 379 |
-
f"{txt_name}_{idx+1:03d}.wav"
|
| 380 |
-
)
|
| 381 |
commit_operations.append(
|
| 382 |
CommitOperationAdd(
|
| 383 |
path_in_repo=output_path,
|
|
@@ -386,87 +409,35 @@ def auto_process_dataset():
|
|
| 386 |
)
|
| 387 |
|
| 388 |
except Exception as e:
|
| 389 |
-
current_status =
|
| 390 |
-
f"Error generating audio for sentence {idx+1}: {e}"
|
| 391 |
-
)
|
| 392 |
print(f"Generation error: {e}")
|
| 393 |
continue
|
| 394 |
|
| 395 |
-
#
|
| 396 |
-
|
| 397 |
-
current_status = (
|
| 398 |
-
f"Creating combined audio(s) for {txt_name}..."
|
| 399 |
-
)
|
| 400 |
-
combined_files = create_combined_audios(audio_files_info)
|
| 401 |
-
|
| 402 |
-
# Add combined files to upload operations
|
| 403 |
-
for i, (combined_file, duration) in enumerate(
|
| 404 |
-
combined_files
|
| 405 |
-
):
|
| 406 |
-
if len(combined_files) == 1:
|
| 407 |
-
combined_path = (
|
| 408 |
-
f"Affirmations/{txt_name}/"
|
| 409 |
-
f"{txt_name}_combined.wav"
|
| 410 |
-
)
|
| 411 |
-
else:
|
| 412 |
-
combined_path = (
|
| 413 |
-
f"Affirmations/{txt_name}/"
|
| 414 |
-
f"{txt_name}_combined_{i+1:03d}.wav"
|
| 415 |
-
)
|
| 416 |
-
|
| 417 |
-
commit_operations.append(
|
| 418 |
-
CommitOperationAdd(
|
| 419 |
-
path_in_repo=combined_path,
|
| 420 |
-
path_or_fileobj=combined_file,
|
| 421 |
-
)
|
| 422 |
-
)
|
| 423 |
-
temp_files.append(combined_file)
|
| 424 |
-
|
| 425 |
-
duration_min = int(duration // 60)
|
| 426 |
-
duration_sec = int(duration % 60)
|
| 427 |
-
print(
|
| 428 |
-
f" Combined file {i+1}: "
|
| 429 |
-
f"{duration_min}:{duration_sec:02d}"
|
| 430 |
-
)
|
| 431 |
|
| 432 |
# Upload all generated files
|
| 433 |
if commit_operations and auto_process_running:
|
| 434 |
-
total_individual = len(
|
| 435 |
-
total_combined = (
|
| 436 |
-
len(combined_files) if audio_files_info else 0
|
| 437 |
-
)
|
| 438 |
|
| 439 |
-
current_status =
|
| 440 |
-
f"Uploading {total_individual} individual + "
|
| 441 |
-
f"{total_combined} combined files for {txt_name}..."
|
| 442 |
-
)
|
| 443 |
|
| 444 |
try:
|
| 445 |
api.create_commit(
|
| 446 |
repo_id=output_dataset_id,
|
| 447 |
repo_type="dataset",
|
| 448 |
operations=commit_operations,
|
| 449 |
-
commit_message=
|
| 450 |
-
f"Add audio files for {txt_name} - "
|
| 451 |
-
f"{total_individual} individual + "
|
| 452 |
-
f"{total_combined} combined"
|
| 453 |
-
),
|
| 454 |
token=token,
|
| 455 |
)
|
| 456 |
-
current_status =
|
| 457 |
-
f"Successfully uploaded files for {txt_name}"
|
| 458 |
-
)
|
| 459 |
|
| 460 |
# Move TXT file to /done folder
|
| 461 |
-
current_status =
|
| 462 |
-
f"Moving {txt_name}.txt to /done folder..."
|
| 463 |
-
)
|
| 464 |
|
| 465 |
-
# Read file content
|
| 466 |
with open(txt_path, "rb") as f:
|
| 467 |
file_content = f.read()
|
| 468 |
|
| 469 |
-
# Create operations to move file
|
| 470 |
move_operations = [
|
| 471 |
CommitOperationAdd(
|
| 472 |
path_in_repo=f"done/{txt_file}",
|
|
@@ -479,22 +450,14 @@ def auto_process_dataset():
|
|
| 479 |
repo_id=input_dataset_id,
|
| 480 |
repo_type="dataset",
|
| 481 |
operations=move_operations,
|
| 482 |
-
commit_message=
|
| 483 |
-
f"Move {txt_name}.txt to /done after processing"
|
| 484 |
-
),
|
| 485 |
token=token,
|
| 486 |
)
|
| 487 |
|
| 488 |
-
current_status =
|
| 489 |
-
f"✅ Completed {txt_name}: "
|
| 490 |
-
f"{total_individual} individual + "
|
| 491 |
-
f"{total_combined} combined audio files"
|
| 492 |
-
)
|
| 493 |
|
| 494 |
except Exception as e:
|
| 495 |
-
current_status =
|
| 496 |
-
f"Upload/Move error for {txt_name}: {e}"
|
| 497 |
-
)
|
| 498 |
print(f"Error: {e}")
|
| 499 |
|
| 500 |
# Cleanup temporary files
|
|
@@ -505,7 +468,7 @@ def auto_process_dataset():
|
|
| 505 |
except Exception:
|
| 506 |
pass
|
| 507 |
|
| 508 |
-
time.sleep(2)
|
| 509 |
|
| 510 |
except Exception as e:
|
| 511 |
current_status = f"Error processing {txt_name}: {e}"
|
|
|
|
| 20 |
import time
|
| 21 |
from pathlib import Path
|
| 22 |
import tempfile
|
| 23 |
+
import re
|
| 24 |
|
| 25 |
# Set environment variables for HF Spaces
|
| 26 |
os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
|
|
|
|
| 41 |
PAUSE_DURATION = 3.0 # 3 seconds pause between audios
|
| 42 |
|
| 43 |
|
| 44 |
+
def sanitize_filename(text: str, max_len: int = 120) -> str:
    """
    Make a safe, lowercase ASCII filename stem from affirmation text.

    Steps, in order:
    - trim surrounding whitespace and lowercase
    - fold accented letters to their ASCII base letter ("ü" -> "u");
      characters with no ASCII decomposition are dropped
    - every run of whitespace (spaces, tabs, newlines) -> one underscore
    - remove everything except a-z, 0-9 and underscore
    - collapse multiple underscores and trim them from both ends
    - fall back to "affirmation" when nothing usable is left
    - limit length to ``max_len`` without leaving a trailing underscore

    Args:
        text: Raw affirmation sentence.
        max_len: Maximum number of characters in the returned stem.

    Returns:
        The sanitized stem, without a file extension.
    """
    # Local import keeps this block self-contained; the file already uses
    # function-scope imports elsewhere.
    import unicodedata

    text = text.strip().lower()
    # NFKD splits "ü" into "u" + combining mark; the ASCII round-trip then
    # discards the mark, so the base letter survives instead of vanishing.
    text = (
        unicodedata.normalize("NFKD", text)
        .encode("ascii", "ignore")
        .decode("ascii")
    )
    # Treat any whitespace as a word separator so tabs do not merge words.
    text = re.sub(r"\s+", "_", text)
    text = re.sub(r"[^a-z0-9_]", "", text)
    text = re.sub(r"_+", "_", text).strip("_")
    if not text:
        text = "affirmation"
    # The cut can land right after a separator; strip it so the stem never
    # ends in "_" (which would produce names like "foo_.wav").
    return text[:max_len].rstrip("_")
|
| 60 |
+
|
| 61 |
+
|
| 62 |
def download_models():
|
| 63 |
"""Download models if they don't exist"""
|
| 64 |
checkpoints_dir = "./checkpoints"
|
|
|
|
| 169 |
# 1.5 Sekunden Intro-Stille vor der ersten Audio
|
| 170 |
silence_intro = np.zeros(int(sr * 1.5), dtype=np.int16)
|
| 171 |
audio_out.append(silence_intro)
|
| 172 |
+
|
| 173 |
for i, fp in enumerate(current_files):
|
| 174 |
+
data, _ = sf.read(fp, dtype="int16")
|
| 175 |
audio_out.append(data)
|
| 176 |
+
|
| 177 |
# Zwischen Affirmationen 3 Sekunden Pause
|
| 178 |
if i < len(current_files) - 1:
|
| 179 |
audio_out.append(silence_3s)
|
| 180 |
|
|
|
|
| 181 |
final_audio = np.concatenate(audio_out)
|
| 182 |
sf.write(combined_name, final_audio, sr, subtype="PCM_16")
|
| 183 |
|
|
|
|
| 212 |
# 1.5 Sekunden Intro-Stille vor der ersten Audio
|
| 213 |
silence_intro = np.zeros(int(sr * 1.5), dtype=np.int16)
|
| 214 |
audio_out.append(silence_intro)
|
| 215 |
+
|
| 216 |
for i, fp in enumerate(current_files):
|
| 217 |
+
data, _ = sf.read(fp, dtype="int16")
|
| 218 |
audio_out.append(data)
|
| 219 |
+
|
| 220 |
# Zwischen Affirmationen 3 Sekunden Pause
|
| 221 |
if i < len(current_files) - 1:
|
| 222 |
audio_out.append(silence_3s)
|
| 223 |
|
|
|
|
| 224 |
final_audio = np.concatenate(audio_out)
|
| 225 |
sf.write(combined_name, final_audio, sr, subtype="PCM_16")
|
| 226 |
|
|
|
|
| 325 |
current_status = f"No sentences found in {txt_name}"
|
| 326 |
continue
|
| 327 |
|
| 328 |
+
current_status = f"Found {len(sentences)} sentences in {txt_name}"
|
|
|
|
|
|
|
| 329 |
print(f"Processing sentences from {txt_name}:")
|
| 330 |
|
| 331 |
temp_files = []
|
| 332 |
+
audio_files_info = [] # still used for durations/logging, not for combining
|
| 333 |
commit_operations = []
|
| 334 |
|
| 335 |
+
# Track used filenames to avoid duplicates within same TXT
|
| 336 |
+
used_names = set()
|
| 337 |
+
|
| 338 |
# Process each sentence
|
| 339 |
for idx, sentence in enumerate(sentences):
|
| 340 |
if not auto_process_running:
|
| 341 |
break
|
| 342 |
|
| 343 |
current_status = (
|
| 344 |
+
f"Processing {txt_name}: sentence {idx + 1}/{len(sentences)}"
|
|
|
|
| 345 |
)
|
| 346 |
|
| 347 |
try:
|
| 348 |
+
if not sentence:
|
| 349 |
continue
|
| 350 |
|
| 351 |
+
# Filename should be the affirmation text (before adding punctuation)
|
| 352 |
+
base_name = sanitize_filename(sentence)
|
| 353 |
+
if base_name in used_names:
|
| 354 |
+
# avoid overwriting if identical sentence appears multiple times
|
| 355 |
+
suffix = 2
|
| 356 |
+
while f"{base_name}_{suffix}" in used_names:
|
| 357 |
+
suffix += 1
|
| 358 |
+
base_name = f"{base_name}_{suffix}"
|
| 359 |
+
used_names.add(base_name)
|
| 360 |
+
|
| 361 |
# Add a period at the end if missing (helps with TTS prosody)
|
| 362 |
+
tts_sentence = sentence
|
| 363 |
+
if tts_sentence[-1] not in ".!?":
|
| 364 |
+
tts_sentence = tts_sentence + "."
|
| 365 |
|
| 366 |
+
print(f" Sentence {idx+1}: '{tts_sentence}'")
|
| 367 |
|
| 368 |
# Generate audio using IndexTTS2
|
| 369 |
+
output_filename = f"temp_{base_name}.wav"
|
| 370 |
|
| 371 |
# Capture stdout to get audio duration
|
| 372 |
import io
|
|
|
|
| 376 |
with redirect_stdout(buf):
|
| 377 |
tts_model.infer(
|
| 378 |
spk_audio_prompt=reference_voice_path,
|
| 379 |
+
text=tts_sentence,
|
| 380 |
output_path=output_filename,
|
| 381 |
+
verbose=True,
|
| 382 |
)
|
| 383 |
|
| 384 |
# Parse duration from output
|
|
|
|
| 391 |
break
|
| 392 |
|
| 393 |
if duration is None:
|
|
|
|
| 394 |
audio_data, sr = sf.read(output_filename)
|
| 395 |
duration = len(audio_data) / sr
|
| 396 |
|
| 397 |
print(f" Generated audio: {duration:.2f} seconds")
|
| 398 |
|
|
|
|
| 399 |
audio_files_info.append((output_filename, duration))
|
| 400 |
temp_files.append(output_filename)
|
| 401 |
|
| 402 |
+
# Upload path: use affirmation name, no numbering
|
| 403 |
+
output_path = f"Affirmations/{txt_name}/{base_name}.wav"
|
|
|
|
|
|
|
|
|
|
| 404 |
commit_operations.append(
|
| 405 |
CommitOperationAdd(
|
| 406 |
path_in_repo=output_path,
|
|
|
|
| 409 |
)
|
| 410 |
|
| 411 |
except Exception as e:
|
| 412 |
+
current_status = f"Error generating audio for sentence {idx+1}: {e}"
|
|
|
|
|
|
|
| 413 |
print(f"Generation error: {e}")
|
| 414 |
continue
|
| 415 |
|
| 416 |
+
# ✅ NO MORE COMBINED AUDIO CREATION HERE
|
| 417 |
+
# (combined generation removed/disabled as requested)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 418 |
|
| 419 |
# Upload all generated files
|
| 420 |
if commit_operations and auto_process_running:
|
| 421 |
+
total_individual = len(commit_operations)
|
|
|
|
|
|
|
|
|
|
| 422 |
|
| 423 |
+
current_status = f"Uploading {total_individual} audio files for {txt_name}..."
|
|
|
|
|
|
|
|
|
|
| 424 |
|
| 425 |
try:
|
| 426 |
api.create_commit(
|
| 427 |
repo_id=output_dataset_id,
|
| 428 |
repo_type="dataset",
|
| 429 |
operations=commit_operations,
|
| 430 |
+
commit_message=f"Add {total_individual} audio files for {txt_name}",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 431 |
token=token,
|
| 432 |
)
|
| 433 |
+
current_status = f"Successfully uploaded files for {txt_name}"
|
|
|
|
|
|
|
| 434 |
|
| 435 |
# Move TXT file to /done folder
|
| 436 |
+
current_status = f"Moving {txt_name}.txt to /done folder..."
|
|
|
|
|
|
|
| 437 |
|
|
|
|
| 438 |
with open(txt_path, "rb") as f:
|
| 439 |
file_content = f.read()
|
| 440 |
|
|
|
|
| 441 |
move_operations = [
|
| 442 |
CommitOperationAdd(
|
| 443 |
path_in_repo=f"done/{txt_file}",
|
|
|
|
| 450 |
repo_id=input_dataset_id,
|
| 451 |
repo_type="dataset",
|
| 452 |
operations=move_operations,
|
| 453 |
+
commit_message=f"Move {txt_name}.txt to /done after processing",
|
|
|
|
|
|
|
| 454 |
token=token,
|
| 455 |
)
|
| 456 |
|
| 457 |
+
current_status = f"✅ Completed {txt_name}: {total_individual} audio files"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 458 |
|
| 459 |
except Exception as e:
|
| 460 |
+
current_status = f"Upload/Move error for {txt_name}: {e}"
|
|
|
|
|
|
|
| 461 |
print(f"Error: {e}")
|
| 462 |
|
| 463 |
# Cleanup temporary files
|
|
|
|
| 468 |
except Exception:
|
| 469 |
pass
|
| 470 |
|
| 471 |
+
time.sleep(2)
|
| 472 |
|
| 473 |
except Exception as e:
|
| 474 |
current_status = f"Error processing {txt_name}: {e}"
|