Spaces:
shawange
/
Running on Zero

Mo2294 committed on
Commit
9d5ea3d
·
verified ·
1 Parent(s): 24b3a19

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -100
app.py CHANGED
@@ -20,6 +20,7 @@ import threading
20
  import time
21
  from pathlib import Path
22
  import tempfile
 
23
 
24
  # Set environment variables for HF Spaces
25
  os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
@@ -40,6 +41,24 @@ MAX_COMBINED_DURATION = 30 * 60 # 30 minutes in seconds
40
  PAUSE_DURATION = 3.0 # 3 seconds pause between audios
41
 
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  def download_models():
44
  """Download models if they don't exist"""
45
  checkpoints_dir = "./checkpoints"
@@ -150,16 +169,15 @@ def create_combined_audios(audio_files_info):
150
  # 1.5 Sekunden Intro-Stille vor der ersten Audio
151
  silence_intro = np.zeros(int(sr * 1.5), dtype=np.int16)
152
  audio_out.append(silence_intro)
153
-
154
  for i, fp in enumerate(current_files):
155
- data, _ = sf.read(fp, dtype='int16')
156
  audio_out.append(data)
157
-
158
  # Zwischen Affirmationen 3 Sekunden Pause
159
  if i < len(current_files) - 1:
160
  audio_out.append(silence_3s)
161
 
162
-
163
  final_audio = np.concatenate(audio_out)
164
  sf.write(combined_name, final_audio, sr, subtype="PCM_16")
165
 
@@ -194,16 +212,15 @@ def create_combined_audios(audio_files_info):
194
  # 1.5 Sekunden Intro-Stille vor der ersten Audio
195
  silence_intro = np.zeros(int(sr * 1.5), dtype=np.int16)
196
  audio_out.append(silence_intro)
197
-
198
  for i, fp in enumerate(current_files):
199
- data, _ = sf.read(fp, dtype='int16')
200
  audio_out.append(data)
201
-
202
  # Zwischen Affirmationen 3 Sekunden Pause
203
  if i < len(current_files) - 1:
204
  audio_out.append(silence_3s)
205
 
206
-
207
  final_audio = np.concatenate(audio_out)
208
  sf.write(combined_name, final_audio, sr, subtype="PCM_16")
209
 
@@ -308,37 +325,48 @@ def auto_process_dataset():
308
  current_status = f"No sentences found in {txt_name}"
309
  continue
310
 
311
- current_status = (
312
- f"Found {len(sentences)} sentences in {txt_name}"
313
- )
314
  print(f"Processing sentences from {txt_name}:")
315
 
316
  temp_files = []
317
- audio_files_info = [] # Store (filepath, duration) tuples
318
  commit_operations = []
319
 
 
 
 
320
  # Process each sentence
321
  for idx, sentence in enumerate(sentences):
322
  if not auto_process_running:
323
  break
324
 
325
  current_status = (
326
- f"Processing {txt_name}: sentence "
327
- f"{idx + 1}/{len(sentences)}"
328
  )
329
 
330
  try:
331
- if not sentence: # Skip empty sentences
332
  continue
333
 
 
 
 
 
 
 
 
 
 
 
334
  # Add a period at the end if missing (helps with TTS prosody)
335
- if sentence[-1] not in ".!?":
336
- sentence = sentence + "."
 
337
 
338
- print(f" Sentence {idx+1}: '{sentence}'")
339
 
340
  # Generate audio using IndexTTS2
341
- output_filename = f"temp_{txt_name}_{idx+1:03d}.wav"
342
 
343
  # Capture stdout to get audio duration
344
  import io
@@ -348,9 +376,9 @@ def auto_process_dataset():
348
  with redirect_stdout(buf):
349
  tts_model.infer(
350
  spk_audio_prompt=reference_voice_path,
351
- text=sentence,
352
  output_path=output_filename,
353
- verbose=True, # Enable verbose to get duration
354
  )
355
 
356
  # Parse duration from output
@@ -363,21 +391,16 @@ def auto_process_dataset():
363
  break
364
 
365
  if duration is None:
366
- # Fallback: read the file to get duration
367
  audio_data, sr = sf.read(output_filename)
368
  duration = len(audio_data) / sr
369
 
370
  print(f" Generated audio: {duration:.2f} seconds")
371
 
372
- # Store file info for combined audio
373
  audio_files_info.append((output_filename, duration))
374
  temp_files.append(output_filename)
375
 
376
- # Prepare upload operation for individual file
377
- output_path = (
378
- f"Affirmations/{txt_name}/"
379
- f"{txt_name}_{idx+1:03d}.wav"
380
- )
381
  commit_operations.append(
382
  CommitOperationAdd(
383
  path_in_repo=output_path,
@@ -386,87 +409,35 @@ def auto_process_dataset():
386
  )
387
 
388
  except Exception as e:
389
- current_status = (
390
- f"Error generating audio for sentence {idx+1}: {e}"
391
- )
392
  print(f"Generation error: {e}")
393
  continue
394
 
395
- # Create combined audio file(s)
396
- if audio_files_info and auto_process_running:
397
- current_status = (
398
- f"Creating combined audio(s) for {txt_name}..."
399
- )
400
- combined_files = create_combined_audios(audio_files_info)
401
-
402
- # Add combined files to upload operations
403
- for i, (combined_file, duration) in enumerate(
404
- combined_files
405
- ):
406
- if len(combined_files) == 1:
407
- combined_path = (
408
- f"Affirmations/{txt_name}/"
409
- f"{txt_name}_combined.wav"
410
- )
411
- else:
412
- combined_path = (
413
- f"Affirmations/{txt_name}/"
414
- f"{txt_name}_combined_{i+1:03d}.wav"
415
- )
416
-
417
- commit_operations.append(
418
- CommitOperationAdd(
419
- path_in_repo=combined_path,
420
- path_or_fileobj=combined_file,
421
- )
422
- )
423
- temp_files.append(combined_file)
424
-
425
- duration_min = int(duration // 60)
426
- duration_sec = int(duration % 60)
427
- print(
428
- f" Combined file {i+1}: "
429
- f"{duration_min}:{duration_sec:02d}"
430
- )
431
 
432
  # Upload all generated files
433
  if commit_operations and auto_process_running:
434
- total_individual = len(audio_files_info)
435
- total_combined = (
436
- len(combined_files) if audio_files_info else 0
437
- )
438
 
439
- current_status = (
440
- f"Uploading {total_individual} individual + "
441
- f"{total_combined} combined files for {txt_name}..."
442
- )
443
 
444
  try:
445
  api.create_commit(
446
  repo_id=output_dataset_id,
447
  repo_type="dataset",
448
  operations=commit_operations,
449
- commit_message=(
450
- f"Add audio files for {txt_name} - "
451
- f"{total_individual} individual + "
452
- f"{total_combined} combined"
453
- ),
454
  token=token,
455
  )
456
- current_status = (
457
- f"Successfully uploaded files for {txt_name}"
458
- )
459
 
460
  # Move TXT file to /done folder
461
- current_status = (
462
- f"Moving {txt_name}.txt to /done folder..."
463
- )
464
 
465
- # Read file content
466
  with open(txt_path, "rb") as f:
467
  file_content = f.read()
468
 
469
- # Create operations to move file
470
  move_operations = [
471
  CommitOperationAdd(
472
  path_in_repo=f"done/{txt_file}",
@@ -479,22 +450,14 @@ def auto_process_dataset():
479
  repo_id=input_dataset_id,
480
  repo_type="dataset",
481
  operations=move_operations,
482
- commit_message=(
483
- f"Move {txt_name}.txt to /done after processing"
484
- ),
485
  token=token,
486
  )
487
 
488
- current_status = (
489
- f"✅ Completed {txt_name}: "
490
- f"{total_individual} individual + "
491
- f"{total_combined} combined audio files"
492
- )
493
 
494
  except Exception as e:
495
- current_status = (
496
- f"Upload/Move error for {txt_name}: {e}"
497
- )
498
  print(f"Error: {e}")
499
 
500
  # Cleanup temporary files
@@ -505,7 +468,7 @@ def auto_process_dataset():
505
  except Exception:
506
  pass
507
 
508
- time.sleep(2) # Small delay between files
509
 
510
  except Exception as e:
511
  current_status = f"Error processing {txt_name}: {e}"
 
20
  import time
21
  from pathlib import Path
22
  import tempfile
23
+ import re
24
 
25
  # Set environment variables for HF Spaces
26
  os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
 
41
  PAUSE_DURATION = 3.0 # 3 seconds pause between audios
42
 
43
 
44
def sanitize_filename(text: str, max_len: int = 120) -> str:
    """
    Make a safe filename stem from affirmation text.

    Processing steps, in order:
    - fold accented letters to their ASCII base letter (e.g. "ü" -> "u")
    - lowercase and trim surrounding whitespace
    - turn every whitespace run (spaces, tabs, newlines) into one underscore
    - remove every character outside ``[a-z0-9_]``
    - collapse repeated underscores
    - truncate to ``max_len`` and strip leading/trailing underscores

    Args:
        text: Raw affirmation sentence.
        max_len: Maximum length of the returned stem.

    Returns:
        The sanitized stem, or ``"affirmation"`` (cut to ``max_len``) when
        no usable character survives.
    """
    # Local import so the helper does not depend on a file-level import.
    import unicodedata

    # NFKD splits accented characters into base letter + combining mark, so
    # the base letter survives the ASCII filter instead of being dropped.
    text = unicodedata.normalize("NFKD", text).strip().lower()
    # Any whitespace acts as a word separator, not only the literal space.
    text = re.sub(r"\s+", "_", text)
    text = re.sub(r"[^a-z0-9_]", "", text)
    text = re.sub(r"_+", "_", text)
    # Truncate first, then strip, so the cut never leaves a trailing "_".
    text = text[:max_len].strip("_")
    if not text:
        text = "affirmation"[:max_len]
    return text
60
+
61
+
62
  def download_models():
63
  """Download models if they don't exist"""
64
  checkpoints_dir = "./checkpoints"
 
169
  # 1.5 Sekunden Intro-Stille vor der ersten Audio
170
  silence_intro = np.zeros(int(sr * 1.5), dtype=np.int16)
171
  audio_out.append(silence_intro)
172
+
173
  for i, fp in enumerate(current_files):
174
+ data, _ = sf.read(fp, dtype="int16")
175
  audio_out.append(data)
176
+
177
  # Zwischen Affirmationen 3 Sekunden Pause
178
  if i < len(current_files) - 1:
179
  audio_out.append(silence_3s)
180
 
 
181
  final_audio = np.concatenate(audio_out)
182
  sf.write(combined_name, final_audio, sr, subtype="PCM_16")
183
 
 
212
  # 1.5 Sekunden Intro-Stille vor der ersten Audio
213
  silence_intro = np.zeros(int(sr * 1.5), dtype=np.int16)
214
  audio_out.append(silence_intro)
215
+
216
  for i, fp in enumerate(current_files):
217
+ data, _ = sf.read(fp, dtype="int16")
218
  audio_out.append(data)
219
+
220
  # Zwischen Affirmationen 3 Sekunden Pause
221
  if i < len(current_files) - 1:
222
  audio_out.append(silence_3s)
223
 
 
224
  final_audio = np.concatenate(audio_out)
225
  sf.write(combined_name, final_audio, sr, subtype="PCM_16")
226
 
 
325
  current_status = f"No sentences found in {txt_name}"
326
  continue
327
 
328
+ current_status = f"Found {len(sentences)} sentences in {txt_name}"
 
 
329
  print(f"Processing sentences from {txt_name}:")
330
 
331
  temp_files = []
332
+ audio_files_info = [] # still used for durations/logging, not for combining
333
  commit_operations = []
334
 
335
+ # Track used filenames to avoid duplicates within same TXT
336
+ used_names = set()
337
+
338
  # Process each sentence
339
  for idx, sentence in enumerate(sentences):
340
  if not auto_process_running:
341
  break
342
 
343
  current_status = (
344
+ f"Processing {txt_name}: sentence {idx + 1}/{len(sentences)}"
 
345
  )
346
 
347
  try:
348
+ if not sentence:
349
  continue
350
 
351
+ # Filename should be the affirmation text (before adding punctuation)
352
+ base_name = sanitize_filename(sentence)
353
+ if base_name in used_names:
354
+ # avoid overwriting if identical sentence appears multiple times
355
+ suffix = 2
356
+ while f"{base_name}_{suffix}" in used_names:
357
+ suffix += 1
358
+ base_name = f"{base_name}_{suffix}"
359
+ used_names.add(base_name)
360
+
361
  # Add a period at the end if missing (helps with TTS prosody)
362
+ tts_sentence = sentence
363
+ if tts_sentence[-1] not in ".!?":
364
+ tts_sentence = tts_sentence + "."
365
 
366
+ print(f" Sentence {idx+1}: '{tts_sentence}'")
367
 
368
  # Generate audio using IndexTTS2
369
+ output_filename = f"temp_{base_name}.wav"
370
 
371
  # Capture stdout to get audio duration
372
  import io
 
376
  with redirect_stdout(buf):
377
  tts_model.infer(
378
  spk_audio_prompt=reference_voice_path,
379
+ text=tts_sentence,
380
  output_path=output_filename,
381
+ verbose=True,
382
  )
383
 
384
  # Parse duration from output
 
391
  break
392
 
393
  if duration is None:
 
394
  audio_data, sr = sf.read(output_filename)
395
  duration = len(audio_data) / sr
396
 
397
  print(f" Generated audio: {duration:.2f} seconds")
398
 
 
399
  audio_files_info.append((output_filename, duration))
400
  temp_files.append(output_filename)
401
 
402
+ # Upload path: use affirmation name, no numbering
403
+ output_path = f"Affirmations/{txt_name}/{base_name}.wav"
 
 
 
404
  commit_operations.append(
405
  CommitOperationAdd(
406
  path_in_repo=output_path,
 
409
  )
410
 
411
  except Exception as e:
412
+ current_status = f"Error generating audio for sentence {idx+1}: {e}"
 
 
413
  print(f"Generation error: {e}")
414
  continue
415
 
416
+ # NO MORE COMBINED AUDIO CREATION HERE
417
+ # (combined generation removed/disabled as requested)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
418
 
419
  # Upload all generated files
420
  if commit_operations and auto_process_running:
421
+ total_individual = len(commit_operations)
 
 
 
422
 
423
+ current_status = f"Uploading {total_individual} audio files for {txt_name}..."
 
 
 
424
 
425
  try:
426
  api.create_commit(
427
  repo_id=output_dataset_id,
428
  repo_type="dataset",
429
  operations=commit_operations,
430
+ commit_message=f"Add {total_individual} audio files for {txt_name}",
 
 
 
 
431
  token=token,
432
  )
433
+ current_status = f"Successfully uploaded files for {txt_name}"
 
 
434
 
435
  # Move TXT file to /done folder
436
+ current_status = f"Moving {txt_name}.txt to /done folder..."
 
 
437
 
 
438
  with open(txt_path, "rb") as f:
439
  file_content = f.read()
440
 
 
441
  move_operations = [
442
  CommitOperationAdd(
443
  path_in_repo=f"done/{txt_file}",
 
450
  repo_id=input_dataset_id,
451
  repo_type="dataset",
452
  operations=move_operations,
453
+ commit_message=f"Move {txt_name}.txt to /done after processing",
 
 
454
  token=token,
455
  )
456
 
457
+ current_status = f"✅ Completed {txt_name}: {total_individual} audio files"
 
 
 
 
458
 
459
  except Exception as e:
460
+ current_status = f"Upload/Move error for {txt_name}: {e}"
 
 
461
  print(f"Error: {e}")
462
 
463
  # Cleanup temporary files
 
468
  except Exception:
469
  pass
470
 
471
+ time.sleep(2)
472
 
473
  except Exception as e:
474
  current_status = f"Error processing {txt_name}: {e}"