File size: 24,749 Bytes
92e51ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24b3a19
 
92e51ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24b3a19
 
92e51ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
#!/usr/bin/env python3
"""
HuggingFace Spaces app.py for IndexTTS2 with Auto-Processing and Combined Audio
"""
import os
import sys
import subprocess
import gradio as gr
import torch
import numpy as np
import soundfile as sf
from huggingface_hub import (
    HfApi,
    hf_hub_download,
    CommitOperationAdd,
    list_repo_files,
    CommitOperationDelete,
)
import threading
import time
from pathlib import Path
import tempfile

# Gradio server host/port used when running on HF Spaces
os.environ.update(
    {
        "GRADIO_SERVER_NAME": "0.0.0.0",
        "GRADIO_SERVER_PORT": "7860",
    }
)

# Make modules located next to this file importable
current_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.append(current_dir)

# Shared state for the background auto-processing worker
tts_model = None
current_status = "Ready"
auto_process_thread = None
auto_process_running = False

# Limits used when building the combined audio files
PAUSE_DURATION = 3.0  # seconds of silence between two clips
MAX_COMBINED_DURATION = 30 * 60  # 30 minutes, expressed in seconds


def download_models():
    """Fetch the IndexTTS2 checkpoints unless ./checkpoints already exists.

    Download failures are reported on stdout and otherwise ignored, so the
    caller never sees an exception from this function.
    """
    target_dir = "./checkpoints"
    if os.path.exists(target_dir):
        # Directory already present: nothing is downloaded.
        return

    print("Downloading IndexTTS2 models...")
    wanted_patterns = [
        "*.pth",
        "*.pt",
        "*.yaml",
        "*.model",
        "*.vocab",
        "qwen0.6bemo4-merge/**",
    ]
    try:
        from huggingface_hub import snapshot_download

        snapshot_download(
            repo_id="IndexTeam/IndexTTS-2",
            local_dir=target_dir,
            allow_patterns=wanted_patterns,
        )
        print("Models downloaded successfully!")
    except Exception as err:
        print(f"Failed to download models: {err}")
        manual_hint = (
            "Please download models manually from: "
            "https://huggingface.co/IndexTeam/IndexTTS-2"
        )
        print(manual_hint)


# Make sure the checkpoints are available before the model is constructed
download_models()

# Build the IndexTTS2 model; any failure leaves tts_model as None so the
# rest of the module can still be imported.
try:
    from indextts.infer_v2 import IndexTTS2

    tts_model = IndexTTS2(
        model_dir="checkpoints",
        cfg_path="checkpoints/config.yaml",
        use_deepspeed=False,
        use_cuda_kernel=False,
        use_fp16=True,  # FP16 for lower VRAM usage
    )
    print("IndexTTS2 model loaded successfully!")
except Exception as load_error:
    print(f"Error loading IndexTTS2 model: {load_error}")
    tts_model = None


def add_silence(duration_sec: float, sample_rate: int = 24000) -> np.ndarray:
    """Generate silence of the specified duration.

    Args:
        duration_sec: Length of the silence in seconds (must not be negative).
        sample_rate: Samples per second of the target audio.

    Returns:
        A 1-D float32 array of zeros with ``round(duration_sec * sample_rate)``
        samples.

    Raises:
        ValueError: If the requested length is negative.
    """
    # Round instead of truncating: float products such as 0.29 * 100 evaluate
    # to 28.999..., which int() would silently shorten by one sample.
    num_samples = int(round(duration_sec * sample_rate))
    if num_samples < 0:
        raise ValueError(
            f"Silence duration must be non-negative, got {duration_sec} seconds"
        )
    return np.zeros(num_samples, dtype=np.float32)


def parse_audio_duration_from_log(log_line: str):
    """Extract the duration from a line like '>> Generated audio length: 4.89 seconds'.

    Returns the duration as a float, or None when the marker is missing or
    the text following it cannot be converted to a number.
    """
    marker = "Generated audio length:"
    if marker not in log_line:
        return None

    try:
        # Text between the marker and the word "seconds" is the number.
        after_marker = log_line.split(marker)[1]
        number_text = after_marker.split("seconds")[0].strip()
        return float(number_text)
    except Exception:
        return None


def _combined_output_name(combined_index, total_inputs):
    """Return the temp file name used for the combined file number combined_index."""
    if combined_index == 1 and total_inputs <= 30:
        return "temp_combined.wav"
    return f"temp_combined_{combined_index:03d}.wav"


def _write_combined_file(output_name, file_paths, sample_rate, pause):
    """Write intro silence + all clips (separated by `pause`) to output_name."""
    # 1.5 seconds of intro silence before the first clip
    intro = np.zeros(int(sample_rate * 1.5), dtype=np.int16)
    segments = [intro]

    last_position = len(file_paths) - 1
    for position, path in enumerate(file_paths):
        samples, _ = sf.read(path, dtype="int16")
        segments.append(samples)
        # Pause between clips, but not after the last one
        if position < last_position:
            segments.append(pause)

    sf.write(
        output_name, np.concatenate(segments), sample_rate, subtype="PCM_16"
    )


def create_combined_audios(audio_files_info):
    """
    Create combined audio file(s) with 3-second pauses,
    without changing pitch, samplerate or bitdepth.

    Clips are appended in order; a new combined file is started whenever
    adding the next clip (plus its pause) would exceed MAX_COMBINED_DURATION.

    Args:
        audio_files_info: List[(file_path, duration_in_seconds)]

    Returns:
        List[(combined_file_name, duration_in_seconds)]. The duration counts
        the clips and the pauses between them; the 1.5 s intro silence that
        is written to each file is not included. An empty input yields [].

    NOTE(review): the sample rate is taken from the first file only, and the
    silence buffers are 1-D, so all clips are assumed to be mono with the
    same sample rate - confirm against the TTS output.
    """
    if not audio_files_info:
        return []

    # Read only the header of the first file to get the original sample rate
    sample_rate = sf.info(audio_files_info[0][0]).samplerate

    # Pause between clips, generated at the original sample rate
    pause = np.zeros(int(sample_rate * PAUSE_DURATION), dtype=np.int16)
    total_inputs = len(audio_files_info)

    combined_files = []
    group_paths = []
    group_duration = 0.0

    def flush_group():
        """Write the current group to disk and record it in combined_files."""
        combined_index = len(combined_files) + 1
        combined_name = _combined_output_name(combined_index, total_inputs)
        _write_combined_file(combined_name, group_paths, sample_rate, pause)
        combined_files.append((combined_name, group_duration))
        print(
            f"Created combined file {combined_index}: "
            f"{int(group_duration // 60)}:{int(group_duration % 60):02d}"
        )

    for file_path, duration in audio_files_info:
        # Length of the current group if this clip were added to it
        if group_paths:
            projected = group_duration + PAUSE_DURATION + duration
        else:
            projected = duration

        if group_paths and projected > MAX_COMBINED_DURATION:
            # Too long: save the current group and start a new one with this clip
            flush_group()
            group_paths = [file_path]
            group_duration = duration
        else:
            group_paths.append(file_path)
            group_duration = projected

    # Save the last (possibly only) group
    if group_paths:
        flush_group()

    return combined_files


def _split_sentences(content):
    """Split raw TXT content on the '.-' delimiter into cleaned sentences."""
    sentences = []
    for raw in content.split(".-"):
        cleaned = raw.strip()
        if not cleaned:
            continue
        # Remove a single trailing dash or dot left over from the delimiter
        if cleaned.endswith(("-", ".")):
            cleaned = cleaned[:-1].rstrip()
        sentences.append(cleaned)
    return sentences


def _synthesize_sentence(sentence, reference_voice_path, output_filename):
    """Run TTS for one sentence into output_filename and return its duration in seconds."""
    import io
    from contextlib import redirect_stdout

    # The duration is printed by the model in verbose mode, so capture stdout.
    captured = io.StringIO()
    with redirect_stdout(captured):
        tts_model.infer(
            spk_audio_prompt=reference_voice_path,
            text=sentence,
            output_path=output_filename,
            verbose=True,  # Enable verbose to get duration
        )

    for line in captured.getvalue().split("\n"):
        duration = parse_audio_duration_from_log(line)
        if duration:
            return duration

    # Fallback: read the file to get duration
    audio_data, sample_rate = sf.read(output_filename)
    return len(audio_data) / sample_rate


def _remove_temp_files(temp_files):
    """Best-effort removal of temporary files; failures are reported, not raised."""
    for temp_file in temp_files:
        try:
            if os.path.exists(temp_file):
                os.remove(temp_file)
        except OSError as e:
            print(f"Could not remove temp file {temp_file}: {e}")


def _process_txt_file(
    api,
    token,
    txt_file,
    txt_name,
    input_dataset_id,
    output_dataset_id,
    reference_voice_path,
):
    """Generate, combine and upload the audio for one TXT file, then move it to done/."""
    global current_status

    # Download TXT file
    txt_path = hf_hub_download(
        repo_id=input_dataset_id,
        filename=txt_file,
        repo_type="dataset",
        token=token,
    )

    with open(txt_path, "r", encoding="utf-8") as f:
        content = f.read()

    sentences = _split_sentences(content)
    if not sentences:
        current_status = f"No sentences found in {txt_name}"
        return

    current_status = f"Found {len(sentences)} sentences in {txt_name}"
    print(f"Processing sentences from {txt_name}:")

    temp_files = []
    audio_files_info = []  # (filepath, duration) tuples
    commit_operations = []
    # Initialised per file so the upload step never sees an unbound or
    # stale value when the combine step was skipped.
    combined_files = []

    try:
        # Process each sentence
        for idx, sentence in enumerate(sentences):
            if not auto_process_running:
                break

            current_status = (
                f"Processing {txt_name}: sentence "
                f"{idx + 1}/{len(sentences)}"
            )

            try:
                if not sentence:  # Skip empty sentences
                    continue

                # Add a period at the end if missing (helps with TTS prosody)
                if sentence[-1] not in ".!?":
                    sentence = sentence + "."

                print(f"  Sentence {idx+1}: '{sentence}'")

                output_filename = f"temp_{txt_name}_{idx+1:03d}.wav"
                # Register before synthesis so a partially written file is
                # still removed if generation fails.
                temp_files.append(output_filename)

                duration = _synthesize_sentence(
                    sentence, reference_voice_path, output_filename
                )
                print(f"    Generated audio: {duration:.2f} seconds")

                # Store file info for combined audio
                audio_files_info.append((output_filename, duration))

                # Prepare upload operation for individual file
                output_path = (
                    f"Affirmations/{txt_name}/"
                    f"{txt_name}_{idx+1:03d}.wav"
                )
                commit_operations.append(
                    CommitOperationAdd(
                        path_in_repo=output_path,
                        path_or_fileobj=output_filename,
                    )
                )

            except Exception as e:
                current_status = (
                    f"Error generating audio for sentence {idx+1}: {e}"
                )
                print(f"Generation error: {e}")
                continue

        # Create combined audio file(s)
        if audio_files_info and auto_process_running:
            current_status = f"Creating combined audio(s) for {txt_name}..."
            combined_files = create_combined_audios(audio_files_info)

            # Add combined files to upload operations
            for i, (combined_file, duration) in enumerate(combined_files):
                temp_files.append(combined_file)

                if len(combined_files) == 1:
                    combined_path = (
                        f"Affirmations/{txt_name}/"
                        f"{txt_name}_combined.wav"
                    )
                else:
                    combined_path = (
                        f"Affirmations/{txt_name}/"
                        f"{txt_name}_combined_{i+1:03d}.wav"
                    )

                commit_operations.append(
                    CommitOperationAdd(
                        path_in_repo=combined_path,
                        path_or_fileobj=combined_file,
                    )
                )

                duration_min = int(duration // 60)
                duration_sec = int(duration % 60)
                print(
                    f"  Combined file {i+1}: "
                    f"{duration_min}:{duration_sec:02d}"
                )

        # Upload all generated files
        if commit_operations and auto_process_running:
            total_individual = len(audio_files_info)
            total_combined = len(combined_files)

            current_status = (
                f"Uploading {total_individual} individual + "
                f"{total_combined} combined files for {txt_name}..."
            )

            try:
                api.create_commit(
                    repo_id=output_dataset_id,
                    repo_type="dataset",
                    operations=commit_operations,
                    commit_message=(
                        f"Add audio files for {txt_name} - "
                        f"{total_individual} individual + "
                        f"{total_combined} combined"
                    ),
                    token=token,
                )
                current_status = f"Successfully uploaded files for {txt_name}"

                # Move TXT file to /done folder (add under done/, delete original)
                current_status = f"Moving {txt_name}.txt to /done folder..."

                with open(txt_path, "rb") as f:
                    file_content = f.read()

                move_operations = [
                    CommitOperationAdd(
                        path_in_repo=f"done/{txt_file}",
                        path_or_fileobj=file_content,
                    ),
                    CommitOperationDelete(path_in_repo=txt_file),
                ]

                api.create_commit(
                    repo_id=input_dataset_id,
                    repo_type="dataset",
                    operations=move_operations,
                    commit_message=(
                        f"Move {txt_name}.txt to /done after processing"
                    ),
                    token=token,
                )

                current_status = (
                    f"✅ Completed {txt_name}: "
                    f"{total_individual} individual + "
                    f"{total_combined} combined audio files"
                )

            except Exception as e:
                current_status = f"Upload/Move error for {txt_name}: {e}"
                print(f"Error: {e}")
    finally:
        # Always clean up, even when an exception escapes the steps above.
        _remove_temp_files(temp_files)

    time.sleep(2)  # Small delay between files


def auto_process_dataset():
    """
    Auto-process TXT files from Mo2294/rawAffirmation
    Generate audio for each sentence (split by .-) and upload to the
    output dataset Mo2294/outputAffirmation
    Create combined audio(s) with 3s pauses, max 30 min each
    Move processed TXT files to /done folder

    Progress and errors are reported through the module-level
    `current_status`; nothing is returned. `auto_process_running` is polled
    between files and sentences to honour stop requests and is always reset
    to False when this function exits, including on early error returns.
    """
    global auto_process_running, current_status

    try:
        # Checked inside the try so the finally clause resets the running
        # flag; otherwise a missing model would block every later start.
        if tts_model is None:
            current_status = "Error: TTS model not loaded"
            return

        token = os.getenv("HF_TOKEN")
        if not token:
            current_status = "Error: HF_TOKEN not found in environment"
            return

        api = HfApi(token=token)
        input_dataset_id = "Mo2294/rawAffirmation"
        output_dataset_id = "Mo2294/outputAffirmation"

        # Download reference voice
        current_status = "Downloading reference voice Mo.wav..."
        reference_voice_path = hf_hub_download(
            repo_id=output_dataset_id,
            filename="Mo.wav",
            repo_type="dataset",
            token=token,
        )

        # Get list of TXT files from input dataset (excluding /done folder)
        current_status = "Scanning for TXT files..."
        try:
            repo_files = list_repo_files(
                repo_id=input_dataset_id, repo_type="dataset", token=token
            )
            txt_files = [
                f
                for f in repo_files
                if f.endswith(".txt") and not f.startswith("done/")
            ]
        except Exception as e:
            current_status = f"Error listing files: {e}"
            return

        if not txt_files:
            current_status = "No TXT files found to process"
            return

        current_status = f"Found {len(txt_files)} TXT files to process"

        # Process each TXT file
        for txt_file in txt_files:
            if not auto_process_running:
                current_status = "Processing stopped by user"
                break

            txt_name = Path(txt_file).stem
            current_status = f"Processing: {txt_name}"

            try:
                _process_txt_file(
                    api,
                    token,
                    txt_file,
                    txt_name,
                    input_dataset_id,
                    output_dataset_id,
                    reference_voice_path,
                )
            except Exception as e:
                # One failing file must not abort the remaining ones.
                current_status = f"Error processing {txt_name}: {e}"
                print(f"Error: {e}")
                continue

        if auto_process_running:
            current_status = "✅ Auto-processing completed successfully!"
        else:
            current_status = "⏹️ Auto-processing stopped"

    except Exception as e:
        current_status = f"❌ Fatal error: {str(e)}"
        print(f"Fatal error: {e}")
    finally:
        auto_process_running = False


def start_auto_process():
    """Start the auto-processing thread.

    Returns:
        A (message, status) tuple for the UI. No new worker is started while
        a run is active or while a previously stopped worker is still
        winding down.
    """
    global auto_process_running, auto_process_thread

    if auto_process_running:
        return "Auto-processing already running!", current_status

    # After a stop request the flag is already False while the old worker may
    # still be finishing its current step; starting a second worker then
    # would have both write the same temp files.
    if auto_process_thread is not None and auto_process_thread.is_alive():
        return (
            "⏳ Previous run is still stopping, please wait...",
            current_status,
        )

    auto_process_running = True
    auto_process_thread = threading.Thread(target=auto_process_dataset)
    auto_process_thread.start()
    return "✅ Auto-processing started!", "Starting..."


def stop_auto_process():
    """Request the background worker to stop and report the current status."""
    global auto_process_running

    # Only clears the flag; the worker checks it between files and sentences.
    auto_process_running = False

    message = "⏹️ Stop signal sent!"
    return (message, current_status)


def get_status():
    """Return the status text, with a spinner emoji appended while a run is active."""
    spinner = " 🔄" if auto_process_running else ""
    return current_status + spinner


def manual_generate(text, reference_audio, emotion_audio, emo_alpha, use_emo_text):
    """Manual TTS generation.

    Args:
        text: Text to synthesize.
        reference_audio: File path of the voice reference (required).
        emotion_audio: Optional file path of an emotion reference.
        emo_alpha: Emotion strength passed to the model.
        use_emo_text: Whether to derive the emotion from the text; only used
            when no emotion reference is given.

    Returns:
        (sample_rate, audio_data) on success, or None when the model is not
        loaded, a required input is missing, or generation fails.
    """
    if tts_model is None or not reference_audio:
        return None

    # Nothing to synthesize for empty / whitespace-only text
    if not text or not text.strip():
        return None

    infer_kwargs = {
        "spk_audio_prompt": reference_audio,
        "text": text,
        "verbose": False,
    }
    if emotion_audio:
        infer_kwargs["emo_audio_prompt"] = emotion_audio
        infer_kwargs["emo_alpha"] = emo_alpha
    else:
        infer_kwargs["use_emo_text"] = use_emo_text
        infer_kwargs["emo_alpha"] = emo_alpha if use_emo_text else 1.0

    try:
        # A per-call temp directory keeps concurrent requests from
        # overwriting each other's output and removes the file afterwards.
        with tempfile.TemporaryDirectory() as work_dir:
            output_path = os.path.join(work_dir, "manual_output.wav")
            tts_model.infer(output_path=output_path, **infer_kwargs)

            # Load the audio into memory before the directory is deleted
            audio_data, sample_rate = sf.read(output_path)

        return (sample_rate, audio_data)

    except Exception as e:
        print(f"Generation error: {e}")
        return None


# Create Gradio interface: a "Manual Processing" tab for one-off synthesis
# and an "Auto Processing" tab that controls the background dataset worker.
with gr.Blocks(title="IndexTTS2 with Auto-Processing") as demo:
    gr.Markdown("# 🎤 IndexTTS2 Voice Synthesis")
    gr.Markdown(
        "State-of-the-art TTS with auto-processing and combined audio generation"
    )

    # Manual tab: inputs in the left column, button and result in the right
    with gr.Tab("Manual Processing"):
        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(
                    label="Text to synthesize",
                    placeholder="Enter text here...",
                    lines=3,
                    value="大家好,我现在正在体验AI科技!",
                )
                # Both audio inputs are handed to manual_generate as file paths
                reference_audio = gr.Audio(
                    sources=["upload"],
                    type="filepath",
                    label="Voice reference (required)",
                )
                emotion_audio = gr.Audio(
                    sources=["upload"],
                    type="filepath",
                    label="Emotion reference (optional)",
                )

                with gr.Row():
                    emo_alpha = gr.Slider(
                        minimum=0.0,
                        maximum=1.0,
                        value=0.6,
                        step=0.1,
                        label="Emotion strength",
                    )
                    use_emo_text = gr.Checkbox(
                        label="Use text-based emotion", value=False
                    )

            with gr.Column():
                generate_btn = gr.Button(
                    "🎙️ Generate", variant="primary", size="lg"
                )
                # manual_generate returns (sample_rate, audio_data) or None
                output_audio = gr.Audio(label="Generated audio", type="numpy")

        # Input order must match the parameter order of manual_generate
        generate_btn.click(
            manual_generate,
            inputs=[
                text_input,
                reference_audio,
                emotion_audio,
                emo_alpha,
                use_emo_text,
            ],
            outputs=output_audio,
        )

    # Auto-processing tab
    with gr.Tab("Auto Processing"):
        gr.Markdown("### 🚀 Automatic Dataset Processing with Combined Audio")

        with gr.Row():
            # Static description of the settings used by auto_process_dataset
            with gr.Column(scale=1):
                gr.Markdown(
                    """
                **Configuration:**
                - 📁 Input: `Mo2294/rawAffirmation`
                - 📂 Output: `Mo2294/outputAffirmation`
                - 🎙️ Voice: `Mo.wav`
                - ✂️ Delimiter: `.-`
                - 📝 Structure: `/Affirmations/[name]/`
                - ⏰ Combined: Max 30 min chunks
                - ⏸️ Pauses: 3 seconds between audios
                """
                )

            with gr.Column(scale=2):
                # Initial value is computed once when the UI is built; it is
                # only updated afterwards by the button handlers below.
                status_display = gr.Textbox(
                    label="📊 Processing Status",
                    value=get_status(),
                    interactive=False,
                    lines=3,
                )

                with gr.Row():
                    start_btn = gr.Button(
                        "▶️ Start Processing", variant="primary", scale=2
                    )
                    stop_btn = gr.Button("⏹️ Stop", variant="stop", scale=1)
                    refresh_btn = gr.Button("🔄 Refresh", scale=1)

                # Hidden textbox that receives the message part of the
                # (message, status) tuples returned by start/stop
                message_display = gr.Textbox(
                    label="Message", interactive=False, visible=False
                )

        # Event handlers
        start_btn.click(
            start_auto_process, outputs=[message_display, status_display]
        )
        stop_btn.click(
            stop_auto_process, outputs=[message_display, status_display]
        )
        refresh_btn.click(get_status, outputs=status_display)

    # Footer
    gr.Markdown(
        """
    ---
    <div align="center">
    <a href="https://github.com/index-tts/index-tts">GitHub</a> | 
    <a href="https://arxiv.org/abs/2506.21619">Paper</a> | 
    <a href="https://index-tts.github.io/index-tts2.github.io/">Demo</a>
    </div>
    """
    )

# Launch the app only when this file is run as a script
if __name__ == "__main__":
    demo.launch()