File size: 38,617 Bytes
658e790
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3bfebb6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
658e790
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ef411bc
658e790
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1fba260
658e790
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
be99110
658e790
 
 
 
 
 
 
 
 
 
9362c34
 
658e790
 
 
 
 
 
ce7990e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9853042
658e790
ce7990e
658e790
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74aed6e
 
658e790
74aed6e
 
658e790
 
74aed6e
658e790
 
 
 
 
 
e3adaee
 
 
 
658e790
2ab39a8
 
 
 
658e790
e3adaee
 
658e790
 
 
 
 
 
 
 
 
 
 
 
e3adaee
 
 
 
 
 
035fdcd
 
 
 
 
 
 
 
 
 
 
e3adaee
 
035fdcd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e3adaee
ef411bc
035fdcd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ef411bc
 
 
 
 
 
 
e3adaee
658e790
 
 
e3adaee
658e790
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ef411bc
 
 
 
 
 
658e790
 
 
 
 
 
 
 
 
 
 
 
 
7b364b8
 
658e790
 
7b364b8
658e790
 
7b364b8
 
 
658e790
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1fba260
 
 
 
 
 
 
 
658e790
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1fba260
658e790
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1fba260
 
 
 
 
 
 
658e790
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
#!/usr/bin/env python3
"""

MiloMusic - Hugging Face Spaces Version

AI-powered music generation platform optimized for cloud deployment with high-performance configuration.

"""

import os
import sys
import subprocess
import tempfile
import gradio as gr
import soundfile as sf
from dataclasses import dataclass, field
from typing import Any
import xxhash
import numpy as np
import spaces
import groq

# Import environment setup for Spaces
def setup_spaces_environment():
    """Configure process-wide environment variables for a Spaces deployment.

    Points the Hugging Face caches and the temp directory at locations under
    ``/tmp`` and sets the PyTorch CUDA allocator configuration, then logs that
    setup is complete. Existing values of these variables are overwritten.
    """
    spaces_env = {
        # Hugging Face cache directories
        "HF_HOME": "/tmp/hf_cache",
        "TRANSFORMERS_CACHE": "/tmp/transformers_cache",
        "HF_HUB_CACHE": "/tmp/hf_hub_cache",
        # PyTorch CUDA memory optimization
        "PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True",
        # Temp directory for audio files
        "TMPDIR": "/tmp",
    }
    for variable, value in spaces_env.items():
        os.environ[variable] = value

    print("🚀 Environment setup complete for Spaces")

# Install flash-attn if not already installed
def install_flash_attn():
    """Make sure ``flash_attn`` is importable, installing it with pip if not.

    Returns:
        bool: True if the package was already importable or pip exited with
        status 0; False if pip failed, timed out (30 minutes), or raised.
    """
    try:
        import flash_attn  # noqa: F401  (imported only to probe availability)
    except ImportError:
        print("📦 Installing flash-attn from source...")
    else:
        print("✅ flash-attn already installed")
        return True

    try:
        # pip invocation with the build options used for Spaces
        pip_cmd = [
            sys.executable, "-m", "pip", "install",
            "--no-build-isolation",
            "--no-cache-dir",
            "flash-attn",
            "--verbose",
        ]

        # Build environment: parallel compile jobs and the host compiler for nvcc
        build_env = dict(os.environ)
        build_env.update(
            MAX_JOBS="4",
            NVCC_PREPEND_FLAGS="-ccbin /usr/bin/gcc",
        )

        completed = subprocess.run(
            pip_cmd,
            env=build_env,
            capture_output=True,
            text=True,
            timeout=1800,  # 30 min timeout
        )

        if completed.returncode != 0:
            print(f"❌ flash-attn installation failed: {completed.stderr}")
            return False

        print("✅ flash-attn installed successfully")
        return True

    except subprocess.TimeoutExpired:
        print("⏰ flash-attn installation timed out")
        return False
    except Exception as e:
        print(f"❌ Error installing flash-attn: {e}")
        return False

# Setup environment first
# Runs at import time, ahead of the model download and the torch import that
# appear further down in this file.
setup_spaces_environment()

# Download required models for YuEGP inference
def download_required_models():
    """Run the project's model-availability check at startup.

    Returns:
        The value returned by ``download_models.ensure_model_availability()``,
        or False when that module cannot be imported or the check raises.
    """
    try:
        from download_models import ensure_model_availability

        print("🚀 Checking and downloading required models...")
        models_ok = ensure_model_availability()
        print(
            "✅ Model setup completed successfully"
            if models_ok
            else "⚠️ Some models may be missing - continuing with available resources"
        )
        return models_ok
    except ImportError as err:
        print(f"⚠️ Model download script not found: {err}")
        return False
    except Exception as err:
        print(f"❌ Error during model download: {err}")
        return False

# Download models before other setup
# NOTE(review): models_ready is not read anywhere in the visible part of this
# file — confirm whether later code uses it.
models_ready = download_required_models()

# Install flash-attn if needed
# generate_music_spaces() reads this flag and passes --sdpa when it is False.
flash_attn_available = install_flash_attn()

# Apply transformers patches for performance optimization
def apply_transformers_patch():
    """
    Apply YuEGP transformers patches for high-performance generation.

    Copies every ``.py`` file found under ``<project_root>/YuEGP/transformers``
    over the file at the same relative path inside the installed
    ``transformers`` package. Two files are used to decide whether the patch
    set is already in place (their MD5 hashes are compared against the
    installed copies):

    - models/llama/modeling_llama.py (LLaMA model optimizations)
    - generation/utils.py (generation utilities optimizations)

    The installed package is located by scanning every directory reported by
    ``site.getsitepackages()`` and, if none contains it, by asking the import
    system for the package location without importing it.

    The speed-up figures printed below are the ones quoted for the patches;
    nothing in this function measures them.

    Returns:
        bool: True if the patches were applied or were already in place;
        False if the patch directory or the transformers package could not
        be found, or if any error occurred while patching.
    """
    try:
        import hashlib
        import importlib.util
        import shutil
        import site

        # Define source directory holding the patched files
        source_dir = os.path.join(project_root, "YuEGP", "transformers")

        # Check if source patches exist
        if not os.path.exists(source_dir):
            print("⚠️  YuEGP transformers patches not found, skipping optimization")
            return False

        # Locate the installed package. The first site-packages entry is not
        # necessarily the one that holds transformers, so check all of them
        # in order. (getsitepackages is absent in some old virtualenvs.)
        target_dir = None
        for base in getattr(site, "getsitepackages", lambda: [])():
            candidate = os.path.join(base, "transformers")
            if os.path.isdir(candidate):
                target_dir = candidate
                break

        if target_dir is None:
            # Fallback: resolve the package through the import system.
            # find_spec on a top-level name does not execute the package, so
            # no transformers code is loaded before the files are replaced.
            spec = importlib.util.find_spec("transformers")
            if spec is not None and spec.origin:
                target_dir = os.path.dirname(spec.origin)

        if target_dir is None or not os.path.isdir(target_dir):
            print("⚠️  Transformers library not found, skipping patches")
            return False

        # Only used to print short relative paths in the log below
        target_base = os.path.dirname(target_dir)

        # Check if patches are already applied by comparing file hashes
        def get_file_hash(filepath):
            """Get MD5 hash of file content, or None if the file is missing"""
            if not os.path.exists(filepath):
                return None
            with open(filepath, 'rb') as f:
                return hashlib.md5(f.read()).hexdigest()

        # Key files to check for patch status
        key_patches = [
            "models/llama/modeling_llama.py",
            "generation/utils.py"
        ]

        patches_needed = False
        for patch_file in key_patches:
            source_file = os.path.join(source_dir, patch_file)
            target_file = os.path.join(target_dir, patch_file)

            # A key file missing from the patch set is ignored for detection
            if os.path.exists(source_file):
                if get_file_hash(source_file) != get_file_hash(target_file):
                    patches_needed = True
                    break

        if not patches_needed:
            print("✅ YuEGP transformers patches already applied, skipping re-installation")
            print("  📈 High-performance optimizations are active:")
            print("    • Stage 1 generation: 3x faster (16GB+ VRAM)")
            print("    • Stage 2 generation: 2x faster (all profiles)")
            return True

        # Apply patches by copying optimized files
        print("🔧 Applying YuEGP transformers patches for high-performance generation...")

        # Copy the patched files, preserving directory structure
        for root, dirs, files in os.walk(source_dir):
            # Calculate relative path from source_dir
            rel_path = os.path.relpath(root, source_dir)
            target_subdir = os.path.join(target_dir, rel_path) if rel_path != '.' else target_dir

            # Ensure target subdirectory exists
            os.makedirs(target_subdir, exist_ok=True)

            # Copy all Python files in this directory
            for file in files:
                if file.endswith('.py'):
                    src_file = os.path.join(root, file)
                    dst_file = os.path.join(target_subdir, file)

                    shutil.copy2(src_file, dst_file)
                    print(f"  ✅ Patched: {os.path.relpath(dst_file, target_base)}")

        print("🚀 Transformers patches applied successfully!")
        print("  📈 Expected performance gains:")
        print("    • Stage 1 generation: 3x faster (16GB+ VRAM)")
        print("    • Stage 2 generation: 2x faster (all profiles)")
        return True

    except Exception as e:
        # Best effort: the app keeps running unpatched if anything goes wrong
        print(f"❌ Error applying transformers patches: {e}")
        print("   Continuing without patches - performance may be reduced")
        return False

# Now import the rest of the dependencies
# Add project root to Python path for imports
# (project_root is also read by apply_transformers_patch() to find the patch files)
project_root = os.path.dirname(os.path.abspath(__file__))
if project_root not in sys.path:
    sys.path.insert(0, project_root)

# Project-local modules below resolve through the sys.path entry added above
from tools.groq_client import client as groq_client
from openai import OpenAI
from tools.generate_lyrics import generate_structured_lyrics, format_lyrics

# Apply patches after all imports are set up
# (must stay below the project_root assignment, which the function depends on)
patch_applied = apply_transformers_patch()

# Import CUDA info after flash-attn setup
# Startup diagnostic only: logs the name and total memory of CUDA device 0
import torch
if torch.cuda.is_available():
    print(f"🎮 GPU: {torch.cuda.get_device_name(0)}")
    print(f"💾 VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f}GB")
else:
    print("⚠️  No CUDA GPU detected")

@dataclass
class AppState:
    """Per-session state carried between UI callbacks.

    Every field has a default, so ``AppState()`` yields a fresh, empty state.
    """
    # Chat history as a list of {"role": ..., "content": ...} messages;
    # a new list is created for each instance.
    conversation: list = field(default_factory=list)
    # NOTE(review): stopped and model_outs are not read in the visible part of
    # this file — confirm their use elsewhere.
    stopped: bool = False
    model_outs: Any = None
    # Most recent lyrics text recognised by respond()
    lyrics: str = ""
    # Song parameters handed to the lyrics generator
    genre: str = "pop"
    mood: str = "upbeat"
    theme: str = "love"

def validate_api_keys():
    """Check that the API keys needed by the app are present in the environment.

    A key counts as missing when its environment variable is unset or empty.

    Returns:
        bool: True when every required key is set, False otherwise.
    """
    wanted = ("GROQ_API_KEY", "GEMINI_API_KEY")
    missing_keys = [name for name in wanted if not os.getenv(name)]

    if not missing_keys:
        print("✅ All API keys validated")
        return True

    print(f"⚠️  Missing API keys: {missing_keys}")
    return False

def validate_file_structure():
    """Check that the files the app depends on exist.

    Paths are relative, so they are resolved against the current working
    directory at call time.

    Returns:
        bool: True when every required path exists, False otherwise.
    """
    expected_paths = (
        "YuEGP/inference/infer.py",
        "YuEGP/inference/codecmanipulator.py",
        "YuEGP/inference/mmtokenizer.py",
        "tools/generate_lyrics.py",
        "tools/groq_client.py",
        "schemas/lyrics.py",  # Required for lyrics structure models
    )
    missing_files = [path for path in expected_paths if not os.path.exists(path)]

    if not missing_files:
        print("✅ All required files found")
        return True

    print(f"⚠️  Missing required files: {missing_files}")
    return False

def _convert_lyrics_for_yue(lyrics: str) -> str:
    """Convert chat-formatted lyrics into the bracketed section format.

    ``**VERSE 1**``-style headers become ``[VERSE 1]`` (likewise CHORUS,
    BRIDGE and OUTRO). Everything before the first section header is dropped,
    and collection stops at the first non-header line that contains one of a
    few phrases typical of assistant commentary.

    Returns:
        str: The cleaned lyrics; empty if no section header was found.
    """
    import re

    converted = lyrics
    # Applied one pattern at a time, in this order, to keep the exact
    # replacement behaviour of sequential substitutions.
    for header in (r'VERSE\s*\d*', r'CHORUS', r'BRIDGE', r'OUTRO'):
        converted = re.sub(r'\*\*(' + header + r')\*\*', r'[\1]', converted)

    commentary_phrases = ['how do you like', 'would you like', 'let me know', 'take a look']
    clean_lines = []
    in_song = False

    for line in converted.split('\n'):
        line = line.strip()
        # Start collecting from first section marker
        if re.match(r'\[(VERSE|CHORUS|BRIDGE|OUTRO)', line):
            in_song = True
        # Stop at AI commentary
        if (in_song and line and not line.startswith('[')
                and any(phrase in line.lower() for phrase in commentary_phrases)):
            break
        if in_song:
            clean_lines.append(line)

    return '\n'.join(clean_lines).strip()


def _stream_inference_output(process, progress) -> str:
    """Echo the child's merged stdout/stderr and drive the progress bar.

    The pipe is always read to EOF so the child can never block on a full
    pipe; a failure while updating progress is logged and reading continues.

    Args:
        process: A ``subprocess.Popen`` opened in text mode with a stdout pipe.
        progress: Callable taking ``(fraction, desc=...)``.

    Returns:
        str: All output lines (stripped) joined with newlines.
    """
    stage1_updates = [
        (0.3, "🧠 Stage 1: Generating musical concepts..."),
        (0.45, "🎼 Stage 1: Creating melody patterns..."),
        (0.6, "🎹 Stage 1: Composing harmony structure..."),
    ]
    stage2_updates = [
        (0.7, "⚡ Starting Stage 2: Refining with 1B model..."),
        (0.8, "🎵 Stage 2: Adding musical details..."),
        (0.85, "🎶 Stage 2: Finalizing composition..."),
    ]

    current_stage = 1
    stage1_step = 0
    stage2_step = 0
    output_lines = []

    for raw_line in process.stdout:
        line = raw_line.strip()
        output_lines.append(line)
        print(line)  # Still print for debugging

        try:
            # Stage transitions are detected from markers in the actual output
            if "Stage 2 inference..." in line:
                current_stage = 2
                stage2_step = 0
                fraction, message = stage2_updates[0]
                progress(fraction, desc=message)
                print(f"⏳ {message}")

            elif "Stage 2 DONE" in line:
                progress(0.9, desc="🔊 Decoding to audio format...")
                print("⏳ 🔊 Decoding to audio format...")

            # Within a stage the bar advances one step per output line until
            # that stage's messages are used up (not on a timer).
            elif current_stage == 1 and stage1_step < len(stage1_updates):
                fraction, message = stage1_updates[stage1_step]
                progress(fraction, desc=message)
                print(f"⏳ {message}")
                stage1_step += 1

            elif current_stage == 2 and stage2_step < len(stage2_updates) - 1:
                stage2_step += 1
                fraction, message = stage2_updates[stage2_step]
                progress(fraction, desc=message)
                print(f"⏳ {message}")

        except Exception as e:
            print(f"Progress parsing error: {e}")

    return '\n'.join(output_lines)


@spaces.GPU(duration=300)  # duration is given in seconds: 300 s = 5 minutes
def generate_music_spaces(lyrics: str, genre: str, mood: str, progress=gr.Progress()) -> str:
    """
    Generate music by running the YuEGP inference script as a subprocess.

    The genre/mood tags and the cleaned lyrics are written to temporary text
    files, ``infer.py`` is run from ``<cwd>/YuEGP/inference`` with its output
    streamed to the log, and the output directory is searched for an MP3.

    Args:
        lyrics: Lyrics text, expected to contain ``**VERSE**``/``[VERSE]``
            style section headers.
        genre: Genre tag written to the genre prompt file.
        mood: Mood tag written to the genre prompt file.
        progress: Gradio progress callback.

    Returns:
        str: Path to a generated ``.mp3`` on success; otherwise a
        human-readable message describing why no audio is available.
    """
    if not lyrics.strip():
        return "Please provide lyrics to generate music."

    # Temp input files are registered here as soon as they exist so the
    # finally clause removes them on every exit path.
    temp_input_paths = []

    try:
        progress(0.1, desc="Preparing lyrics...")

        # Genre/mood tags for the model
        with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as genre_file:
            genre_file_path = genre_file.name
            temp_input_paths.append(genre_file_path)
            genre_file.write(f"instrumental,{genre},{mood},male vocals")

        # Convert **VERSE**-style headers to [VERSE] and drop AI commentary
        formatted_lyrics_for_yue = _convert_lyrics_for_yue(lyrics)

        print(f"🐛 DEBUG - Original lyrics length: {len(lyrics)}")
        print(f"🐛 DEBUG - Converted lyrics for YuE: '{formatted_lyrics_for_yue}'")
        print(f"🐛 DEBUG - Converted lyrics length: {len(formatted_lyrics_for_yue)}")

        with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as lyrics_file:
            lyrics_file_path = lyrics_file.name
            temp_input_paths.append(lyrics_file_path)
            lyrics_file.write(formatted_lyrics_for_yue)

        progress(0.2, desc="Setting up generation...")

        # Not removed here: the returned audio path lives inside this directory
        output_dir = tempfile.mkdtemp()

        # The script is started by its bare name with cwd set to its own
        # directory, instead of changing this process's working directory.
        inference_dir = os.path.join(os.getcwd(), "YuEGP", "inference")

        run_n_segments = "1"
        max_new_tokens = "2000"
        cmd = [
            sys.executable,
            "infer.py",
            "--cuda_idx", "0",
            "--stage1_model", "m-a-p/YuE-s1-7B-anneal-en-cot",
            "--stage2_model", "m-a-p/YuE-s2-1B-general",
            "--genre_txt", genre_file_path,
            "--lyrics_txt", lyrics_file_path,
            "--run_n_segments", run_n_segments,
            "--stage2_batch_size", "2",
            "--output_dir", output_dir,
            "--max_new_tokens", max_new_tokens,
            "--profile", "3",
            "--verbose", "3",
            "--prompt_start_time", "0",
            "--prompt_end_time", "20",
        ]

        # Use flash attention if available, otherwise fallback
        if not flash_attn_available:
            cmd.append("--sdpa")

        progress(0.2, desc="🎵 Starting Stage 1 (7B model generation)...")

        print("🎵 Starting high-quality music generation...")
        print(f"📊 Generation settings: {run_n_segments} segments, {max_new_tokens} tokens, 30s audio")
        print(f"⏱️ Estimated time: 2-4 minutes for high-quality generation")
        print(f"Working directory: {os.getcwd()}")
        print(f"Running in inference directory: {inference_dir}")
        print(f"🚀 Executing command: {' '.join(cmd)}")

        progress(0.25, desc="🔥 Stage 1: Running 7B parameter model...")

        # stderr is merged into stdout so a single reader sees everything;
        # errors="replace" keeps undecodable bytes from aborting the reader.
        process = subprocess.Popen(
            cmd,
            cwd=inference_dir,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=1,
            errors="replace",
        )
        try:
            stdout_output = _stream_inference_output(process, progress)
            return_code = process.wait()
        finally:
            # Only reached with a live child if reading failed part-way
            if process.poll() is None:
                process.kill()
                process.wait()
            if process.stdout is not None:
                process.stdout.close()

        print(f"📊 Return code: {return_code}")

        progress(0.95, desc="🎉 Processing completed, finalizing output...")

        if return_code != 0:
            error_msg = f"Return code: {return_code}\n"
            if stdout_output:
                error_msg += f"Output: {stdout_output[-1000:]}"
            return f"Music generation failed:\n{error_msg}"

        # Find generated audio file
        import glob
        audio_files = glob.glob(os.path.join(output_dir, "**/*.mp3"), recursive=True)

        if not audio_files:
            return "Music generation completed but no audio file found."

        progress(1.0, desc="Music generation complete!")
        # NOTE(review): glob does not sort its results; if several MP3s are
        # written, confirm that the first match is the intended one.
        return audio_files[0]

    except Exception as e:
        return f"Error during music generation: {str(e)}"

    finally:
        # Clean up input files; a cleanup failure must not mask the result
        for temp_path in temp_input_paths:
            try:
                os.unlink(temp_path)
            except OSError:
                pass

def _conversation_to_pairs(conversation):
    """Turn role/content messages into ``[user_text, assistant_text]`` rows.

    Each message produces its own row, with ``None`` in the column belonging
    to the other speaker.
    """
    return [
        [
            msg["content"] if msg["role"] == "user" else None,
            msg["content"] if msg["role"] == "assistant" else None,
        ]
        for msg in conversation
    ]


def respond(message, state):
    """Handle one chat turn: generate lyrics and update the session state.

    The user message is appended to ``state.conversation``, structured lyrics
    are generated from the conversation plus the state's genre/mood/theme, and
    the formatted result is appended as the assistant reply. If the reply
    contains a ``[verse``/``[chorus``/``[bridge`` marker (case-insensitive) it
    is also stored in ``state.lyrics``. Any exception is converted into an
    apologetic assistant message instead of propagating.

    Args:
        message: The user's text for this turn.
        state: Session state exposing ``conversation``, ``genre``, ``mood``,
            ``theme`` and ``lyrics``.

    Returns:
        tuple: ``("", rows, state)`` — an empty string, the conversation as
        display rows, and the (mutated) state.
    """
    try:
        # Add user message to conversation
        state.conversation.append({"role": "user", "content": message})

        song_structure = generate_structured_lyrics(
            state.conversation,
            state.genre,
            state.mood,
            state.theme
        )

        # Format the structured lyrics for display
        response = format_lyrics(song_structure)

        # Add assistant response
        state.conversation.append({"role": "assistant", "content": response})

        # Update lyrics if this looks like final lyrics
        if any(marker in response.lower() for marker in ("[verse", "[chorus", "[bridge")):
            state.lyrics = response

    except Exception as e:
        error_response = f"Sorry, I encountered an error: {str(e)}"
        state.conversation.append({"role": "assistant", "content": error_response})

    # Same display format on the success and error paths
    return "", _conversation_to_pairs(state.conversation), state

def build_interface():
    """Build the Gradio Blocks UI for MiloMusic and wire up its event handlers.

    Layout: a wide left column with the genre/mood/theme dropdowns, the
    microphone recorder and the lyrics chat; a narrow right column with the
    editable lyrics box, the music-generation button and output, and the
    reset controls.

    The handlers referenced here (``respond``, ``process_audio``,
    ``response_audio``, ``start_recording_user``, ``generate_music_spaces``)
    and ``js_reset`` are module-level names looked up when this function
    runs, so it must be called after they are defined.

    Returns:
        The ``gr.Blocks`` app; queueing and launching are left to the caller.
    """
    
    with gr.Blocks(
        title="MiloMusic - AI Music Generation", 
        theme=gr.themes.Soft(),
        css="""

        .container { max-width: 1400px; margin: auto; }

        .performance-notice { background-color: #d4edda; padding: 15px; border-radius: 5px; margin: 10px 0; }

        .generation-status { background-color: #f8f9fa; padding: 10px; border-radius: 5px; }

        """
    ) as demo:
        
        # Header
        gr.Markdown("""

        # 🎵 MiloMusic - AI Music Generation

        ### Professional AI-powered music creation from natural language

        """)
        
        # Performance notice for Spaces
        gr.Markdown("""

        <div class="performance-notice">

        🚀 <strong>High-Performance Mode:</strong> Running on Spaces GPU with optimized settings for best quality.

        Generation time: ~3-5 minutes for professional-grade music with vocals and instruments.

        </div>

        """)
        
        # Session state shared by every handler below (conversation, lyrics,
        # and the selected genre/mood/theme).
        state = gr.State(AppState())
        
        with gr.Row():
            with gr.Column(scale=2):
                # Input controls
                with gr.Group():
                    gr.Markdown("### 🎛️ Music Settings")
                    with gr.Row():
                        genre = gr.Dropdown(
                            choices=["pop", "rock", "jazz", "classical", "electronic", "folk", "r&b", "country", "hip-hop"],
                            value="pop", label="Genre"
                        )
                        mood = gr.Dropdown(
                            choices=["upbeat", "melancholic", "energetic", "calm", "romantic", "dark", "mysterious", "joyful"],
                            value="upbeat", label="Mood"
                        )
                        theme = gr.Dropdown(
                            choices=["love", "friendship", "adventure", "nostalgia", "freedom", "hope", "dreams", "nature"],
                            value="love", label="Theme"
                        )
                
                # Voice Input
                with gr.Group():
                    gr.Markdown("### 🎤 Voice Input")
                    # type="numpy": handlers receive the recording as a tuple
                    # (response_audio indexes it as audio[0] / audio[1]).
                    input_audio = gr.Audio(
                        label="Speak Your Musical Ideas",
                        sources=["microphone"],
                        type="numpy",
                        streaming=False,
                        waveform_options=gr.WaveformOptions(waveform_color="#B83A4B"),
                    )
                
                # Chat interface
                with gr.Group():
                    gr.Markdown("### 💬 Lyrics Creation Chat")
                    chatbot = gr.Chatbot(height=400, label="AI Lyrics Assistant", show_copy_button=True)
                    
                    with gr.Row():
                        text_input = gr.Textbox(
                            placeholder="Or type your song idea here...", 
                            show_label=False, 
                            scale=4,
                            lines=2
                        )
                        send_btn = gr.Button("Send", scale=1, variant="primary")
            
            with gr.Column(scale=1):
                # Output controls
                with gr.Group():
                    gr.Markdown("### 🎵 Music Generation")
                    # Editable so the user can adjust the lyrics before
                    # generating; its current text is what generate_btn sends.
                    lyrics_display = gr.Textbox(
                        label="Current Lyrics", 
                        lines=12, 
                        interactive=True,
                        placeholder="Your generated lyrics will appear here..."
                    )
                    
                    generate_btn = gr.Button("🎼 Generate High-Quality Music", variant="primary", size="lg")
                    
                    with gr.Column():
                        music_output = gr.Audio(label="Generated Music", type="filepath", show_download_button=True)
                        
                        gr.Markdown("""

                        <div class="generation-status">

                        <strong>Generation Features:</strong><br>

                        • Full 30-second clips<br>

                        • Professional vocals<br>

                        • Rich instrumentation<br>

                        • High-fidelity audio

                        </div>

                        """)
                
                # Controls
                with gr.Group():
                    gr.Markdown("### 🔧 Controls")
                    new_song_btn = gr.Button("🆕 Start New Song")
                    clear_btn = gr.Button("🧹 Clear Chat")
        
        # Event handlers
        def update_state_settings(genre_val, mood_val, theme_val, state):
            """Copy the three dropdown values into the session state and return it."""
            state.genre = genre_val
            state.mood = mood_val  
            state.theme = theme_val
            return state
        
        # Update state when settings change
        # Each dropdown's change event passes all three current values, so
        # the state is always written as a consistent triple.
        for component in [genre, mood, theme]:
            component.change(
                fn=update_state_settings,
                inputs=[genre, mood, theme, state],
                outputs=[state]
            )
        
        # Voice recording functionality (from app.py)
        # Chain: start_recording -> process_audio (returns its inputs as-is);
        # stop_recording -> response_audio (transcribes and replies);
        # then start_recording_user (returns None into input_audio);
        # then a pass-through of state whose `js` hook runs js_reset in the
        # browser.
        stream = input_audio.start_recording(
            process_audio,
            [input_audio, state],
            [input_audio, state],
        )

        respond_audio = input_audio.stop_recording(
            response_audio, [state, input_audio, genre, mood, theme], [state, chatbot]
        )

        restart = respond_audio.then(start_recording_user, [state], [input_audio]).then(
            lambda state: state, state, state, js=js_reset
        )
        
        # Text chat functionality
        # The button click and pressing Enter in the textbox share one handler.
        send_btn.click(
            fn=respond,
            inputs=[text_input, state],
            outputs=[text_input, chatbot, state],
            queue=True
        )
        
        text_input.submit(
            fn=respond,
            inputs=[text_input, state],
            outputs=[text_input, chatbot, state],
            queue=True
        )
        
        # Music generation with progress
        # Reads the lyrics from the textbox (including manual edits), not
        # from state.lyrics.
        generate_btn.click(
            fn=generate_music_spaces,
            inputs=[lyrics_display, genre, mood],
            outputs=[music_output],
            queue=True,
            show_progress=True
        )
        
        # Control buttons
        # "Start New Song" replaces the state with a fresh AppState, empties
        # the chat, lyrics and music output, resets the recorder, and cancels
        # the voice events named in `cancels`.
        new_song_btn.click(
            fn=lambda: (AppState(), [], "", None, gr.Audio(recording=False)),
            outputs=[state, chatbot, lyrics_display, music_output, input_audio],
            cancels=[respond_audio, restart]
        )
        
        # NOTE(review): this clears only the chatbot display;
        # state.conversation is untouched, and the chat handlers rebuild the
        # display from state.conversation, so earlier messages reappear on
        # the next turn. Confirm this is intended.
        clear_btn.click(
            fn=lambda: [],
            outputs=[chatbot]
        )
        
        # Auto-update lyrics display when state changes
        # None of the chat handlers output to lyrics_display directly; this
        # listener is the only path that copies state.lyrics into the textbox.
        state.change(
            fn=lambda s: s.lyrics,
            inputs=[state],
            outputs=[lyrics_display]
        )
        
        # Instructions
        gr.Markdown("""

        ### 📖 How to create your music:

        1. **Set your preferences**: Choose genre, mood, and theme

        2. **Voice or chat**: Either speak your ideas or type them in the chat

        3. **Refine the lyrics**: Ask for changes, different verses, or style adjustments  

        4. **Generate music**: Click the generate button for professional-quality output

        5. **Download & enjoy**: Your high-fidelity music with vocals and instruments

        

        **Tips**: Be specific about your vision - mention instruments, vocal style, or song structure!

        """)
        
        # Footer
        gr.Markdown("""

        ---

        <center>

        Made with ❤️ by the MiloMusic Team | Powered by YuE (乐) Model | 🤗 Hugging Face Spaces

        </center>

        """)
    
    return demo

# Audio transcription functions (from app.py)
def process_whisper_response(completion):
    """
    Return the stripped transcript, or None when there is no usable speech.

    The decision uses only the first segment: if its ``no_speech_prob``
    (defaulting to 0 when absent) is above 0.7 the recording is treated as
    silence. A completion without segments also yields None.
    """
    segments = completion.segments
    if not segments or len(segments) <= 0:
        return None

    silence_likelihood = segments[0].get('no_speech_prob', 0)
    print("No speech prob:", silence_likelihood)

    if silence_likelihood > 0.7:
        print("No speech detected")
        return None

    return completion.text.strip()

def transcribe_audio(client, file_name):
    """
    Transcribe an audio file using the Whisper model via the Groq API.

    Args:
        client: Groq client exposing ``audio.transcriptions.create``.
        file_name: Path of the audio file to upload, or None.

    Returns:
        The stripped transcript; None when ``file_name`` is None or the
        transcript is empty/whitespace; or a string starting with
        ``"Error in audio transcription:"`` when reading or the API call
        fails (the caller checks for that prefix).
    """
    if file_name is None:
        return None

    try:
        # Read the payload once and upload the bytes themselves. The previous
        # version read the handle to EOF to write a debug copy ("audio.wav")
        # into the working directory and then handed the exhausted handle to
        # the API; that copy was shared by all concurrent requests and never
        # cleaned up.
        with open(file_name, "rb") as audio_file:
            audio_bytes = audio_file.read()

        response = client.audio.transcriptions.create(
            model="whisper-large-v3-turbo",
            file=("audio.wav", audio_bytes),
            response_format="text",
            language="en",
        )

        # With response_format="text" the response is used as a plain string;
        # an empty or whitespace-only transcript is reported as None.
        transcript = response.strip() if response else ""
        return transcript or None

    except Exception as e:
        print(f"Transcription error: {e}")
        return f"Error in audio transcription: {str(e)}"

def start_recording_user(state: AppState):
    """
    Produce the value that empties the audio recorder for the next input.

    ``state`` is accepted because the event wiring passes it, but it is not
    read. Always returns None.
    """
    cleared_recorder_value = None
    return cleared_recorder_value

def process_audio(audio: tuple, state: AppState):
    """
    Pass the recorder value and the session state through unchanged.

    No processing happens here; the function hands both arguments straight
    back as an ``(audio, state)`` tuple.
    """
    passthrough = (audio, state)
    return passthrough

@spaces.GPU(duration=40, progress=gr.Progress(track_tqdm=True))
def response_audio(state: AppState, audio: tuple, genre_value, mood_value, theme_value):
    """
    Process recorded audio and generate a response based on transcription.

    Args:
        state: Session state; ``genre``/``mood``/``theme`` are overwritten
            from the dropdown values, ``conversation`` is appended to and
            ``lyrics`` is refreshed.
        audio: Recorder value; ``audio[0]`` is passed to soundfile as the
            sample rate and ``audio[1]`` as the sample data.
        genre_value, mood_value, theme_value: Current dropdown selections.

    Returns:
        ``(state, chat_rows)`` where ``chat_rows`` is the whole conversation
        as ``[user_text, assistant_text]`` rows. When ``audio`` is empty the
        rows are ``[]``.

    Raises:
        ValueError: If the ``GROQ_API_KEY`` environment variable is not set.
    """
    if not audio:
        return state, []

    # Update state with current dropdown values
    state.genre, state.mood, state.theme = genre_value, mood_value, theme_value

    # Check the key before touching the disk so a missing key cannot leave a
    # temporary file behind.
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        raise ValueError("Please set the GROQ_API_KEY environment variable.")
    client = groq.Client(api_key=api_key)

    sample_rate, samples = audio[0], audio[1]
    # The file name is derived from a hash of the sample bytes.
    file_name = os.path.join(
        tempfile.gettempdir(),
        f"{xxhash.xxh32(bytes(samples)).hexdigest()}.wav",
    )

    try:
        sf.write(file_name, samples, sample_rate, format="wav")
        transcription = transcribe_audio(client, file_name)
    finally:
        # Always remove the temporary recording, including when the
        # transcript is empty (silence) or an exception is raised; previously
        # the file was deleted only after a successful, non-empty transcript.
        try:
            os.remove(file_name)
        except FileNotFoundError:
            pass

    if transcription:
        # transcribe_audio reports failures as a string starting with "Error";
        # the detail is replaced by a generic message before it is recorded.
        # NOTE(review): a genuine utterance that starts with "Error" is
        # treated the same way.
        if isinstance(transcription, str) and transcription.startswith("Error"):
            transcription = "Error in audio transcription."

        state.conversation.append({"role": "user", "content": transcription})

        assistant_message = generate_chat_completion(
            client, state.conversation, state.genre, state.mood, state.theme
        )
        state.conversation.append({"role": "assistant", "content": assistant_message})

        # Update lyrics from conversation
        state.lyrics = extract_lyrics_from_conversation(state.conversation)

    # Format conversation for display: one row per message, with the text in
    # the user or the assistant slot and None in the other.
    conversation_display = [
        [
            msg["content"] if msg["role"] == "user" else None,
            msg["content"] if msg["role"] == "assistant" else None,
        ]
        for msg in state.conversation
    ]

    return state, conversation_display

def extract_lyrics_from_conversation(conversation):
    """
    Return the most recent assistant message that looks like lyrics.

    A message counts as lyrics when its text contains both "verse" and
    "chorus" (case-insensitive). Returns an empty string if none matches.
    """
    def _looks_like_lyrics(entry):
        if entry["role"] != "assistant":
            return False
        lowered = entry["content"].lower()
        return "verse" in lowered and "chorus" in lowered

    newest_first = reversed(conversation)
    return next(
        (entry["content"] for entry in newest_first if _looks_like_lyrics(entry)),
        "",
    )

def generate_chat_completion(client, history, genre, mood, theme):
    """
    Ask the chat model for the next assistant reply.

    A system prompt built from ``genre``, ``mood`` and ``theme`` is placed
    in front of ``history`` and the combined list is sent to the client.
    Returns the reply text, or a string starting with
    ``"Error in generating chat completion:"`` if the request fails.
    """
    system_prompt = f"""You are a creative AI music generator assistant. Help users create song lyrics in the {genre} genre with a {mood} mood about {theme}.

When generating lyrics, create a chorus and at least one verse. Format lyrics clearly with VERSE and CHORUS labels.

Ask if they like the lyrics or want changes. Be conversational, friendly, and creative.

Keep the lyrics appropriate for the selected genre, mood, and theme unless the user specifically requests changes."""

    # System instruction first, then the conversation in its original order.
    messages = [{"role": "system", "content": system_prompt}, *history]

    try:
        completion = client.chat.completions.create(
            model="meta-llama/llama-4-scout-17b-16e-instruct",
            messages=messages,
        )
        reply = completion.choices[0].message.content
    except Exception as e:
        return f"Error in generating chat completion: {str(e)}"
    return reply

# JavaScript for frontend enhancements
# Browser-side snippet passed as the `js=` hook of the last step in the voice
# event chain built in build_interface(). It looks up the first element with
# class `record-button` and, if one exists, replaces its label with
# "Just Start Talking!" and overrides its inline style; otherwise it does
# nothing.
js_reset = """

() => {

  var record = document.querySelector('.record-button');

  if (record) {

    record.textContent = "Just Start Talking!"

    record.style = "width: fit-content; padding-right: 0.5vw;"

  }

}

"""

# Build the interface at import time so `demo` exists as a module-level name.
demo = build_interface()

if __name__ == "__main__":
    # Spaces entry point - optimized for high-performance deployment
    print("🚀 Starting MiloMusic High-Performance Mode on Hugging Face Spaces...")
    print(f"📁 Working directory: {os.getcwd()}")
    print(f"📂 Directory contents: {os.listdir('.')}")

    # Missing project files are fatal: report and exit with status 1.
    structure_ok = validate_file_structure()
    if not structure_ok:
        print("❌ Required files missing - please check your upload")
        sys.exit(1)

    # Missing API keys only produce a warning; the app still starts.
    keys_ok = validate_api_keys()
    if not keys_ok:
        print("⚠️  Some API keys missing - functionality may be limited")

    # Launch with optimized settings for Spaces
    queue_options = {
        "default_concurrency_limit": 5,  # Allow more concurrent users
        "max_size": 20,
    }
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,  # Spaces handles sharing
        "show_error": True,
        "quiet": False,
        "favicon_path": None,
        "ssl_verify": False,
    }
    demo.queue(**queue_options).launch(**launch_options)