ghostai1 committed on
Commit
e351dd1
·
verified ·
1 Parent(s): 5334249

Update stablecuda12build1.py

Browse files
Files changed (1) hide show
  1. stablecuda12build1.py +73 -25
stablecuda12build1.py CHANGED
@@ -1,4 +1,3 @@
1
-
2
  import os
3
  import torch
4
  import torchaudio
@@ -142,7 +141,7 @@ def balance_stereo(audio_segment, noise_threshold=-60, sample_rate=16000):
142
  avg_rms = (left_rms + right_rms) / 2
143
  stereo_samples[:, 0] = stereo_samples[:, 0] * (avg_rms / left_rms)
144
  stereo_samples[:, 1] = stereo_samples[:, 1] * (avg_rms / right_rms)
145
- balanced_samples = stereo_samples.flatten().astype(np.int16)
146
  balanced_segment = AudioSegment(
147
  balanced_samples.tobytes(),
148
  frame_rate=sample_rate,
@@ -172,7 +171,7 @@ def calculate_rms(segment):
172
  def rms_normalize(segment, target_rms_db=-23.0, peak_limit_db=-3.0, sample_rate=16000):
173
  logger.debug(f"Normalizing RMS for segment with target {target_rms_db} dBFS")
174
  try:
175
- target_rms = 10 ** (target_rms_db / 20) * 32767
176
  current_rms = calculate_rms(segment)
177
  if current_rms > 0:
178
  gain_factor = target_rms / current_rms
@@ -188,9 +187,9 @@ def rms_normalize(segment, target_rms_db=-23.0, peak_limit_db=-3.0, sample_rate=
188
  def hard_limit(audio_segment, limit_db=-3.0, sample_rate=16000):
189
  logger.debug(f"Applying hard limit at {limit_db} dBFS")
190
  try:
191
- limit = 10 ** (limit_db / 20.0) * 32767
192
  samples = np.array(audio_segment.get_array_of_samples(), dtype=np.float32)
193
- samples = np.clip(samples, -limit, limit).astype(np.int16)
194
  limited_segment = AudioSegment(
195
  samples.tobytes(),
196
  frame_rate=sample_rate,
@@ -205,7 +204,7 @@ def hard_limit(audio_segment, limit_db=-3.0, sample_rate=16000):
205
  return audio_segment
206
 
207
  def apply_eq(segment, sample_rate=16000):
208
- logger.debug(f"Applying EQ with sample_rate {sample_rate}")
209
  try:
210
  segment = segment.high_pass_filter(20)
211
  segment = segment.low_pass_filter(20000)
@@ -483,8 +482,30 @@ def set_bitrate_320():
483
  logger.info("Bitrate set to 320 kbps")
484
  return "320k"
485
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
486
  # Optimized generation function
487
- def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p: float, temperature: float, total_duration: int, bpm: int, drum_beat: str, synthesizer: str, rhythmic_steps: str, bass_style: str, guitar_style: str, target_volume: float, preset: str, max_steps: str, vram_status: str, bitrate: str):
488
  global musicgen_model
489
  if not instrumental_prompt.strip():
490
  logger.warning("Empty instrumental prompt provided")
@@ -498,10 +519,22 @@ def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p
498
  except ValueError:
499
  logger.error(f"Invalid max_steps value: {max_steps}")
500
  return None, "❌ Invalid max_steps value; must be a number (1000, 1200, 1300, or 1500)", vram_status
 
 
 
 
 
 
 
 
 
 
 
 
 
501
  max_duration = min(max_steps_int / 50, 30) # Convert steps to seconds, cap at 30s
502
  total_duration = min(max(total_duration, 30), 120) # Clamp between 30s and 120s
503
- processing_sample_rate = 16000 # Lower for processing
504
- output_sample_rate = 32000 # MusicGen's native rate
505
  audio_segments = []
506
  overlap_duration = 0.2 # 200ms for continuation and crossfade
507
  remaining_duration = total_duration
@@ -520,7 +553,7 @@ def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p
520
 
521
  # Set random seed for this generation run
522
  seed = random.randint(0, 10000)
523
- logger.info(f"Generating audio for {total_duration}s with seed={seed}, max_steps={max_steps_int}")
524
  base_prompt = instrumental_prompt
525
  clean_memory()
526
  vram_status = f"Initial VRAM: {torch.cuda.memory_allocated() / 1024**2:.2f} MB"
@@ -606,7 +639,7 @@ def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p
606
  try:
607
  # Convert to float32 for torchaudio.save
608
  audio_segment_save = audio_segment.to(dtype=torch.float32)
609
- torchaudio.save(temp_wav_path, audio_segment_save, output_sample_rate, bits_per_sample=16)
610
  del audio_segment_save
611
  except Exception as e:
612
  logger.error(f"Failed to save audio segment for chunk {chunk_num}: {e}")
@@ -682,9 +715,9 @@ def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p
682
  fade_in = hann_window
683
  blended_samples = (prev_samples * fade_out[:, None] + curr_samples * fade_in[:, None])
684
  blended_segment = AudioSegment(
685
- blended_samples.astype(np.int16).tobytes(),
686
  frame_rate=processing_sample_rate,
687
- sample_width=2,
688
  channels=2
689
  )
690
  blended_segment = rms_normalize(blended_segment, target_rms_db=target_volume, peak_limit_db=-3.0, sample_rate=processing_sample_rate)
@@ -700,13 +733,13 @@ def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p
700
  final_segment = apply_fade(final_segment)
701
  final_segment = balance_stereo(final_segment, noise_threshold=-60, sample_rate=processing_sample_rate)
702
  final_segment = final_segment - 10
703
- final_segment = final_segment.set_frame_rate(output_sample_rate) # Upsample to output rate
704
 
705
  mp3_path = f"output_adjusted_volume_{int(time.time())}.mp3"
706
  logger.info("⚠️ WARNING: Audio is set to safe levels (~ -23 dBFS RMS, -3 dBFS peak). Start playback at LOW volume (10-20%) and adjust gradually.")
707
  logger.info("VERIFY: Open the file in Audacity to check for static. RMS should be ~ -23 dBFS, peaks ≤ -3 dBFS. Report any static or issues.")
708
  try:
709
- logger.debug(f"Exporting final audio to {mp3_path} with bitrate {bitrate}")
710
  final_segment.export(
711
  mp3_path,
712
  format="mp3",
@@ -715,11 +748,12 @@ def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p
715
  )
716
  logger.info(f"Final audio saved to {mp3_path}")
717
  except Exception as e:
718
- logger.error(f"Error exporting MP3: {e}")
 
719
  fallback_path = f"fallback_output_{int(time.time())}.mp3"
720
  try:
721
- final_segment.export(fallback_path, format="mp3", bitrate=bitrate)
722
- logger.info(f"Final audio saved to fallback: {fallback_path}")
723
  mp3_path = fallback_path
724
  except Exception as fallback_e:
725
  logger.error(f"Failed to save fallback MP3: {fallback_e}")
@@ -727,7 +761,7 @@ def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p
727
 
728
  vram_status = f"Final VRAM: {torch.cuda.memory_allocated() / 1024**2:.2f} MB"
729
  logger.info(f"Generation completed in {time.time() - start_time:.2f} seconds")
730
- return mp3_path, "✅ Done! Generated static-free track with adjusted volume levels.", vram_status
731
  except Exception as e:
732
  logger.error(f"Failed to combine audio chunks: {e}")
733
  logger.error(traceback.format_exc())
@@ -742,7 +776,7 @@ def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p
742
  # Clear inputs function
743
  def clear_inputs():
744
  logger.info("Clearing input fields")
745
- return "", 1.8, 120, 0.9, 0.8, 30, 120, "none", "none", "none", "none", "none", -23.0, "default", 1300, "96k"
746
 
747
  # Custom CSS
748
  css = """
@@ -782,13 +816,13 @@ p {
782
  border: 1px solid #A100FF;
783
  color: #E0E0E0;
784
  }
785
- .genre-buttons, .bitrate-buttons {
786
  display: flex;
787
  justify-content: center;
788
  flex-wrap: wrap;
789
  gap: 15px;
790
  }
791
- .genre-btn, .bitrate-btn, button {
792
  background: linear-gradient(45deg, #A100FF, #00FF9F);
793
  border: none;
794
  color: #0A0A0A;
@@ -955,10 +989,19 @@ with gr.Blocks(css=css) as demo:
955
  info="Number of generation steps per chunk (1000=~20s, 1500=~30s)."
956
  )
957
  bitrate_state = gr.State(value="96k") # Default bitrate
 
 
958
  with gr.Row(elem_classes="bitrate-buttons"):
959
  bitrate_128_btn = gr.Button("Set Bitrate to 128 kbps", elem_classes="bitrate-btn")
960
  bitrate_192_btn = gr.Button("Set Bitrate to 192 kbps", elem_classes="bitrate-btn")
961
  bitrate_320_btn = gr.Button("Set Bitrate to 320 kbps", elem_classes="bitrate-btn")
 
 
 
 
 
 
 
962
 
963
  with gr.Row(elem_classes="action-buttons"):
964
  gen_btn = gr.Button("Generate Music 🚀")
@@ -992,15 +1035,20 @@ with gr.Blocks(css=css) as demo:
992
  bitrate_128_btn.click(set_bitrate_128, inputs=None, outputs=bitrate_state)
993
  bitrate_192_btn.click(set_bitrate_192, inputs=None, outputs=bitrate_state)
994
  bitrate_320_btn.click(set_bitrate_320, inputs=None, outputs=bitrate_state)
 
 
 
 
 
995
  gen_btn.click(
996
  generate_music,
997
- inputs=[instrumental_prompt, cfg_scale, top_k, top_p, temperature, total_duration, bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, target_volume, preset, max_steps, vram_status, bitrate_state],
998
  outputs=[out_audio, status, vram_status]
999
  )
1000
  clr_btn.click(
1001
  clear_inputs,
1002
  inputs=None,
1003
- outputs=[instrumental_prompt, cfg_scale, top_k, top_p, temperature, total_duration, bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, target_volume, preset, max_steps, bitrate_state]
1004
  )
1005
  log_btn.click(
1006
  get_latest_log,
@@ -1028,4 +1076,4 @@ try:
1028
  except Exception as e:
1029
  logger.error(f"Failed to launch Gradio UI: {e}")
1030
  logger.error(traceback.format_exc())
1031
- sys.exit(1)
 
 
1
  import os
2
  import torch
3
  import torchaudio
 
141
  avg_rms = (left_rms + right_rms) / 2
142
  stereo_samples[:, 0] = stereo_samples[:, 0] * (avg_rms / left_rms)
143
  stereo_samples[:, 1] = stereo_samples[:, 1] * (avg_rms / right_rms)
144
+ balanced_samples = stereo_samples.flatten().astype(np.int32 if audio_segment.sample_width == 3 else np.int16)
145
  balanced_segment = AudioSegment(
146
  balanced_samples.tobytes(),
147
  frame_rate=sample_rate,
 
171
  def rms_normalize(segment, target_rms_db=-23.0, peak_limit_db=-3.0, sample_rate=16000):
172
  logger.debug(f"Normalizing RMS for segment with target {target_rms_db} dBFS")
173
  try:
174
+ target_rms = 10 ** (target_rms_db / 20) * (2**23 if segment.sample_width == 3 else 32767)
175
  current_rms = calculate_rms(segment)
176
  if current_rms > 0:
177
  gain_factor = target_rms / current_rms
 
187
  def hard_limit(audio_segment, limit_db=-3.0, sample_rate=16000):
188
  logger.debug(f"Applying hard limit at {limit_db} dBFS")
189
  try:
190
+ limit = 10 ** (limit_db / 20.0) * (2**23 if audio_segment.sample_width == 3 else 32767)
191
  samples = np.array(audio_segment.get_array_of_samples(), dtype=np.float32)
192
+ samples = np.clip(samples, -limit, limit).astype(np.int32 if audio_segment.sample_width == 3 else np.int16)
193
  limited_segment = AudioSegment(
194
  samples.tobytes(),
195
  frame_rate=sample_rate,
 
204
  return audio_segment
205
 
206
  def apply_eq(segment, sample_rate=16000):
207
+ logger.debug(f"Applying EQ with sample rate {sample_rate}")
208
  try:
209
  segment = segment.high_pass_filter(20)
210
  segment = segment.low_pass_filter(20000)
 
482
  logger.info("Bitrate set to 320 kbps")
483
  return "320k"
484
 
485
+ # Sampling rate selection functions
486
+ def set_sample_rate_22050():
487
+ logger.info("Output sampling rate set to 22.05 kHz")
488
+ return "22050"
489
+
490
+ def set_sample_rate_44100():
491
+ logger.info("Output sampling rate set to 44.1 kHz")
492
+ return "44100"
493
+
494
+ def set_sample_rate_48000():
495
+ logger.info("Output sampling rate set to 48 kHz")
496
+ return "48000"
497
+
498
+ # Bit depth selection functions
499
+ def set_bit_depth_16():
500
+ logger.info("Bit depth set to 16-bit")
501
+ return "16"
502
+
503
+ def set_bit_depth_24():
504
+ logger.info("Bit depth set to 24-bit")
505
+ return "24"
506
+
507
  # Optimized generation function
508
+ def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p: float, temperature: float, total_duration: int, bpm: int, drum_beat: str, synthesizer: str, rhythmic_steps: str, bass_style: str, guitar_style: str, target_volume: float, preset: str, max_steps: str, vram_status: str, bitrate: str, output_sample_rate: str, bit_depth: str):
509
  global musicgen_model
510
  if not instrumental_prompt.strip():
511
  logger.warning("Empty instrumental prompt provided")
 
519
  except ValueError:
520
  logger.error(f"Invalid max_steps value: {max_steps}")
521
  return None, "❌ Invalid max_steps value; must be a number (1000, 1200, 1300, or 1500)", vram_status
522
+ # Convert output_sample_rate to integer
523
+ try:
524
+ output_sample_rate_int = int(output_sample_rate)
525
+ except ValueError:
526
+ logger.error(f"Invalid output_sample_rate value: {output_sample_rate}")
527
+ return None, "❌ Invalid output sampling rate; must be a number (22050, 32000, 44100, or 48000)", vram_status
528
+ # Convert bit_depth to integer and set sample_width
529
+ try:
530
+ bit_depth_int = int(bit_depth)
531
+ sample_width = 3 if bit_depth_int == 24 else 2
532
+ except ValueError:
533
+ logger.error(f"Invalid bit_depth value: {bit_depth}")
534
+ return None, "❌ Invalid bit depth; must be 16 or 24", vram_status
535
  max_duration = min(max_steps_int / 50, 30) # Convert steps to seconds, cap at 30s
536
  total_duration = min(max(total_duration, 30), 120) # Clamp between 30s and 120s
537
+ processing_sample_rate = 16000 # Fixed for processing
 
538
  audio_segments = []
539
  overlap_duration = 0.2 # 200ms for continuation and crossfade
540
  remaining_duration = total_duration
 
553
 
554
  # Set random seed for this generation run
555
  seed = random.randint(0, 10000)
556
+ logger.info(f"Generating audio for {total_duration}s with seed={seed}, max_steps={max_steps_int}, output_sample_rate={output_sample_rate_int} Hz, bit_depth={bit_depth_int}-bit")
557
  base_prompt = instrumental_prompt
558
  clean_memory()
559
  vram_status = f"Initial VRAM: {torch.cuda.memory_allocated() / 1024**2:.2f} MB"
 
639
  try:
640
  # Convert to float32 for torchaudio.save
641
  audio_segment_save = audio_segment.to(dtype=torch.float32)
642
+ torchaudio.save(temp_wav_path, audio_segment_save, output_sample_rate_int, bits_per_sample=bit_depth_int)
643
  del audio_segment_save
644
  except Exception as e:
645
  logger.error(f"Failed to save audio segment for chunk {chunk_num}: {e}")
 
715
  fade_in = hann_window
716
  blended_samples = (prev_samples * fade_out[:, None] + curr_samples * fade_in[:, None])
717
  blended_segment = AudioSegment(
718
+ blended_samples.astype(np.int32 if sample_width == 3 else np.int16).tobytes(),
719
  frame_rate=processing_sample_rate,
720
+ sample_width=sample_width,
721
  channels=2
722
  )
723
  blended_segment = rms_normalize(blended_segment, target_rms_db=target_volume, peak_limit_db=-3.0, sample_rate=processing_sample_rate)
 
733
  final_segment = apply_fade(final_segment)
734
  final_segment = balance_stereo(final_segment, noise_threshold=-60, sample_rate=processing_sample_rate)
735
  final_segment = final_segment - 10
736
+ final_segment = final_segment.set_frame_rate(output_sample_rate_int) # Set to selected output rate
737
 
738
  mp3_path = f"output_adjusted_volume_{int(time.time())}.mp3"
739
  logger.info("⚠️ WARNING: Audio is set to safe levels (~ -23 dBFS RMS, -3 dBFS peak). Start playback at LOW volume (10-20%) and adjust gradually.")
740
  logger.info("VERIFY: Open the file in Audacity to check for static. RMS should be ~ -23 dBFS, peaks ≤ -3 dBFS. Report any static or issues.")
741
  try:
742
+ logger.debug(f"Exporting final audio to {mp3_path} with bitrate {bitrate}, sample rate {output_sample_rate_int} Hz, bit depth {bit_depth_int}-bit")
743
  final_segment.export(
744
  mp3_path,
745
  format="mp3",
 
748
  )
749
  logger.info(f"Final audio saved to {mp3_path}")
750
  except Exception as e:
751
+ logger.error(f"Error exporting MP3 with bitrate {bitrate}: {e}")
752
+ logger.error(traceback.format_exc())
753
  fallback_path = f"fallback_output_{int(time.time())}.mp3"
754
  try:
755
+ final_segment.export(fallback_path, format="mp3", bitrate="128k")
756
+ logger.info(f"Final audio saved to fallback: {fallback_path} with 128 kbps")
757
  mp3_path = fallback_path
758
  except Exception as fallback_e:
759
  logger.error(f"Failed to save fallback MP3: {fallback_e}")
 
761
 
762
  vram_status = f"Final VRAM: {torch.cuda.memory_allocated() / 1024**2:.2f} MB"
763
  logger.info(f"Generation completed in {time.time() - start_time:.2f} seconds")
764
+ return mp3_path, "✅ Done! Generated track with adjusted volume levels. Check for static in Audacity.", vram_status
765
  except Exception as e:
766
  logger.error(f"Failed to combine audio chunks: {e}")
767
  logger.error(traceback.format_exc())
 
776
  # Clear inputs function
777
  def clear_inputs():
778
  logger.info("Clearing input fields")
779
+ return "", 1.8, 120, 0.9, 0.8, 30, 120, "none", "none", "none", "none", "none", -23.0, "default", 1300, "96k", "32000", "16"
780
 
781
  # Custom CSS
782
  css = """
 
816
  border: 1px solid #A100FF;
817
  color: #E0E0E0;
818
  }
819
+ .genre-buttons, .bitrate-buttons, .sample-rate-buttons, .bit-depth-buttons {
820
  display: flex;
821
  justify-content: center;
822
  flex-wrap: wrap;
823
  gap: 15px;
824
  }
825
+ .genre-btn, .bitrate-btn, .sample-rate-btn, .bit-depth-btn, button {
826
  background: linear-gradient(45deg, #A100FF, #00FF9F);
827
  border: none;
828
  color: #0A0A0A;
 
989
  info="Number of generation steps per chunk (1000=~20s, 1500=~30s)."
990
  )
991
  bitrate_state = gr.State(value="96k") # Default bitrate
992
+ sample_rate_state = gr.State(value="32000") # Default output sampling rate
993
+ bit_depth_state = gr.State(value="16") # Default bit depth
994
  with gr.Row(elem_classes="bitrate-buttons"):
995
  bitrate_128_btn = gr.Button("Set Bitrate to 128 kbps", elem_classes="bitrate-btn")
996
  bitrate_192_btn = gr.Button("Set Bitrate to 192 kbps", elem_classes="bitrate-btn")
997
  bitrate_320_btn = gr.Button("Set Bitrate to 320 kbps", elem_classes="bitrate-btn")
998
+ with gr.Row(elem_classes="sample-rate-buttons"):
999
+ sample_rate_22050_btn = gr.Button("Set Sampling Rate to 22.05 kHz", elem_classes="sample-rate-btn")
1000
+ sample_rate_44100_btn = gr.Button("Set Sampling Rate to 44.1 kHz", elem_classes="sample-rate-btn")
1001
+ sample_rate_48000_btn = gr.Button("Set Sampling Rate to 48 kHz", elem_classes="sample-rate-btn")
1002
+ with gr.Row(elem_classes="bit-depth-buttons"):
1003
+ bit_depth_16_btn = gr.Button("Set Bit Depth to 16-bit", elem_classes="bit-depth-btn")
1004
+ bit_depth_24_btn = gr.Button("Set Bit Depth to 24-bit", elem_classes="bit-depth-btn")
1005
 
1006
  with gr.Row(elem_classes="action-buttons"):
1007
  gen_btn = gr.Button("Generate Music 🚀")
 
1035
  bitrate_128_btn.click(set_bitrate_128, inputs=None, outputs=bitrate_state)
1036
  bitrate_192_btn.click(set_bitrate_192, inputs=None, outputs=bitrate_state)
1037
  bitrate_320_btn.click(set_bitrate_320, inputs=None, outputs=bitrate_state)
1038
+ sample_rate_22050_btn.click(set_sample_rate_22050, inputs=None, outputs=sample_rate_state)
1039
+ sample_rate_44100_btn.click(set_sample_rate_44100, inputs=None, outputs=sample_rate_state)
1040
+ sample_rate_48000_btn.click(set_sample_rate_48000, inputs=None, outputs=sample_rate_state)
1041
+ bit_depth_16_btn.click(set_bit_depth_16, inputs=None, outputs=bit_depth_state)
1042
+ bit_depth_24_btn.click(set_bit_depth_24, inputs=None, outputs=bit_depth_state)
1043
  gen_btn.click(
1044
  generate_music,
1045
+ inputs=[instrumental_prompt, cfg_scale, top_k, top_p, temperature, total_duration, bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, target_volume, preset, max_steps, vram_status, bitrate_state, sample_rate_state, bit_depth_state],
1046
  outputs=[out_audio, status, vram_status]
1047
  )
1048
  clr_btn.click(
1049
  clear_inputs,
1050
  inputs=None,
1051
+ outputs=[instrumental_prompt, cfg_scale, top_k, top_p, temperature, total_duration, bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, target_volume, preset, max_steps, bitrate_state, sample_rate_state, bit_depth_state]
1052
  )
1053
  log_btn.click(
1054
  get_latest_log,
 
1076
  except Exception as e:
1077
  logger.error(f"Failed to launch Gradio UI: {e}")
1078
  logger.error(traceback.format_exc())
1079
+ sys.exit(1)