lainlives committed on
Commit
2247d0e
·
1 Parent(s): 9e7977b
Files changed (1) hide show
  1. app.py +0 -329
app.py CHANGED
@@ -2666,335 +2666,6 @@ def _pair_audio_tracks_and_gain(
2666
  if audio_track
2667
  ]
2668
 
2669
@cache
def _get_voice_converter() -> VoiceConverter:
    """
    Build the shared voice converter.

    ``VoiceConverter`` is imported inside the function body rather than
    at module level, and the function is wrapped in ``cache``, so the
    import and the construction run on the first call only. Every later
    call returns the same instance.

    Returns
    -------
    VoiceConverter
        The cached voice converter instance.

    """
    from ultimate_rvc.rvc.infer.infer import VoiceConverter  # noqa: PLC0415

    voice_converter = VoiceConverter()
    return voice_converter
2683
-
2684
-
2685
def convert(
    audio_track: StrPath,
    directory: StrPath,
    model_name: str,
    n_octaves: int = 0,
    n_semitones: int = 0,
    f0_methods: Sequence[F0Method] | None = None,
    index_rate: float = 0.3,
    rms_mix_rate: float = 1.0,
    protect_rate: float = 0.33,
    hop_length: int = 128,
    split_audio: bool = False,
    autotune_audio: bool = False,
    autotune_strength: float = 1.0,
    clean_audio: bool = False,
    clean_strength: float = 0.7,
    embedder_model: EmbedderModel = EmbedderModel.CONTENTVEC,
    custom_embedder_model: str | None = None,
    sid: int = 0,
    content_type: RVCContentType = RVCContentType.AUDIO,
    make_directory: bool = False,
) -> Path:
    """
    Run RVC voice conversion on an audio track.

    The conversion settings are serialized to a metadata dictionary
    that is used to derive the output file names. When both the
    converted audio file and its JSON metadata file already exist, the
    conversion is skipped and the existing audio path is returned.

    Parameters
    ----------
    audio_track : StrPath
        The path to the audio track to convert.
    directory : StrPath
        The path to the directory where the converted audio track
        will be saved.
    model_name : str
        The name of the model to use for voice conversion.
    n_octaves : int, default=0
        The number of octaves to pitch-shift the converted audio by.
    n_semitones : int, default=0
        The number of semitones to pitch-shift the converted audio by.
        Added to ``n_octaves * 12`` to give the total pitch shift.
    f0_methods : Sequence[F0Method], optional
        The methods to use for pitch extraction. If None or empty,
        ``F0Method.RMVPE`` is used.
    index_rate : float, default=0.3
        The influence of the index file on the voice conversion.
    rms_mix_rate : float, default=1.0
        The blending rate of the volume envelope of the converted
        audio.
    protect_rate : float, default=0.33
        The protection rate for consonants and breathing sounds.
    hop_length : int, default=128
        The hop length to use for CREPE-based pitch extraction.
    split_audio : bool, default=False
        Whether to split the audio track into smaller segments before
        converting it.
    autotune_audio : bool, default=False
        Whether to apply autotune to the converted audio.
    autotune_strength : float, default=1.0
        The strength of the autotune to apply to the converted audio.
    clean_audio : bool, default=False
        Whether to clean the converted audio.
    clean_strength : float, default=0.7
        The intensity of the cleaning to apply to the converted audio.
    embedder_model : EmbedderModel, default=EmbedderModel.CONTENTVEC
        The model to use for generating speaker embeddings.
    custom_embedder_model : str, optional
        The name of a custom embedder model to use for generating
        speaker embeddings. Only validated when ``embedder_model`` is
        ``EmbedderModel.CUSTOM``.
    sid : int, default=0
        The speaker id to use for multi-speaker models.
    content_type : RVCContentType, default=RVCContentType.AUDIO
        The type of content to convert. Determines what is shown in
        display messages and saved file names.
    make_directory : bool, default=False
        Whether to create the directory where the converted audio
        track will be saved if it does not exist.

    Returns
    -------
    Path
        The path to the converted audio track.

    """
    # Select the entities handed to the validators for the track and
    # for its output directory.
    if content_type == RVCContentType.VOCALS:
        track_entity, directory_entity = Entity.VOCALS_TRACK, Entity.SONG_DIR
    elif content_type == RVCContentType.VOICE:
        track_entity, directory_entity = Entity.VOICE_TRACK, Entity.DIRECTORY
    elif content_type == RVCContentType.SPEECH:
        track_entity, directory_entity = Entity.SPEECH_TRACK, Entity.DIRECTORY
    elif content_type == RVCContentType.AUDIO:
        track_entity, directory_entity = Entity.AUDIO_TRACK, Entity.DIRECTORY

    source_path = validate_audio_file_exists(audio_track, track_entity)
    if make_directory:
        Path(directory).mkdir(parents=True, exist_ok=True)
    output_dir_path = validate_audio_dir_exists(directory, directory_entity)
    validate_model(model_name, Entity.VOICE_MODEL)

    embedder_path = (
        validate_model(custom_embedder_model, Entity.CUSTOM_EMBEDDER_MODEL)
        if embedder_model == EmbedderModel.CUSTOM
        else None
    )

    wav_path = wavify(
        source_path,
        output_dir_path,
        "20_Input",
        accepted_formats={AudioExt.M4A, AudioExt.AAC},
    )

    total_semitones = n_octaves * 12 + n_semitones
    pitch_methods = set(f0_methods) if f0_methods else {F0Method.RMVPE}

    track_metadata = FileMetaData(
        name=wav_path.name,
        hash_id=get_file_hash(wav_path),
    )
    metadata = RVCAudioMetaData(
        audio_track=track_metadata,
        model_name=model_name,
        n_semitones=total_semitones,
        # Sorted so the metadata does not depend on set iteration order.
        f0_methods=sorted(pitch_methods),
        index_rate=index_rate,
        rms_mix_rate=rms_mix_rate,
        protect_rate=protect_rate,
        hop_length=hop_length,
        split_audio=split_audio,
        autotune_audio=autotune_audio,
        autotune_strength=autotune_strength,
        clean_audio=clean_audio,
        clean_strength=clean_strength,
        embedder_model=embedder_model,
        custom_embedder_model=custom_embedder_model,
        sid=sid,
    ).model_dump()

    # One base path lookup per output file: the audio and its metadata.
    converted_audio_path, converted_audio_json_path = (
        get_unique_base_path(
            output_dir_path,
            f"21_{content_type.capitalize()}_Converted",
            metadata,
        ).with_suffix(extension)
        for extension in (".wav", ".json")
    )

    # Nothing to do when both outputs are already on disk.
    if converted_audio_path.exists() and converted_audio_json_path.exists():
        return converted_audio_path

    rvc_model_path, rvc_index_path = _get_rvc_files(model_name)
    converter = _get_voice_converter()

    index_path_arg = str(rvc_index_path) if rvc_index_path else ""
    embedder_path_arg = None if embedder_path is None else str(embedder_path)

    converter.convert_audio(
        audio_input_path=str(wav_path),
        audio_output_path=str(converted_audio_path),
        model_path=str(rvc_model_path),
        index_path=index_path_arg,
        pitch=total_semitones,
        f0_methods=pitch_methods,
        index_rate=index_rate,
        volume_envelope=rms_mix_rate,
        protect=protect_rate,
        hop_length=hop_length,
        split_audio=split_audio,
        f0_autotune=autotune_audio,
        f0_autotune_strength=autotune_strength,
        embedder_model=embedder_model,
        embedder_model_custom=embedder_path_arg,
        clean_audio=clean_audio,
        clean_strength=clean_strength,
        post_process=False,
        resample_sr=0,
        sid=sid,
    )
    json_dump(metadata, converted_audio_json_path)
    return converted_audio_path
2867
-
2868
-
2869
@cache
def _get_audio_separator(
    output_dir: StrPath = INTERMEDIATE_AUDIO_BASE_DIR,
    output_format: str = AudioExt.WAV,
    segment_size: int = SegmentSize.SEG_256,
    sample_rate: int = 44100,
) -> Separator:
    """
    Get an audio separator.

    The function is wrapped in ``cache``, so one separator is built
    and retained per distinct combination of arguments; repeated calls
    with the same arguments return the same instance.

    Parameters
    ----------
    output_dir : StrPath, default=INTERMEDIATE_AUDIO_BASE_DIR
        The directory to save the separated audio to.
    output_format : str, default=AudioExt.WAV
        The format to save the separated audio in.
    segment_size : int, default=SegmentSize.SEG_256
        The segment size to use for separation.
    sample_rate : int, default=44100
        The sample rate to use for separation.

    Returns
    -------
    Separator
        An audio separator.

    """
    # The docstring must be the first statement of the body; it used to
    # sit below the two statements that follow, which left ``__doc__``
    # unset.
    # NOTE(review): presumably this makes an ffmpeg binary discoverable
    # before ``audio_separator`` is imported - confirm against the
    # static_ffmpeg documentation. The call order is kept as it was.
    static_ffmpeg.add_paths(weak=True)
    from audio_separator.separator import Separator  # noqa: PLC0415

    # NOTE(review): the cache is unbounded and keyed on ``output_dir``;
    # if callers pass many distinct directories, every separator stays
    # alive - confirm that this is acceptable.
    return Separator(
        model_file_dir=SEPARATOR_MODELS_DIR,
        use_autocast=False,
        output_dir=output_dir,
        output_format=output_format,
        sample_rate=sample_rate,
        mdx_params={
            "hop_length": 1024,
            "segment_size": segment_size,
            "overlap": 0.25,
            "batch_size": 1,
            "enable_denoise": True,
        },
    )
2913
-
2914
-
2915
-
2916
@spaces.GPU(duration=zgpuduration)
def separate_audio(
    audio_track: StrPath,
    song_dir: StrPath,
    model_name: SeparationModel,
    segment_size: int,
) -> tuple[Path, Path]:
    """
    Split an audio track into a primary stem and a secondary stem.

    The separation settings are serialized to a metadata dictionary
    that is used to derive the output file names. When both stems and
    both JSON metadata files already exist, separation is skipped and
    the existing stem paths are returned.

    Parameters
    ----------
    audio_track : StrPath
        The path to the audio track to separate.
    song_dir : StrPath
        The path to the song directory where the separated primary stem
        and secondary stem will be saved.
    model_name : SeparationModel
        The name of the model to use for audio separation.
    segment_size : int
        The segment size to use for audio separation.

    Returns
    -------
    primary_path : Path
        The path to the separated primary stem.
    secondary_path : Path
        The path to the separated secondary stem.

    """
    source_path = validate_audio_file_exists(audio_track, Entity.AUDIO_TRACK)
    output_dir_path = validate_audio_dir_exists(song_dir, Entity.SONG_DIR)

    track_metadata = FileMetaData(
        name=source_path.name,
        hash_id=get_file_hash(source_path),
    )
    metadata = SeparatedAudioMetaData(
        audio_track=track_metadata,
        model_name=model_name,
        segment_size=segment_size,
    ).model_dump()

    # Order matters for the unpacking below: primary audio, primary
    # metadata, secondary audio, secondary metadata.
    stem_paths = []
    for stem_prefix in ("11_Stem_Primary", "11_Stem_Secondary"):
        for extension in (".wav", ".json"):
            base_path = get_unique_base_path(
                output_dir_path,
                stem_prefix,
                metadata,
            )
            stem_paths.append(base_path.with_suffix(extension))

    primary_path, primary_json_path, secondary_path, secondary_json_path = (
        stem_paths
    )

    # Nothing to do when all four outputs are already on disk.
    if all(stem_path.exists() for stem_path in stem_paths):
        return primary_path, secondary_path

    separator = _get_audio_separator(
        output_dir=output_dir_path,
        segment_size=segment_size,
    )
    separator.load_model(model_name)

    # Output names are passed without the file suffix and are keyed by
    # the stem names reported by the loaded model.
    output_names = {
        separator.model_instance.primary_stem_name: str(
            primary_path.with_suffix(""),
        ),
        separator.model_instance.secondary_stem_name: str(
            secondary_path.with_suffix(""),
        ),
    }
    separator.separate(str(source_path), custom_output_names=output_names)

    json_dump(metadata, primary_json_path)
    json_dump(metadata, secondary_json_path)
    return primary_path, secondary_path
2996
-
2997
-
2998
  def run_pipeline2(
2999
  source: str,
3000
  model_name: str,
 
2666
  if audio_track
2667
  ]
2668
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2669
  def run_pipeline2(
2670
  source: str,
2671
  model_name: str,