Spaces:

Artificial-superintelligence
/

Testvoice

Paused

App Files Community

Artificial-superintelligence committed on Oct 22, 2024

Commit

fbf5ae6

verified ·

1 Parent(s): 7223dab

Update app.py

Browse files

Files changed (1) hide show

app.py +76 -146

app.py CHANGED Viewed

@@ -1,146 +1,76 @@
-import gradio as gr
-import numpy as np
-import librosa
-import soundfile as sf
-from TTS.api import TTS
-import torch
-import os
-import tempfile
-# Initialize TTS model
-tts = TTS("tts_models/multilingual/multi-dataset/your_tts", progress_bar=False).to("cuda" if torch.cuda.is_available() else "cpu")
-def load_audio(audio_path):
-    audio, sr = librosa.load(audio_path, sr=None)
-    return audio, sr
-def save_audio(audio, sr, path):
-    sf.write(path, audio, sr)
-def pitch_shift(audio, sr, n_steps):
-    return librosa.effects.pitch_shift(audio, sr=sr, n_steps=n_steps)
-def change_voice(audio_path, pitch_shift_amount, formant_shift_amount):
-    # Load the audio
-    audio, sr = load_audio(audio_path)
-    # Apply pitch shifting
-    pitched_audio = pitch_shift(audio, sr, pitch_shift_amount)
-    # Use TTS model for voice conversion
-    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
-        save_audio(pitched_audio, sr, temp_file.name)
-        converted_audio_path = tts.voice_conversion(
-            source_wav=temp_file.name,
-            target_wav="path/to/female_target_voice.wav",  # You need to provide a female target voice file
-            output_wav=None
-        )
-    # Load the converted audio
-    converted_audio, _ = load_audio(converted_audio_path)
-    # Apply formant shifting (simplified approach)
-    formant_shifted_audio = librosa.effects.pitch_shift(converted_audio, sr=sr, n_steps=formant_shift_amount)
-    # Clean up temporary files
-    os.unlink(temp_file.name)
-    os.unlink(converted_audio_path)
-    return (sr, formant_shifted_audio)
-def process_audio(audio_file, pitch_shift_amount, formant_shift_amount):
-    sr, audio = change_voice(audio_file.name, pitch_shift_amount, formant_shift_amount)
-    output_path = "output_voice.wav"
-    save_audio(audio, sr, output_path)
-    return output_path
-# Custom CSS for improved design
-custom_css = """
-.gradio-container {
-    background-color: #f0f4f8;
-}
-.container {
-    max-width: 900px;
-    margin: auto;
-    padding: 20px;
-    border-radius: 10px;
-    background-color: white;
-    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
-}
-h1 {
-    color: #2c3e50;
-    text-align: center;
-    font-size: 2.5em;
-    margin-bottom: 20px;
-}
-.description {
-    text-align: center;
-    color: #34495e;
-    margin-bottom: 30px;
-}
-.input-section, .output-section {
-    background-color: #ecf0f1;
-    padding: 20px;
-    border-radius: 8px;
-    margin-bottom: 20px;
-}
-.input-section h3, .output-section h3 {
-    color: #2980b9;
-    margin-bottom: 15px;
-}
-"""
-# Gradio Interface with improved design
-with gr.Blocks(css=custom_css) as demo:
-    gr.HTML(
-        """
-        <div style="text-align: center; max-width: 800px; margin: 0 auto;">
-            <div style="display: inline-flex; align-items: center; gap: 0.8rem; font-size: 1.75rem;">
-                <svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" fill="currentColor" viewBox="0 0 16 16" style="vertical-align: middle;">
-                    <path d="M3.5 6.5A.5.5 0 0 1 4 7v1a4 4 0 0 0 8 0V7a.5.5 0 0 1 1 0v1a5 5 0 0 1-4.5 4.975V15h3a.5.5 0 0 1 0 1h-7a.5.5 0 0 1 0-1h3v-2.025A5 5 0 0 1 3 8V7a.5.5 0 0 1 .5-.5z"/>
-                    <path d="M10 8a2 2 0 1 1-4 0V3a2 2 0 1 1 4 0v5zM8 0a3 3 0 0 0-3 3v5a3 3 0 0 0 6 0V3a3 3 0 0 0-3-3z"/>
-                </svg>
-                <h1 style="font-weight: 900; margin-bottom: 7px;">
-                    AI Voice Changer
-                </h1>
-            </div>
-            <p class="description">Transform any voice into a realistic female voice using advanced AI technology</p>
-        </div>
-        """
-    )
-    with gr.Row():
-        with gr.Column(elem_classes="input-section"):
-            gr.Markdown("### Input")
-            audio_input = gr.Audio(type="filepath", label="Upload Voice")
-            pitch_shift = gr.Slider(-12, 12, step=0.5, label="Pitch Shift", value=0)
-            formant_shift = gr.Slider(-5, 5, step=0.1, label="Formant Shift", value=0)
-            submit_btn = gr.Button("Transform Voice", variant="primary")
-        with gr.Column(elem_classes="output-section"):
-            gr.Markdown("### Output")
-            audio_output = gr.Audio(label="Transformed Voice")
-    submit_btn.click(
-        fn=process_audio,
-        inputs=[audio_input, pitch_shift, formant_shift],
-        outputs=audio_output,
-    )
-    gr.Markdown(
-        """
-        ### How to use:
-        1. Upload an audio file containing the voice you want to transform.
-        2. Adjust the Pitch Shift and Formant Shift sliders to fine-tune the voice (optional).
-        3. Click the "Transform Voice" button to process the audio.
-        4. Listen to the transformed voice in the output section.
-        5. Download the transformed audio file if desired.
-        Note: This application uses AI to transform voices. The quality of the output may vary depending on the input audio quality and the chosen settings.
-        """
-    )
-if __name__ == "__main__":
-    demo.launch()

+runtime error
+Exit code: 1. Reason: esample:False
+ | > num_mels:64
+ | > log_func:np.log10
+ | > min_level_db:-100
+ | > frame_shift_ms:None
+ | > frame_length_ms:None
+ | > ref_level_db:20
+ | > fft_size:512
+ | > power:1.5
+ | > preemphasis:0.97
+ | > griffin_lim_iters:60
+ | > signal_norm:False
+ | > symmetric_norm:False
+ | > mel_fmin:0
+ | > mel_fmax:8000.0
+ | > pitch_fmin:1.0
+ | > pitch_fmax:640.0
+ | > spec_gain:20.0
+ | > stft_pad_mode:reflect
+ | > max_norm:4.0
+ | > clip_norm:False
+ | > do_trim_silence:False
+ | > trim_db:60
+ | > do_sound_norm:False
+ | > do_amp_to_db_linear:True
+ | > do_amp_to_db_mel:True
+ | > do_rms_norm:True
+ | > db_level:-27.0
+ | > stats_path:None
+ | > base:10
+ | > hop_length:160
+ | > win_length:400
+ > External Speaker Encoder Loaded !!
+ > initialization of language-embedding layers.
+/usr/local/lib/python3.10/site-packages/torch/nn/utils/weight_norm.py:143: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`.
+  WeightNorm.apply(module, name, dim)
+ > Model fully restored.
+ > Setting up Audio Processor...
+ | > sample_rate:16000
+ | > resample:False
+ | > num_mels:64
+ | > log_func:np.log10
+ | > min_level_db:-100
+ | > frame_shift_ms:None
+ | > frame_length_ms:None
+ | > ref_level_db:20
+ | > fft_size:512
+ | > power:1.5
+ | > preemphasis:0.97
+ | > griffin_lim_iters:60
+ | > signal_norm:False
+ | > symmetric_norm:False
+ | > mel_fmin:0
+ | > mel_fmax:8000.0
+ | > pitch_fmin:1.0
+ | > pitch_fmax:640.0
+ | > spec_gain:20.0
+ | > stft_pad_mode:reflect
+ | > max_norm:4.0
+ | > clip_norm:False
+ | > do_trim_silence:False
+ | > trim_db:60
+ | > do_sound_norm:False
+ | > do_amp_to_db_linear:True
+ | > do_amp_to_db_mel:True
+ | > do_rms_norm:True
+ | > db_level:-27.0
+ | > stats_path:None
+ | > base:10
+ | > hop_length:160
+ | > win_length:400
+Traceback (most recent call last):
+  File "/home/user/app/app.py", line 11, in <module>
+    tts = TTS("tts_models/multilingual/multi-dataset/your_tts", progress_bar=False).to("cuda" if torch.cuda.is_available() else "cpu")
+AttributeError: 'TTS' object has no attribute 'to'