Spaces:
Configuration error
Configuration error
Commit ·
4236718
1
Parent(s): c3d118a
Update app.py
Browse files
app.py
CHANGED
|
@@ -116,15 +116,15 @@ def greet(Text,Voicetoclone,VoiceMicrophone):
|
|
| 116 |
print(VoiceMicrophone)
|
| 117 |
sample= str(VoiceMicrophone)
|
| 118 |
size= len(reference_files)*sys.getsizeof(reference_files)
|
| 119 |
-
size2= size /
|
| 120 |
-
if (size2 > 0.012) or len(text)>
|
| 121 |
-
message="File is greater than
|
| 122 |
print(message)
|
| 123 |
raise SystemExit("File is greater than 30mb. Please re-try or Text inserted is longer than 2000 characters. Please re-try with smaller sizes.")
|
| 124 |
else:
|
| 125 |
-
os.system('ffmpeg-normalize $sample -nt rms -t=-
|
| 126 |
reference_emb = SE_speaker_manager.compute_d_vector_from_clip(reference_files)
|
| 127 |
-
model.length_scale =
|
| 128 |
model.inference_noise_scale = 0.3 # defines the noise variance applied to the random z vector at inference.
|
| 129 |
model.inference_noise_scale_dp = 0.3 # defines the noise variance applied to the duration predictor z vector at inference.
|
| 130 |
text = text
|
|
@@ -158,7 +158,7 @@ def greet(Text,Voicetoclone,VoiceMicrophone):
|
|
| 158 |
|
| 159 |
demo = gr.Interface(
|
| 160 |
fn=greet,
|
| 161 |
-
inputs=[gr.inputs.Textbox(label='What would you like the voice to say? (max.
|
| 162 |
outputs="audio",
|
| 163 |
title="Bilal's Voice Cloning Tool"
|
| 164 |
)
|
|
|
|
| 116 |
print(VoiceMicrophone)
|
| 117 |
sample= str(VoiceMicrophone)
|
| 118 |
size= len(reference_files)*sys.getsizeof(reference_files)
|
| 119 |
+
size2= size / 10000000
|
| 120 |
+
if (size2 > 0.012) or len(text)>20000:
|
| 121 |
+
message="File is greater than 300mb or Text inserted is longer than 20000 characters. Please re-try with smaller sizes."
|
| 122 |
print(message)
|
| 123 |
raise SystemExit("File is greater than 30mb. Please re-try or Text inserted is longer than 2000 characters. Please re-try with smaller sizes.")
|
| 124 |
else:
|
| 125 |
+
os.system('ffmpeg-normalize $sample -nt rms -t=-270 -o $sample -ar 160000 -f')
|
| 126 |
reference_emb = SE_speaker_manager.compute_d_vector_from_clip(reference_files)
|
| 127 |
+
model.length_scale = 2 # scaler for the duration predictor. The larger it is, the slower the speech.
|
| 128 |
model.inference_noise_scale = 0.3 # defines the noise variance applied to the random z vector at inference.
|
| 129 |
model.inference_noise_scale_dp = 0.3 # defines the noise variance applied to the duration predictor z vector at inference.
|
| 130 |
text = text
|
|
|
|
| 158 |
|
| 159 |
demo = gr.Interface(
|
| 160 |
fn=greet,
|
| 161 |
+
inputs=[gr.inputs.Textbox(label='What would you like the voice to say? (max. 20000 characters per request)'),gr.Audio(type="filepath", source="upload",label='Please upload a voice to clone (max. 30mb)'),gr.Audio(source="microphone", type="filepath", streaming=True)],
|
| 162 |
outputs="audio",
|
| 163 |
title="Bilal's Voice Cloning Tool"
|
| 164 |
)
|