Update app.py
Browse files
app.py
CHANGED
|
@@ -56,33 +56,67 @@ def check_voice_files():
|
|
| 56 |
else:
|
| 57 |
return "**All voice files are present.** 🎉"
|
| 58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
@spaces.GPU(duration=120)
|
| 60 |
def tts_generate(text, voice, language):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
|
| 62 |
temp_audio_path = temp_audio.name
|
| 63 |
|
| 64 |
-
|
|
|
|
|
|
|
|
|
|
| 65 |
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
return temp_audio_path
|
| 74 |
-
|
| 75 |
@spaces.GPU(enable_queue=True)
|
| 76 |
def clone_voice(text, audio_file, language):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
|
| 78 |
temp_audio_path = temp_audio.name
|
| 79 |
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
|
| 87 |
return temp_audio_path
|
| 88 |
|
|
@@ -113,7 +147,7 @@ with gr.Blocks() as demo:
|
|
| 113 |
with gr.Row():
|
| 114 |
clone_text = gr.Textbox(label="Text to speak")
|
| 115 |
clone_audio = gr.Audio(label="Voice reference audio file", type="filepath")
|
| 116 |
-
clone_language = gr.Dropdown(["en", "es", "fr", "de", "it", "ar"], label="Language", value="en")
|
| 117 |
clone_generate_btn = gr.Button("Generate")
|
| 118 |
clone_output = gr.Audio(label="Generated Audio")
|
| 119 |
|
|
@@ -129,4 +163,4 @@ demo.launch()
|
|
| 129 |
# Clean up temporary files (this will run after the Gradio server is closed)
|
| 130 |
for file in os.listdir():
|
| 131 |
if file.endswith('.wav') and file.startswith('tmp'):
|
| 132 |
-
os.remove(file)
|
|
|
|
| 56 |
else:
|
| 57 |
return "**All voice files are present.** 🎉"
|
| 58 |
|
| 59 |
+
# Split text into newline-separated chunks of at most 100 whitespace-separated words
|
| 60 |
+
def split_text_into_chunks(text, max_tokens=100):
    """
    Split `text` into chunks of at most `max_tokens` words, one chunk per line.

    A "token" here is a whitespace-delimited word (as produced by
    ``str.split()``), not a model-tokenizer token. Words inside a chunk are
    re-joined with single spaces, so the original whitespace (including any
    existing line breaks) is normalized.

    Args:
        text: The input string.
        max_tokens: Maximum number of words per chunk; must be at least 1.

    Returns:
        The chunks joined by a newline character, or an empty string when
        `text` contains no words.

    Raises:
        ValueError: If `max_tokens` is less than 1.
    """
    if max_tokens < 1:
        # A zero step makes range() fail with an unrelated-looking message and
        # a negative step yields no chunks at all, silently dropping the text.
        raise ValueError(f"max_tokens must be >= 1, got {max_tokens}")

    words = text.split()
    return '\n'.join(
        ' '.join(words[start:start + max_tokens])
        for start in range(0, len(words), max_tokens)
    )
|
| 71 |
+
|
| 72 |
@spaces.GPU(duration=120)
def tts_generate(text, voice, language):
    """
    Synthesize `text` in a preset celebrity voice and return the audio path.

    Args:
        text: The text to speak.
        voice: Key into `celebrity_voices` selecting the reference voice file.
        language: Language code passed to the TTS model. For "hi" the text is
            first re-chunked to at most 100 words per line.

    Returns:
        The path of the generated ``.wav`` file on success, or a
        human-readable error message string if the voice file could not be
        fetched or synthesis failed.
    """
    # Hindi input is split into short lines before synthesis.
    # NOTE(review): presumably to stay under a per-segment length limit of
    # the model -- confirm.
    if language == "hi":
        text = split_text_into_chunks(text, max_tokens=100)

    # Fetch the reference voice before allocating the output file so that a
    # failed download (or an unknown `voice` key) leaves nothing behind on disk.
    try:
        voice_file = hf_hub_download(
            repo_id="nikkmitra/clone",
            filename=celebrity_voices[voice],
            repo_type="space",
            token=hf_token,
        )
    except Exception as e:
        return f"Error downloading voice file: {e}"

    # delete=False: the file must outlive this function so the caller can
    # read the generated audio from the returned path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        temp_audio_path = temp_audio.name

    try:
        tts.tts_to_file(
            text=text,
            speaker_wav=voice_file,
            language=language,
            file_path=temp_audio_path,
        )
    except Exception as e:
        # Synthesis failed: the placeholder file is useless, so remove it
        # instead of leaking one temp file per failed request.
        try:
            os.remove(temp_audio_path)
        except OSError:
            pass
        if isinstance(e, AssertionError):
            return f"Error: {e}"
        return f"An unexpected error occurred: {e}"

    return temp_audio_path
|
| 99 |
+
|
| 100 |
@spaces.GPU(enable_queue=True)
def clone_voice(text, audio_file, language):
    """
    Synthesize `text` using `audio_file` as the reference voice.

    Args:
        text: The text to speak.
        audio_file: Reference voice audio, passed to the TTS model as
            `speaker_wav`.
        language: Language code passed to the TTS model. For "hi" the text is
            first re-chunked to at most 100 words per line.

    Returns:
        The path of the generated ``.wav`` file on success, or a
        human-readable error message string if synthesis failed.
    """
    # Hindi input is split into short lines before synthesis.
    # NOTE(review): presumably to stay under a per-segment length limit of
    # the model -- confirm.
    if language == "hi":
        text = split_text_into_chunks(text, max_tokens=100)

    # delete=False: the file must outlive this function so the caller can
    # read the generated audio from the returned path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        temp_audio_path = temp_audio.name

    try:
        tts.tts_to_file(
            text=text,
            speaker_wav=audio_file,
            language=language,
            file_path=temp_audio_path,
        )
    except Exception as e:
        # Synthesis failed: the placeholder file is useless, so remove it
        # instead of leaking one temp file per failed request.
        try:
            os.remove(temp_audio_path)
        except OSError:
            pass
        if isinstance(e, AssertionError):
            return f"Error: {e}"
        return f"An unexpected error occurred: {e}"

    return temp_audio_path
|
| 122 |
|
|
|
|
| 147 |
with gr.Row():
|
| 148 |
clone_text = gr.Textbox(label="Text to speak")
|
| 149 |
clone_audio = gr.Audio(label="Voice reference audio file", type="filepath")
|
| 150 |
+
clone_language = gr.Dropdown(["en", "es", "fr", "de", "it", "ar", "hi"], label="Language", value="en")
|
| 151 |
clone_generate_btn = gr.Button("Generate")
|
| 152 |
clone_output = gr.Audio(label="Generated Audio")
|
| 153 |
|
|
|
|
| 163 |
# Clean up temporary files (this will run after the Gradio server is closed).
# tempfile.NamedTemporaryFile() creates its files in tempfile.gettempdir(), not
# in the working directory, so both locations are scanned.
# NOTE(review): files are matched purely by name ("tmp*.wav"); in a shared temp
# directory this can also remove files created by other processes -- confirm
# this is acceptable for the deployment.
for directory in (os.getcwd(), tempfile.gettempdir()):
    for filename in os.listdir(directory):
        if filename.endswith('.wav') and filename.startswith('tmp'):
            try:
                os.remove(os.path.join(directory, filename))
            except OSError:
                # Best-effort cleanup: a file that is already gone or still
                # in use must not abort removal of the remaining ones.
                pass
|