fix speech rate for v2
Browse files
app.py
CHANGED
|
@@ -222,6 +222,7 @@ if st.session_state.page == "Home":
|
|
| 222 |
pronunciation_dict_str = st.session_state.pronunc_dict
|
| 223 |
|
| 224 |
input_text = st.session_state.input_text
|
|
|
|
| 225 |
if not input_text.strip():
|
| 226 |
st.warning("Please enter text to synthesize")
|
| 227 |
elif len(input_text) > 1000:
|
|
@@ -278,6 +279,7 @@ if st.session_state.page == "Home":
|
|
| 278 |
classes.upload_voice_clone_audio(reference_audio, voice_id)
|
| 279 |
voice_path = cache_key
|
| 280 |
st.session_state.voice_cache[cache_key] = voice_id
|
|
|
|
| 281 |
status_msg = f"✓ Cloned voice successfully for language: {language}"
|
| 282 |
# Generate speech
|
| 283 |
with st.spinner("Generating speech..."):
|
|
@@ -310,7 +312,7 @@ if st.session_state.page == "Home":
|
|
| 310 |
st.session_state.show_feedback = True
|
| 311 |
print("Generation completed......")
|
| 312 |
except Exception as e:
|
| 313 |
-
st.warning("Something went wrong. Please try again!")
|
| 314 |
st.session_state.show_feedback = False
|
| 315 |
st.markdown("---")
|
| 316 |
st.markdown("### 🎧 Output & Feedback")
|
|
|
|
| 222 |
pronunciation_dict_str = st.session_state.pronunc_dict
|
| 223 |
|
| 224 |
input_text = st.session_state.input_text
|
| 225 |
+
print(f"Clicked Generation btn.....\n input:- {input_text}")
|
| 226 |
if not input_text.strip():
|
| 227 |
st.warning("Please enter text to synthesize")
|
| 228 |
elif len(input_text) > 1000:
|
|
|
|
| 279 |
classes.upload_voice_clone_audio(reference_audio, voice_id)
|
| 280 |
voice_path = cache_key
|
| 281 |
st.session_state.voice_cache[cache_key] = voice_id
|
| 282 |
+
print(f"This is the voice id get from {model}:--{voice_id}")
|
| 283 |
status_msg = f"✓ Cloned voice successfully for language: {language}"
|
| 284 |
# Generate speech
|
| 285 |
with st.spinner("Generating speech..."):
|
|
|
|
| 312 |
st.session_state.show_feedback = True
|
| 313 |
print("Generation completed......")
|
| 314 |
except Exception as e:
|
| 315 |
+
# st.warning("Something went wrong. Please try again!")
|
| 316 |
st.session_state.show_feedback = False
|
| 317 |
st.markdown("---")
|
| 318 |
st.markdown("### 🎧 Output & Feedback")
|
utils.py
CHANGED
|
@@ -51,8 +51,10 @@ def unpack_pkl_data(s3_key=parameters.pkl_data_key):
|
|
| 51 |
with aws.fs.open(f"s3://{aws.bucket_name}/{s3_key}", "rb") as f:
|
| 52 |
file_bytes = f.read()
|
| 53 |
loaded_data = pickle.loads(file_bytes)
|
|
|
|
| 54 |
return loaded_data
|
| 55 |
except Exception as e:
|
|
|
|
| 56 |
return None
|
| 57 |
|
| 58 |
st.session_state.loaded_data = unpack_pkl_data()
|
|
@@ -421,7 +423,8 @@ async def v1_generate_speech_async(
|
|
| 421 |
send_voice_id = [voice_id]
|
| 422 |
else:
|
| 423 |
send_voice_id = voice_id
|
| 424 |
-
|
|
|
|
| 425 |
# Use AsyncOpenAI streaming response (matches your original code)
|
| 426 |
try:
|
| 427 |
async with v1_client.audio.speech.with_streaming_response.create(
|
|
@@ -456,6 +459,7 @@ async def v1_generate_speech_async(
|
|
| 456 |
)
|
| 457 |
return sr, aud
|
| 458 |
except Exception as e:
|
|
|
|
| 459 |
st.warning("Something went wrong in Audios Generation. Pleace try later.")
|
| 460 |
|
| 461 |
async def v2_generate_speech_async(
|
|
@@ -494,6 +498,7 @@ async def v2_generate_speech_async(
|
|
| 494 |
else:
|
| 495 |
send_voice_id = voice_id
|
| 496 |
|
|
|
|
| 497 |
# Use AsyncOpenAI streaming response (matches your original code)
|
| 498 |
try:
|
| 499 |
async with v2_client.audio.speech.with_streaming_response.create(
|
|
@@ -505,7 +510,7 @@ async def v2_generate_speech_async(
|
|
| 505 |
async for chunk in response.iter_bytes(chunk_size=1024):
|
| 506 |
audio_chunks.append(chunk)
|
| 507 |
audio_data = b''.join(audio_chunks)
|
| 508 |
-
header = audio_header_creater(audio_data, sample_rate=
|
| 509 |
audio = io.BytesIO(header + audio_data)
|
| 510 |
aud, sr = sf.read(audio)
|
| 511 |
saved_path = save_generated_audio(aud, session_id)
|
|
@@ -527,4 +532,5 @@ async def v2_generate_speech_async(
|
|
| 527 |
)
|
| 528 |
return sr, aud
|
| 529 |
except Exception as e:
|
|
|
|
| 530 |
st.warning("Something went wrong in Audios Generation. Pleace try later.")
|
|
|
|
| 51 |
with aws.fs.open(f"s3://{aws.bucket_name}/{s3_key}", "rb") as f:
|
| 52 |
file_bytes = f.read()
|
| 53 |
loaded_data = pickle.loads(file_bytes)
|
| 54 |
+
print(f"pkl unpack successful")
|
| 55 |
return loaded_data
|
| 56 |
except Exception as e:
|
| 57 |
+
print(f"{e}")
|
| 58 |
return None
|
| 59 |
|
| 60 |
st.session_state.loaded_data = unpack_pkl_data()
|
|
|
|
| 423 |
send_voice_id = [voice_id]
|
| 424 |
else:
|
| 425 |
send_voice_id = voice_id
|
| 426 |
+
|
| 427 |
+
# print(f"\n\nPayload:::---\nModel:-{model}\nSpeaker:-{send_voice_id}\nText:-{text}\nExtra Body: {extra_body}\nExtra Header: {extra_headers}")
|
| 428 |
# Use AsyncOpenAI streaming response (matches your original code)
|
| 429 |
try:
|
| 430 |
async with v1_client.audio.speech.with_streaming_response.create(
|
|
|
|
| 459 |
)
|
| 460 |
return sr, aud
|
| 461 |
except Exception as e:
|
| 462 |
+
print(f"Error:- {e}")
|
| 463 |
st.warning("Something went wrong in Audios Generation. Pleace try later.")
|
| 464 |
|
| 465 |
async def v2_generate_speech_async(
|
|
|
|
| 498 |
else:
|
| 499 |
send_voice_id = voice_id
|
| 500 |
|
| 501 |
+
print(f"\n\nPayload:::---\nModel:-{model}\nSpeaker:-{send_voice_id}\nText:-{text}\nExtra Body: {extra_body}")
|
| 502 |
# Use AsyncOpenAI streaming response (matches your original code)
|
| 503 |
try:
|
| 504 |
async with v2_client.audio.speech.with_streaming_response.create(
|
|
|
|
| 510 |
async for chunk in response.iter_bytes(chunk_size=1024):
|
| 511 |
audio_chunks.append(chunk)
|
| 512 |
audio_data = b''.join(audio_chunks)
|
| 513 |
+
header = audio_header_creater(audio_data, sample_rate=24_000)
|
| 514 |
audio = io.BytesIO(header + audio_data)
|
| 515 |
aud, sr = sf.read(audio)
|
| 516 |
saved_path = save_generated_audio(aud, session_id)
|
|
|
|
| 532 |
)
|
| 533 |
return sr, aud
|
| 534 |
except Exception as e:
|
| 535 |
+
print(f'Error:-{e}')
|
| 536 |
st.warning("Something went wrong in Audios Generation. Pleace try later.")
|