ajajali09 committed on
Commit
cbb1096
·
1 Parent(s): 35b23c4

fix speech rate for v2

Browse files
Files changed (2) hide show
  1. app.py +3 -1
  2. utils.py +8 -2
app.py CHANGED
@@ -222,6 +222,7 @@ if st.session_state.page == "Home":
222
  pronunciation_dict_str = st.session_state.pronunc_dict
223
 
224
  input_text = st.session_state.input_text
 
225
  if not input_text.strip():
226
  st.warning("Please enter text to synthesize")
227
  elif len(input_text) > 1000:
@@ -278,6 +279,7 @@ if st.session_state.page == "Home":
278
  classes.upload_voice_clone_audio(reference_audio, voice_id)
279
  voice_path = cache_key
280
  st.session_state.voice_cache[cache_key] = voice_id
 
281
  status_msg = f"✓ Cloned voice successfully for language: {language}"
282
  # Generate speech
283
  with st.spinner("Generating speech..."):
@@ -310,7 +312,7 @@ if st.session_state.page == "Home":
310
  st.session_state.show_feedback = True
311
  print("Generation completed......")
312
  except Exception as e:
313
- st.warning("Something went wrong. Please try again!")
314
  st.session_state.show_feedback = False
315
  st.markdown("---")
316
  st.markdown("### 🎧 Output & Feedback")
 
222
  pronunciation_dict_str = st.session_state.pronunc_dict
223
 
224
  input_text = st.session_state.input_text
225
+ print(f"Clicked Generation btn.....\n input:- {input_text}")
226
  if not input_text.strip():
227
  st.warning("Please enter text to synthesize")
228
  elif len(input_text) > 1000:
 
279
  classes.upload_voice_clone_audio(reference_audio, voice_id)
280
  voice_path = cache_key
281
  st.session_state.voice_cache[cache_key] = voice_id
282
+ print(f"This is the voice id get from {model}:--{voice_id}")
283
  status_msg = f"✓ Cloned voice successfully for language: {language}"
284
  # Generate speech
285
  with st.spinner("Generating speech..."):
 
312
  st.session_state.show_feedback = True
313
  print("Generation completed......")
314
  except Exception as e:
315
+ # st.warning("Something went wrong. Please try again!")
316
  st.session_state.show_feedback = False
317
  st.markdown("---")
318
  st.markdown("### 🎧 Output & Feedback")
utils.py CHANGED
@@ -51,8 +51,10 @@ def unpack_pkl_data(s3_key=parameters.pkl_data_key):
51
  with aws.fs.open(f"s3://{aws.bucket_name}/{s3_key}", "rb") as f:
52
  file_bytes = f.read()
53
  loaded_data = pickle.loads(file_bytes)
 
54
  return loaded_data
55
  except Exception as e:
 
56
  return None
57
 
58
  st.session_state.loaded_data = unpack_pkl_data()
@@ -421,7 +423,8 @@ async def v1_generate_speech_async(
421
  send_voice_id = [voice_id]
422
  else:
423
  send_voice_id = voice_id
424
-
 
425
  # Use AsyncOpenAI streaming response (matches your original code)
426
  try:
427
  async with v1_client.audio.speech.with_streaming_response.create(
@@ -456,6 +459,7 @@ async def v1_generate_speech_async(
456
  )
457
  return sr, aud
458
  except Exception as e:
 
459
  st.warning("Something went wrong in Audios Generation. Pleace try later.")
460
 
461
  async def v2_generate_speech_async(
@@ -494,6 +498,7 @@ async def v2_generate_speech_async(
494
  else:
495
  send_voice_id = voice_id
496
 
 
497
  # Use AsyncOpenAI streaming response (matches your original code)
498
  try:
499
  async with v2_client.audio.speech.with_streaming_response.create(
@@ -505,7 +510,7 @@ async def v2_generate_speech_async(
505
  async for chunk in response.iter_bytes(chunk_size=1024):
506
  audio_chunks.append(chunk)
507
  audio_data = b''.join(audio_chunks)
508
- header = audio_header_creater(audio_data, sample_rate=16_000)
509
  audio = io.BytesIO(header + audio_data)
510
  aud, sr = sf.read(audio)
511
  saved_path = save_generated_audio(aud, session_id)
@@ -527,4 +532,5 @@ async def v2_generate_speech_async(
527
  )
528
  return sr, aud
529
  except Exception as e:
 
530
  st.warning("Something went wrong in Audios Generation. Pleace try later.")
 
51
  with aws.fs.open(f"s3://{aws.bucket_name}/{s3_key}", "rb") as f:
52
  file_bytes = f.read()
53
  loaded_data = pickle.loads(file_bytes)
54
+ print(f"pkl unpack successful")
55
  return loaded_data
56
  except Exception as e:
57
+ print(f"{e}")
58
  return None
59
 
60
  st.session_state.loaded_data = unpack_pkl_data()
 
423
  send_voice_id = [voice_id]
424
  else:
425
  send_voice_id = voice_id
426
+
427
+ # print(f"\n\nPayload:::---\nModel:-{model}\nSpeaker:-{send_voice_id}\nText:-{text}\nExtra Body: {extra_body}\nExtra Header: {extra_headers}")
428
  # Use AsyncOpenAI streaming response (matches your original code)
429
  try:
430
  async with v1_client.audio.speech.with_streaming_response.create(
 
459
  )
460
  return sr, aud
461
  except Exception as e:
462
+ print(f"Error:- {e}")
463
  st.warning("Something went wrong in Audios Generation. Pleace try later.")
464
 
465
  async def v2_generate_speech_async(
 
498
  else:
499
  send_voice_id = voice_id
500
 
501
+ print(f"\n\nPayload:::---\nModel:-{model}\nSpeaker:-{send_voice_id}\nText:-{text}\nExtra Body: {extra_body}")
502
  # Use AsyncOpenAI streaming response (matches your original code)
503
  try:
504
  async with v2_client.audio.speech.with_streaming_response.create(
 
510
  async for chunk in response.iter_bytes(chunk_size=1024):
511
  audio_chunks.append(chunk)
512
  audio_data = b''.join(audio_chunks)
513
+ header = audio_header_creater(audio_data, sample_rate=24_000)
514
  audio = io.BytesIO(header + audio_data)
515
  aud, sr = sf.read(audio)
516
  saved_path = save_generated_audio(aud, session_id)
 
532
  )
533
  return sr, aud
534
  except Exception as e:
535
+ print(f'Error:-{e}')
536
  st.warning("Something went wrong in Audios Generation. Pleace try later.")