Spaces:

Oriserve
/

OriTTS

Running

App Files Community

ajajali09 committed 27 days ago

Commit

cbb1096

1 Parent(s): 35b23c4

fix speech rate for v2

Browse files

Files changed (2) hide show

app.py +3 -1
utils.py +8 -2

app.py CHANGED Viewed

@@ -222,6 +222,7 @@ if st.session_state.page == "Home":
         pronunciation_dict_str = st.session_state.pronunc_dict
         input_text = st.session_state.input_text
         if not input_text.strip():
             st.warning("Please enter text to synthesize")
         elif len(input_text) > 1000:
@@ -278,6 +279,7 @@ if st.session_state.page == "Home":
                                 classes.upload_voice_clone_audio(reference_audio, voice_id)
                                 voice_path = cache_key
                                 st.session_state.voice_cache[cache_key] = voice_id
                         status_msg = f"✓ Cloned voice successfully for language: {language}"
                 # Generate speech
                 with st.spinner("Generating speech..."):
@@ -310,7 +312,7 @@ if st.session_state.page == "Home":
                 st.session_state.show_feedback = True
                 print("Generation completed......")
             except Exception as e:
-                st.warning("Something went wrong. Please try again!")
                 st.session_state.show_feedback = False
     st.markdown("---")
     st.markdown("### 🎧 Output & Feedback")

         pronunciation_dict_str = st.session_state.pronunc_dict
         input_text = st.session_state.input_text
+        print(f"Clicked Generation btn.....\n input:- {input_text}")
         if not input_text.strip():
             st.warning("Please enter text to synthesize")
         elif len(input_text) > 1000:
                                 classes.upload_voice_clone_audio(reference_audio, voice_id)
                                 voice_path = cache_key
                                 st.session_state.voice_cache[cache_key] = voice_id
+                                print(f"This is the voice id get from {model}:--{voice_id}")
                         status_msg = f"✓ Cloned voice successfully for language: {language}"
                 # Generate speech
                 with st.spinner("Generating speech..."):
                 st.session_state.show_feedback = True
                 print("Generation completed......")
             except Exception as e:
+                # st.warning("Something went wrong. Please try again!")
                 st.session_state.show_feedback = False
     st.markdown("---")
     st.markdown("### 🎧 Output & Feedback")

utils.py CHANGED Viewed

@@ -51,8 +51,10 @@ def unpack_pkl_data(s3_key=parameters.pkl_data_key):
         with aws.fs.open(f"s3://{aws.bucket_name}/{s3_key}", "rb") as f:
             file_bytes = f.read()
             loaded_data = pickle.loads(file_bytes)
             return loaded_data
     except Exception as e:
         return None
 st.session_state.loaded_data = unpack_pkl_data()
@@ -421,7 +423,8 @@ async def v1_generate_speech_async(
             send_voice_id = [voice_id]
     else:
         send_voice_id = voice_id
     # Use AsyncOpenAI streaming response (matches your original code)
     try:
         async with v1_client.audio.speech.with_streaming_response.create(
@@ -456,6 +459,7 @@ async def v1_generate_speech_async(
         )
         return sr, aud
     except Exception as e:
         st.warning("Something went wrong in Audios Generation. Pleace try later.")
 async def v2_generate_speech_async(
@@ -494,6 +498,7 @@ async def v2_generate_speech_async(
     else:
         send_voice_id = voice_id
     # Use AsyncOpenAI streaming response (matches your original code)
     try:
         async with v2_client.audio.speech.with_streaming_response.create(
@@ -505,7 +510,7 @@ async def v2_generate_speech_async(
             async for chunk in response.iter_bytes(chunk_size=1024):
                 audio_chunks.append(chunk)
         audio_data = b''.join(audio_chunks)
-        header = audio_header_creater(audio_data, sample_rate=16_000)
         audio = io.BytesIO(header + audio_data)
         aud, sr = sf.read(audio)
         saved_path = save_generated_audio(aud, session_id)
@@ -527,4 +532,5 @@ async def v2_generate_speech_async(
         )
         return sr, aud
     except Exception as e:
         st.warning("Something went wrong in Audios Generation. Pleace try later.")

         with aws.fs.open(f"s3://{aws.bucket_name}/{s3_key}", "rb") as f:
             file_bytes = f.read()
             loaded_data = pickle.loads(file_bytes)
+            print(f"pkl unpack successful")
             return loaded_data
     except Exception as e:
+        print(f"{e}")
         return None
 st.session_state.loaded_data = unpack_pkl_data()
             send_voice_id = [voice_id]
     else:
         send_voice_id = voice_id
+    # print(f"\n\nPayload:::---\nModel:-{model}\nSpeaker:-{send_voice_id}\nText:-{text}\nExtra Body: {extra_body}\nExtra Header: {extra_headers}")
     # Use AsyncOpenAI streaming response (matches your original code)
     try:
         async with v1_client.audio.speech.with_streaming_response.create(
         )
         return sr, aud
     except Exception as e:
+        print(f"Error:- {e}")
         st.warning("Something went wrong in Audios Generation. Pleace try later.")
 async def v2_generate_speech_async(
     else:
         send_voice_id = voice_id
+    print(f"\n\nPayload:::---\nModel:-{model}\nSpeaker:-{send_voice_id}\nText:-{text}\nExtra Body: {extra_body}")
     # Use AsyncOpenAI streaming response (matches your original code)
     try:
         async with v2_client.audio.speech.with_streaming_response.create(
             async for chunk in response.iter_bytes(chunk_size=1024):
                 audio_chunks.append(chunk)
         audio_data = b''.join(audio_chunks)
+        header = audio_header_creater(audio_data, sample_rate=24_000)
         audio = io.BytesIO(header + audio_data)
         aud, sr = sf.read(audio)
         saved_path = save_generated_audio(aud, session_id)
         )
         return sr, aud
     except Exception as e:
+        print(f'Error:-{e}')
         st.warning("Something went wrong in Audios Generation. Pleace try later.")