Fix a bug
Browse files
- S3_bucket.py +0 -1
- app.py +2 -21
- utils.py +9 -52
S3_bucket.py
CHANGED
|
@@ -183,5 +183,4 @@ class AWS:
|
|
| 183 |
self.s3_client.upload_fileobj(
|
| 184 |
obj, self.bucket_name, s3_key, ExtraArgs={"ContentType": "audio/wav"}
|
| 185 |
)
|
| 186 |
-
print("raw wav file uploaded to s3!")
|
| 187 |
return None
|
|
|
|
| 183 |
self.s3_client.upload_fileobj(
|
| 184 |
obj, self.bucket_name, s3_key, ExtraArgs={"ContentType": "audio/wav"}
|
| 185 |
)
|
|
|
|
| 186 |
return None
|
app.py
CHANGED
|
@@ -74,7 +74,7 @@ if st.session_state.page == "Home":
|
|
| 74 |
st.title("Home")
|
| 75 |
st.markdown("---")
|
| 76 |
st.header("Models......")
|
| 77 |
-
model = st.radio("Select Model", ["
|
| 78 |
if model == "V1":
|
| 79 |
st.header("Languages.....")
|
| 80 |
language = st.selectbox("Select Language", list(utils.V1_LANGUAGES.keys()))
|
|
@@ -85,9 +85,6 @@ if st.session_state.page == "Home":
|
|
| 85 |
reference_audio = None
|
| 86 |
else:
|
| 87 |
st.info("Give a reference audio (min 5 seconds)")
|
| 88 |
-
# audio_file = st.file_uploader("Reference Audio", type=['wav', 'mp3', 'flac'])
|
| 89 |
-
# reference_audio = audio_file
|
| 90 |
-
# default_speaker = None
|
| 91 |
audio_source = st.radio(
|
| 92 |
"Reference audio source",
|
| 93 |
["Upload file", "Record audio"],
|
|
@@ -118,9 +115,6 @@ if st.session_state.page == "Home":
|
|
| 118 |
reference_audio = None
|
| 119 |
else:
|
| 120 |
st.info("Give a reference audio (min 5 seconds)")
|
| 121 |
-
# audio_file = st.file_uploader("Reference Audio", type=['wav', 'mp3', 'flac'])
|
| 122 |
-
# reference_audio = audio_file
|
| 123 |
-
# default_speaker = None
|
| 124 |
audio_source = st.radio(
|
| 125 |
"Reference audio source",
|
| 126 |
["Upload file", "Record audio"],
|
|
@@ -223,13 +217,9 @@ if st.session_state.page == "Home":
|
|
| 223 |
if generate_btn:
|
| 224 |
|
| 225 |
session_id = utils.generate_session_id()
|
| 226 |
-
print(f"\n\nGenerate btn is pressed.....\nThis is the session ID : -{session_id}")
|
| 227 |
|
| 228 |
# Validate pronunciation input
|
| 229 |
-
# if pr_key.strip() and pr_value.strip():
|
| 230 |
pronunciation_dict_str = st.session_state.pronunc_dict
|
| 231 |
-
# else:
|
| 232 |
-
# pronunciation_dict_str = {}
|
| 233 |
|
| 234 |
input_text = st.session_state.input_text
|
| 235 |
if not input_text.strip():
|
|
@@ -284,7 +274,6 @@ if st.session_state.page == "Home":
|
|
| 284 |
else:
|
| 285 |
result = utils.v2_clone_voice(tmp_file.name, user_id, token)
|
| 286 |
voice_id = result['voice_id']
|
| 287 |
-
print(f"Voice Clone succesfully from mode {model} id is {voice_id}")
|
| 288 |
reference_audio.seek(0)
|
| 289 |
classes.upload_voice_clone_audio(reference_audio, voice_id)
|
| 290 |
voice_path = cache_key
|
|
@@ -312,14 +301,6 @@ if st.session_state.page == "Home":
|
|
| 312 |
)
|
| 313 |
)
|
| 314 |
loop.close()
|
| 315 |
-
|
| 316 |
-
# st.success(status_msg)
|
| 317 |
-
# st.audio(audio, sample_rate=sr)
|
| 318 |
-
|
| 319 |
-
# st.session_state.show_feedback = True
|
| 320 |
-
# st.session_state.last_session_id = session_id
|
| 321 |
-
# st.success(status_msg)
|
| 322 |
-
|
| 323 |
# Store audio + session info in state, mark as available
|
| 324 |
st.session_state.last_msg = status_msg
|
| 325 |
st.session_state.last_audio = audio
|
|
@@ -327,7 +308,7 @@ if st.session_state.page == "Home":
|
|
| 327 |
st.session_state.last_session_id = session_id
|
| 328 |
st.session_state.has_audio = True
|
| 329 |
st.session_state.show_feedback = True
|
| 330 |
-
|
| 331 |
except Exception as e:
|
| 332 |
st.error(f"Error: {str(e)}")
|
| 333 |
st.session_state.show_feedback = False
|
|
|
|
| 74 |
st.title("Home")
|
| 75 |
st.markdown("---")
|
| 76 |
st.header("Models......")
|
| 77 |
+
model = st.radio("Select Model", ["V2", "V1"])
|
| 78 |
if model == "V1":
|
| 79 |
st.header("Languages.....")
|
| 80 |
language = st.selectbox("Select Language", list(utils.V1_LANGUAGES.keys()))
|
|
|
|
| 85 |
reference_audio = None
|
| 86 |
else:
|
| 87 |
st.info("Give a reference audio (min 5 seconds)")
|
|
|
|
|
|
|
|
|
|
| 88 |
audio_source = st.radio(
|
| 89 |
"Reference audio source",
|
| 90 |
["Upload file", "Record audio"],
|
|
|
|
| 115 |
reference_audio = None
|
| 116 |
else:
|
| 117 |
st.info("Give a reference audio (min 5 seconds)")
|
|
|
|
|
|
|
|
|
|
| 118 |
audio_source = st.radio(
|
| 119 |
"Reference audio source",
|
| 120 |
["Upload file", "Record audio"],
|
|
|
|
| 217 |
if generate_btn:
|
| 218 |
|
| 219 |
session_id = utils.generate_session_id()
|
|
|
|
| 220 |
|
| 221 |
# Validate pronunciation input
|
|
|
|
| 222 |
pronunciation_dict_str = st.session_state.pronunc_dict
|
|
|
|
|
|
|
| 223 |
|
| 224 |
input_text = st.session_state.input_text
|
| 225 |
if not input_text.strip():
|
|
|
|
| 274 |
else:
|
| 275 |
result = utils.v2_clone_voice(tmp_file.name, user_id, token)
|
| 276 |
voice_id = result['voice_id']
|
|
|
|
| 277 |
reference_audio.seek(0)
|
| 278 |
classes.upload_voice_clone_audio(reference_audio, voice_id)
|
| 279 |
voice_path = cache_key
|
|
|
|
| 301 |
)
|
| 302 |
)
|
| 303 |
loop.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 304 |
# Store audio + session info in state, mark as available
|
| 305 |
st.session_state.last_msg = status_msg
|
| 306 |
st.session_state.last_audio = audio
|
|
|
|
| 308 |
st.session_state.last_session_id = session_id
|
| 309 |
st.session_state.has_audio = True
|
| 310 |
st.session_state.show_feedback = True
|
| 311 |
+
print("Generation completed......")
|
| 312 |
except Exception as e:
|
| 313 |
st.error(f"Error: {str(e)}")
|
| 314 |
st.session_state.show_feedback = False
|
utils.py
CHANGED
|
@@ -46,16 +46,13 @@ def generate_session_id():
|
|
| 46 |
def unpack_pkl_data(s3_key=parameters.pkl_data_key):
|
| 47 |
exists = aws.check_if_exists(object_key=s3_key)
|
| 48 |
if not exists:
|
| 49 |
-
print("Pickle file does not exist!!!")
|
| 50 |
return None
|
| 51 |
try:
|
| 52 |
with aws.fs.open(f"s3://{aws.bucket_name}/{s3_key}", "rb") as f:
|
| 53 |
file_bytes = f.read()
|
| 54 |
loaded_data = pickle.loads(file_bytes)
|
| 55 |
-
print("Load pickle data completed.")
|
| 56 |
return loaded_data
|
| 57 |
except Exception as e:
|
| 58 |
-
print(f"Unable to load pickle file from S3 due to this: {e}")
|
| 59 |
return None
|
| 60 |
|
| 61 |
st.session_state.loaded_data = unpack_pkl_data()
|
|
@@ -69,7 +66,6 @@ if st.session_state.loaded_data:
|
|
| 69 |
V1_SPEAKERS = st.session_state.loaded_data['V1_SPEAKERS']
|
| 70 |
V2_SPEAKERS = st.session_state.loaded_data['V2_SPEAKERS']
|
| 71 |
else:
|
| 72 |
-
print("Failed to load pickle data. Exiting script.")
|
| 73 |
st.stop()
|
| 74 |
|
| 75 |
|
|
@@ -83,7 +79,6 @@ def save_generated_audio(audio_data, session_id):
|
|
| 83 |
aws.s3_upload_wav(obj=audio_file, s3_key=s3_key)
|
| 84 |
return s3_key
|
| 85 |
except Exception as e:
|
| 86 |
-
print(f"Error saving generated audio: {e}")
|
| 87 |
return None
|
| 88 |
|
| 89 |
def audio_header_creater(audio, channels=1, sample_rate=8000, bits_per_sample=16):
|
|
@@ -123,6 +118,7 @@ def ensure_csv_exists(sep="|"):
|
|
| 123 |
"speech_rate",
|
| 124 |
"loudness",
|
| 125 |
"refine_generation",
|
|
|
|
| 126 |
"rating",
|
| 127 |
"feedback",
|
| 128 |
]
|
|
@@ -154,6 +150,7 @@ def ensure_error_logs_csv_exists(sep="|"):
|
|
| 154 |
"speech_rate",
|
| 155 |
"loudness",
|
| 156 |
"refine_generation",
|
|
|
|
| 157 |
]
|
| 158 |
|
| 159 |
df = pd.DataFrame(columns=columns)
|
|
@@ -174,6 +171,7 @@ def log_initial_submission(
|
|
| 174 |
user_id,
|
| 175 |
voice_path,
|
| 176 |
text_input,
|
|
|
|
| 177 |
expressiveness=1.0,
|
| 178 |
stability=100,
|
| 179 |
clarity=1.0,
|
|
@@ -194,6 +192,7 @@ def log_initial_submission(
|
|
| 194 |
{
|
| 195 |
"timestamp": [timestamp],
|
| 196 |
"session_id": [session_id],
|
|
|
|
| 197 |
"language": [language],
|
| 198 |
"input_method": [input_method],
|
| 199 |
"agent_used": [agent_used if agent_used else "None"],
|
|
@@ -229,7 +228,6 @@ def log_initial_submission(
|
|
| 229 |
|
| 230 |
return "Audio generated and saved!"
|
| 231 |
except Exception as e:
|
| 232 |
-
print(f"Error saving submission: {e}")
|
| 233 |
return f"Error: Could not save data - {str(e)}"
|
| 234 |
else:
|
| 235 |
try:
|
|
@@ -252,6 +250,7 @@ def log_initial_submission(
|
|
| 252 |
"speech_rate": [speech_rate],
|
| 253 |
"loudness": [loudness],
|
| 254 |
"refine_generation": [refine_generation],
|
|
|
|
| 255 |
}
|
| 256 |
)
|
| 257 |
|
|
@@ -286,7 +285,6 @@ def update_rating(session_id, rating_index, feedback_msg: str):
|
|
| 286 |
s3_csv_file = ensure_csv_exists(sep="|")
|
| 287 |
|
| 288 |
if not aws.check_if_exists(object_key=s3_csv_file):
|
| 289 |
-
print(f"CSV file doesn't exist or is empty: {s3_csv_file}")
|
| 290 |
return "Error: No data found"
|
| 291 |
|
| 292 |
with aws.fs.open(f"s3://{aws.bucket_name}/{s3_csv_file}", "r") as f:
|
|
@@ -314,12 +312,10 @@ def update_rating(session_id, rating_index, feedback_msg: str):
|
|
| 314 |
st.success(f"Your rating of {star_dict[rating]} submitted successfully!!\nThank you for the feedback!!")
|
| 315 |
)
|
| 316 |
else:
|
| 317 |
-
print(f"Session {session_id} not found in the CSV file")
|
| 318 |
return (
|
| 319 |
f"Could not find Session {session_id} in tracks\nMake sure to press Generate button Once!!!"
|
| 320 |
), None
|
| 321 |
except Exception as e:
|
| 322 |
-
print(f"Error updating rating: {e}")
|
| 323 |
return f"Error: Could not update rating - {str(e)}", None
|
| 324 |
|
| 325 |
|
|
@@ -424,7 +420,6 @@ async def v1_generate_speech_async(
|
|
| 424 |
else:
|
| 425 |
send_voice_id = voice_id
|
| 426 |
|
| 427 |
-
print(f"\nPayload for Generation:-\nRequest goes to:- {request_to}\nText:- {text}\nVoide Id : {send_voice_id}\nextra_body :{extra_body}\nextra_headers : {extra_headers}\n")
|
| 428 |
# Use AsyncOpenAI streaming response (matches your original code)
|
| 429 |
try:
|
| 430 |
async with v1_client.audio.speech.with_streaming_response.create(
|
|
@@ -450,6 +445,7 @@ async def v1_generate_speech_async(
|
|
| 450 |
user_id=user_id,
|
| 451 |
voice_path=saved_path,
|
| 452 |
text_input=text,
|
|
|
|
| 453 |
expressiveness=expressive,
|
| 454 |
stability=stability,
|
| 455 |
clarity=clarity,
|
|
@@ -458,26 +454,7 @@ async def v1_generate_speech_async(
|
|
| 458 |
)
|
| 459 |
return sr, aud
|
| 460 |
except Exception as e:
|
| 461 |
-
|
| 462 |
-
print(f"Error:- {e}")
|
| 463 |
-
log_initial_submission(
|
| 464 |
-
code=response.status_code,
|
| 465 |
-
session_id=session_id,
|
| 466 |
-
language=language_code,
|
| 467 |
-
input_method=voice_mode,
|
| 468 |
-
agent_used=voice_id,
|
| 469 |
-
user_id=user_id,
|
| 470 |
-
voice_path=None,
|
| 471 |
-
text_input=text,
|
| 472 |
-
expressiveness=expressive,
|
| 473 |
-
stability=stability,
|
| 474 |
-
clarity=clarity,
|
| 475 |
-
speech_rate=speech_rate,
|
| 476 |
-
loudness=volume_level,
|
| 477 |
-
refine_generation=False,
|
| 478 |
-
err_code=response.status_code,
|
| 479 |
-
err_msg=e,
|
| 480 |
-
)
|
| 481 |
|
| 482 |
async def v2_generate_speech_async(
|
| 483 |
session_id: str,
|
|
@@ -515,7 +492,6 @@ async def v2_generate_speech_async(
|
|
| 515 |
else:
|
| 516 |
send_voice_id = voice_id
|
| 517 |
|
| 518 |
-
print(f"\nPayload for Generation:-\nRequest goes to:- {request_to}\nText:- {text}\nVoide Id : {send_voice_id}\nextra_body :{extra_body}\n")
|
| 519 |
# Use AsyncOpenAI streaming response (matches your original code)
|
| 520 |
try:
|
| 521 |
async with v2_client.audio.speech.with_streaming_response.create(
|
|
@@ -540,32 +516,13 @@ async def v2_generate_speech_async(
|
|
| 540 |
user_id=user_id,
|
| 541 |
voice_path=saved_path,
|
| 542 |
text_input=text,
|
|
|
|
| 543 |
expressiveness=expressive,
|
| 544 |
stability=stability,
|
| 545 |
clarity=clarity,
|
| 546 |
speech_rate=speech_rate,
|
| 547 |
loudness=volume_level
|
| 548 |
)
|
| 549 |
-
status_code = response.status_code
|
| 550 |
return sr, aud
|
| 551 |
except Exception as e:
|
| 552 |
-
|
| 553 |
-
print(f"Error:- {e}")
|
| 554 |
-
log_initial_submission(
|
| 555 |
-
code=status_code,
|
| 556 |
-
session_id=session_id,
|
| 557 |
-
language=language_code,
|
| 558 |
-
input_method=voice_mode,
|
| 559 |
-
agent_used=voice_id,
|
| 560 |
-
user_id=user_id,
|
| 561 |
-
voice_path=None,
|
| 562 |
-
text_input=text,
|
| 563 |
-
expressiveness=expressive,
|
| 564 |
-
stability=stability,
|
| 565 |
-
clarity=clarity,
|
| 566 |
-
speech_rate=speech_rate,
|
| 567 |
-
loudness=volume_level,
|
| 568 |
-
refine_generation=False,
|
| 569 |
-
err_code=response.status_code,
|
| 570 |
-
err_msg=e,
|
| 571 |
-
)
|
|
|
|
| 46 |
def unpack_pkl_data(s3_key=parameters.pkl_data_key):
|
| 47 |
exists = aws.check_if_exists(object_key=s3_key)
|
| 48 |
if not exists:
|
|
|
|
| 49 |
return None
|
| 50 |
try:
|
| 51 |
with aws.fs.open(f"s3://{aws.bucket_name}/{s3_key}", "rb") as f:
|
| 52 |
file_bytes = f.read()
|
| 53 |
loaded_data = pickle.loads(file_bytes)
|
|
|
|
| 54 |
return loaded_data
|
| 55 |
except Exception as e:
|
|
|
|
| 56 |
return None
|
| 57 |
|
| 58 |
st.session_state.loaded_data = unpack_pkl_data()
|
|
|
|
| 66 |
V1_SPEAKERS = st.session_state.loaded_data['V1_SPEAKERS']
|
| 67 |
V2_SPEAKERS = st.session_state.loaded_data['V2_SPEAKERS']
|
| 68 |
else:
|
|
|
|
| 69 |
st.stop()
|
| 70 |
|
| 71 |
|
|
|
|
| 79 |
aws.s3_upload_wav(obj=audio_file, s3_key=s3_key)
|
| 80 |
return s3_key
|
| 81 |
except Exception as e:
|
|
|
|
| 82 |
return None
|
| 83 |
|
| 84 |
def audio_header_creater(audio, channels=1, sample_rate=8000, bits_per_sample=16):
|
|
|
|
| 118 |
"speech_rate",
|
| 119 |
"loudness",
|
| 120 |
"refine_generation",
|
| 121 |
+
"model_name"
|
| 122 |
"rating",
|
| 123 |
"feedback",
|
| 124 |
]
|
|
|
|
| 150 |
"speech_rate",
|
| 151 |
"loudness",
|
| 152 |
"refine_generation",
|
| 153 |
+
"model_name"
|
| 154 |
]
|
| 155 |
|
| 156 |
df = pd.DataFrame(columns=columns)
|
|
|
|
| 171 |
user_id,
|
| 172 |
voice_path,
|
| 173 |
text_input,
|
| 174 |
+
model_name,
|
| 175 |
expressiveness=1.0,
|
| 176 |
stability=100,
|
| 177 |
clarity=1.0,
|
|
|
|
| 192 |
{
|
| 193 |
"timestamp": [timestamp],
|
| 194 |
"session_id": [session_id],
|
| 195 |
+
"model_name":[model_name],
|
| 196 |
"language": [language],
|
| 197 |
"input_method": [input_method],
|
| 198 |
"agent_used": [agent_used if agent_used else "None"],
|
|
|
|
| 228 |
|
| 229 |
return "Audio generated and saved!"
|
| 230 |
except Exception as e:
|
|
|
|
| 231 |
return f"Error: Could not save data - {str(e)}"
|
| 232 |
else:
|
| 233 |
try:
|
|
|
|
| 250 |
"speech_rate": [speech_rate],
|
| 251 |
"loudness": [loudness],
|
| 252 |
"refine_generation": [refine_generation],
|
| 253 |
+
"model_name": [model_name]
|
| 254 |
}
|
| 255 |
)
|
| 256 |
|
|
|
|
| 285 |
s3_csv_file = ensure_csv_exists(sep="|")
|
| 286 |
|
| 287 |
if not aws.check_if_exists(object_key=s3_csv_file):
|
|
|
|
| 288 |
return "Error: No data found"
|
| 289 |
|
| 290 |
with aws.fs.open(f"s3://{aws.bucket_name}/{s3_csv_file}", "r") as f:
|
|
|
|
| 312 |
st.success(f"Your rating of {star_dict[rating]} submitted successfully!!\nThank you for the feedback!!")
|
| 313 |
)
|
| 314 |
else:
|
|
|
|
| 315 |
return (
|
| 316 |
f"Could not find Session {session_id} in tracks\nMake sure to press Generate button Once!!!"
|
| 317 |
), None
|
| 318 |
except Exception as e:
|
|
|
|
| 319 |
return f"Error: Could not update rating - {str(e)}", None
|
| 320 |
|
| 321 |
|
|
|
|
| 420 |
else:
|
| 421 |
send_voice_id = voice_id
|
| 422 |
|
|
|
|
| 423 |
# Use AsyncOpenAI streaming response (matches your original code)
|
| 424 |
try:
|
| 425 |
async with v1_client.audio.speech.with_streaming_response.create(
|
|
|
|
| 445 |
user_id=user_id,
|
| 446 |
voice_path=saved_path,
|
| 447 |
text_input=text,
|
| 448 |
+
model_name=request_to,
|
| 449 |
expressiveness=expressive,
|
| 450 |
stability=stability,
|
| 451 |
clarity=clarity,
|
|
|
|
| 454 |
)
|
| 455 |
return sr, aud
|
| 456 |
except Exception as e:
|
| 457 |
+
st.Error("There is some in Audios Generation. Pleace try later.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 458 |
|
| 459 |
async def v2_generate_speech_async(
|
| 460 |
session_id: str,
|
|
|
|
| 492 |
else:
|
| 493 |
send_voice_id = voice_id
|
| 494 |
|
|
|
|
| 495 |
# Use AsyncOpenAI streaming response (matches your original code)
|
| 496 |
try:
|
| 497 |
async with v2_client.audio.speech.with_streaming_response.create(
|
|
|
|
| 516 |
user_id=user_id,
|
| 517 |
voice_path=saved_path,
|
| 518 |
text_input=text,
|
| 519 |
+
model_name=request_to,
|
| 520 |
expressiveness=expressive,
|
| 521 |
stability=stability,
|
| 522 |
clarity=clarity,
|
| 523 |
speech_rate=speech_rate,
|
| 524 |
loudness=volume_level
|
| 525 |
)
|
|
|
|
| 526 |
return sr, aud
|
| 527 |
except Exception as e:
|
| 528 |
+
st.Error("There is some in Audios Generation. Pleace try later.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|