Spaces:
Paused
Paused
rick
committed on
...not done...
Browse files- pages/main.py +161 -193
pages/main.py
CHANGED
|
@@ -14,6 +14,7 @@ from typing import List
|
|
| 14 |
from typing import Optional
|
| 15 |
from typing import Tuple
|
| 16 |
from typing import Union
|
|
|
|
| 17 |
|
| 18 |
# Third-party libraries
|
| 19 |
import requests
|
|
@@ -63,91 +64,48 @@ def process_tts_message(text_response: str) -> Tuple[Optional[bytes], Optional[f
|
|
| 63 |
st.error(f"Une erreur s'est produite lors de la conversion texte-parole : {e}")
|
| 64 |
return None, None
|
| 65 |
|
| 66 |
-
|
|
|
|
| 67 |
"""
|
| 68 |
Divise un fichier audio en segments de taille maximale spécifiée.
|
| 69 |
|
| 70 |
Args:
|
| 71 |
-
audio_file
|
| 72 |
max_size_mb (int): Taille maximale de chaque segment en Mo.
|
| 73 |
|
| 74 |
Returns:
|
| 75 |
-
List[
|
| 76 |
"""
|
| 77 |
try:
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
temp_audio_file.write(audio_data.encode())
|
| 81 |
-
temp_audio_file.seek(0)
|
| 82 |
-
else:
|
| 83 |
-
temp_audio_file.write(audio_data)
|
| 84 |
-
temp_audio_file.seek(0)
|
| 85 |
-
|
| 86 |
-
audio = AudioSegment.from_file(temp_audio_file, format="wav")
|
| 87 |
duration_ms = len(audio)
|
| 88 |
-
segment_duration_ms = int(
|
|
|
|
|
|
|
|
|
|
| 89 |
|
| 90 |
segments = []
|
| 91 |
for start in range(0, duration_ms, segment_duration_ms):
|
| 92 |
-
tmp_seg_file = tempfile.TemporaryFile()
|
| 93 |
end = min(start + segment_duration_ms, duration_ms)
|
| 94 |
segment = audio[start:end]
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
|
| 100 |
-
temp_audio_file.close()
|
| 101 |
return segments
|
| 102 |
except Exception as e:
|
| 103 |
-
print(f"
|
| 104 |
return []
|
| 105 |
|
| 106 |
-
def transcribe_segment(segment_data: Union[str, bytes], language: Optional[str] = None) -> str:
    """
    Transcribe one audio segment to text.

    The segment is written to an anonymous temporary file which is then
    uploaded through ``client.audio.transcriptions.create`` using the
    ``whisper-1`` model and the ``text`` response format.

    Args:
        segment_data (Union[str, bytes]): Content of the audio segment. A
            ``str`` is encoded to bytes with ``str.encode()`` before being
            written to the temporary file.
        language (Optional[str]): Language of the audio. When falsy, the
            ``language`` argument is left out of the request entirely.

    Returns:
        str: The value returned by the transcription call, or an empty
        string if any step raised an exception.
    """
    # Build the request once; `language` is only sent when provided.
    # NOTE(review): the original author suspected that `language` might not
    # be correctly formatted as ISO 639-1 at this point -- confirm upstream.
    request_args = {"model": "whisper-1", "response_format": "text"}
    if language:
        request_args["language"] = language

    try:
        # The context manager guarantees the temporary file is closed even
        # when the write or the API call raises (previously it leaked).
        with tempfile.TemporaryFile() as audio_segment:
            if isinstance(segment_data, str):
                audio_segment.write(segment_data.encode())
            else:
                audio_segment.write(segment_data)

            # Rewind so the upload reads from the first byte.
            audio_segment.seek(0)
            return client.audio.transcriptions.create(
                file=audio_segment,
                **request_args,
            )
    except Exception as e:
        # Best effort: report the failure and let the caller carry on.
        print(f"Erreur lors de la transcription du segment : {e}")
        return ""
|
| 144 |
|
| 145 |
-
def transcribe_audio(
|
| 146 |
"""
|
| 147 |
-
Transcrit un fichier audio en texte.
|
| 148 |
|
| 149 |
Args:
|
| 150 |
-
|
| 151 |
language (Optional[str]): La langue de l'audio. Par défaut None.
|
| 152 |
|
| 153 |
Returns:
|
|
@@ -156,50 +114,42 @@ def transcribe_audio(audio_data: Union[str, bytes], language: Optional[str] = No
|
|
| 156 |
max_size_mb = 25
|
| 157 |
|
| 158 |
try:
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
return full_transcript.strip()
|
| 191 |
-
else:
|
| 192 |
-
status.update(label="Transcription de l'audio...")
|
| 193 |
-
temp_audio_file.seek(0)
|
| 194 |
-
transcript = transcribe_segment(temp_audio_file.read(), language)
|
| 195 |
-
status.update(label="Transcription terminée", state="complete")
|
| 196 |
|
| 197 |
-
|
| 198 |
except Exception as e:
|
| 199 |
-
|
| 200 |
return ""
|
| 201 |
-
finally:
|
| 202 |
-
temp_audio_file.close()
|
| 203 |
|
| 204 |
|
| 205 |
def detect_language(input_text: str, temperature: float = 0.01) -> str:
|
|
@@ -252,13 +202,13 @@ def detect_language(input_text: str, temperature: float = 0.01) -> str:
|
|
| 252 |
|
| 253 |
|
| 254 |
|
| 255 |
-
def concatenate_audio_files(audio_list: List[Tuple[bytes, float]]) -> Optional[bytes]:
|
| 256 |
"""
|
| 257 |
Concatène plusieurs fichiers audio avec des effets sonores.
|
| 258 |
|
| 259 |
Args:
|
| 260 |
-
audio_list (List[Tuple[bytes, float]]): Une liste de tuples, chacun contenant
|
| 261 |
-
des octets audio et la durée.
|
| 262 |
|
| 263 |
Returns:
|
| 264 |
Optional[bytes]: L'audio concaténé sous forme d'octets, ou None en cas d'erreur.
|
|
@@ -278,13 +228,20 @@ def concatenate_audio_files(audio_list: List[Tuple[bytes, float]]) -> Optional[b
|
|
| 278 |
# 1,5 seconde de silence
|
| 279 |
silence = AudioSegment.silent(duration=1500) # 1500 ms = 1.5 secondes
|
| 280 |
|
| 281 |
-
for
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 282 |
# Convertir les octets en un segment audio
|
| 283 |
segment = AudioSegment.from_mp3(io.BytesIO(audio_bytes))
|
| 284 |
|
| 285 |
# Ajouter le son de début, le segment TTS, le son de fin et le silence
|
| 286 |
final_audio += begin_sound + segment + end_sound + silence
|
| 287 |
|
|
|
|
| 288 |
# Convertir le segment audio final en octets
|
| 289 |
buffer = io.BytesIO()
|
| 290 |
final_audio.export(buffer, format="mp3")
|
|
@@ -677,102 +634,113 @@ def main_page():
|
|
| 677 |
|
| 678 |
# Traitement de l'entrée audio de l'utilisateur
|
| 679 |
if len(st.session_state.audio) > 0:
|
| 680 |
-
|
| 681 |
-
|
| 682 |
-
|
| 683 |
-
|
| 684 |
-
st.write(f"Frame rate: {st.session_state.audio.frame_rate}, Frame width: {st.session_state.audio.frame_width}, Duration: {st.session_state.audio.duration_seconds} seconds")
|
| 685 |
|
| 686 |
-
|
| 687 |
-
|
| 688 |
-
|
| 689 |
-
language=st.session_state.language_detected
|
| 690 |
-
)
|
| 691 |
|
| 692 |
-
|
| 693 |
-
|
| 694 |
-
|
| 695 |
-
|
| 696 |
-
|
| 697 |
-
input_text=st.session_state.transcription, temperature=0.01
|
| 698 |
-
)
|
| 699 |
-
st.markdown(
|
| 700 |
-
f"- {get_translation('langue_detectee')}".format(
|
| 701 |
-
f"{convert_iso6391_to_language_name(st.session_state.language_detected)}"
|
| 702 |
-
)
|
| 703 |
-
)
|
| 704 |
-
st.markdown(
|
| 705 |
-
f"🎤 {get_translation('transcription_audio')}".format(
|
| 706 |
-
f"{st.session_state.transcription}"
|
| 707 |
-
)
|
| 708 |
-
)
|
| 709 |
|
| 710 |
-
|
| 711 |
-
|
| 712 |
-
|
| 713 |
-
|
|
|
|
| 714 |
|
| 715 |
-
|
| 716 |
-
|
| 717 |
-
|
| 718 |
-
|
| 719 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 720 |
)
|
| 721 |
-
),
|
| 722 |
-
to_lang=st.session_state.target_language
|
| 723 |
-
)
|
| 724 |
|
| 725 |
-
with st.chat_message("assistant", avatar="👻"):
|
| 726 |
-
message_placeholder = st.empty()
|
| 727 |
-
st.session_state.response_generator = process_message(
|
| 728 |
-
st.session_state.transcription,
|
| 729 |
-
st.session_state.operation_prompt,
|
| 730 |
-
st.session_state.enable_tts_for_input_from_audio_record,
|
| 731 |
-
st.session_state.system_prompt
|
| 732 |
-
)
|
| 733 |
|
| 734 |
-
|
| 735 |
-
|
| 736 |
-
|
| 737 |
-
|
| 738 |
-
message_placeholder.markdown(st.session_state.full_response)
|
| 739 |
|
| 740 |
-
|
| 741 |
-
|
| 742 |
-
|
| 743 |
-
|
| 744 |
-
|
| 745 |
-
( st.session_state.tts_audio,
|
| 746 |
-
st.session_state.tts_duration )
|
| 747 |
)
|
| 748 |
-
|
| 749 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 750 |
|
| 751 |
-
if st.session_state.audio_list:
|
| 752 |
-
st.session_state.final_audio = concatenate_audio_files(st.session_state.audio_list)
|
| 753 |
-
|
| 754 |
-
with st.container(border=True):
|
| 755 |
-
|
| 756 |
-
# Générer un nom de fichier unique
|
| 757 |
-
st.session_state.timestamp = time.strftime("%Y%m%d-%H%M%S")
|
| 758 |
-
st.session_state.langues = "_".join([lang["iso-639-1"] for lang in st.session_state.selected_languages])
|
| 759 |
-
st.session_state.nom_fichier = f"reponse_audio_{st.session_state.langues}_{st.session_state.timestamp}.mp3"
|
| 760 |
-
|
| 761 |
-
st.audio(st.session_state.final_audio,
|
| 762 |
-
format="audio/mp3",
|
| 763 |
-
autoplay=st.session_state.autoplay_tts)
|
| 764 |
-
|
| 765 |
-
st.download_button(
|
| 766 |
-
label=f"📥 {get_translation('telecharger_audio')}",
|
| 767 |
-
data=st.session_state.final_audio,
|
| 768 |
-
file_name=st.session_state.nom_fichier,
|
| 769 |
-
mime="audio/mp3",
|
| 770 |
-
use_container_width=True,
|
| 771 |
-
type="primary",
|
| 772 |
-
key=f"download_button_{st.session_state.langues}_{st.session_state.timestamp}",
|
| 773 |
-
)
|
| 774 |
-
#
|
| 775 |
-
clear_inputs_garbages()
|
| 776 |
|
| 777 |
|
| 778 |
def clear_inputs_garbages(sessions_state_list: Optional[list] =
|
|
@@ -863,4 +831,4 @@ with st.sidebar:
|
|
| 863 |
|
| 864 |
|
| 865 |
|
| 866 |
-
main_page()
|
|
|
|
| 14 |
from typing import Optional
|
| 15 |
from typing import Tuple
|
| 16 |
from typing import Union
|
| 17 |
+
from io import BytesIO
|
| 18 |
|
| 19 |
# Third-party libraries
|
| 20 |
import requests
|
|
|
|
| 64 |
st.error(f"Une erreur s'est produite lors de la conversion texte-parole : {e}")
|
| 65 |
return None, None
|
| 66 |
|
| 67 |
+
# ecrire ici la fonction: split_audio
|
| 68 |
+
def split_audio(audio_file, max_size_mb: int = 25) -> List[bytes]:
    """
    Split an audio file into consecutive MP3-encoded segments.

    The length of a segment is chosen so that its *uncompressed* audio
    (``frame_rate * sample_width * channels`` bytes per second) fits in
    ``max_size_mb``. Each segment is then exported to MP3.

    Args:
        audio_file: Seekable binary file object holding the audio to split.
        max_size_mb (int): Size budget for each segment, in MiB. Must be
            strictly positive.

    Returns:
        List[bytes]: The MP3 bytes of each segment, in playback order. An
        empty list is returned when the audio is empty or when any step
        fails (the error is printed).
    """
    try:
        if max_size_mb <= 0:
            raise ValueError("max_size_mb doit être strictement positif")

        audio_file.seek(0)
        audio = AudioSegment.from_file(audio_file)
        duration_ms = len(audio)

        # Bytes of raw audio produced per second of sound.
        bytes_per_second = audio.frame_rate * audio.sample_width * audio.channels
        max_bytes = max_size_mb * 1024 * 1024

        # bytes / (bytes per second) = seconds; * 1000 converts to the
        # milliseconds used for slicing. The previous formula multiplied by 8
        # (bits) while dividing by a byte rate and never converted to ms,
        # which produced segments of roughly one second.
        # Clamp to at least 1 ms so range() never receives a zero step.
        segment_duration_ms = max(1, int(max_bytes * 1000 / bytes_per_second))

        segments = []
        for start in range(0, duration_ms, segment_duration_ms):
            end = min(start + segment_duration_ms, duration_ms)
            segment = audio[start:end]

            # Encode the slice in memory; getvalue() must be read before the
            # buffer is closed by the context manager.
            with io.BytesIO() as buffer:
                segment.export(buffer, format="mp3")
                segments.append(buffer.getvalue())

        return segments
    except Exception as e:
        # Best effort: report the failure and signal it with an empty list.
        print(f"Une erreur s'est produite lors de la division de l'audio : {e}")
        return []
|
| 101 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
|
| 103 |
+
def transcribe_audio(filepath: Union[str, IO], language: Optional[str] = None) -> str:
|
| 104 |
"""
|
| 105 |
+
Transcrit un fichier audio temporaire en texte.
|
| 106 |
|
| 107 |
Args:
|
| 108 |
+
filepath Chemin vers le fichier audio temporaire à transcrire.
|
| 109 |
language (Optional[str]): La langue de l'audio. Par défaut None.
|
| 110 |
|
| 111 |
Returns:
|
|
|
|
| 114 |
max_size_mb = 25
|
| 115 |
|
| 116 |
try:
|
| 117 |
+
transcriptions = []
|
| 118 |
+
with open(filepath if isinstance(filepath, str) else filepath.name, "rb") as f:
|
| 119 |
+
# filepath peut etre un chemin vers un fichier audio ou un objet IO
|
| 120 |
+
# verifier si le fichier audio fait plus de 25 Mo
|
| 121 |
+
|
| 122 |
+
# Diviser l'audio en segments de taille maximale
|
| 123 |
+
#segments = split_audio(f, max_size_mb)
|
| 124 |
+
f.seek(0)
|
| 125 |
+
audio = AudioSegment.from_file(f)
|
| 126 |
+
duration_ms = len(audio)
|
| 127 |
+
segment_duration_ms = int(
|
| 128 |
+
(max_size_mb * 1024 * 1024 * 8) /
|
| 129 |
+
(audio.frame_rate * audio.sample_width * audio.channels)
|
| 130 |
+
)
|
| 131 |
+
|
| 132 |
+
for start in range(0, duration_ms, segment_duration_ms):
|
| 133 |
+
end = min(start + segment_duration_ms, duration_ms)
|
| 134 |
+
segment = audio[start:end]
|
| 135 |
+
|
| 136 |
+
buffer = BytesIO()
|
| 137 |
+
segment.export(buffer, format="mp3")
|
| 138 |
+
buffer.seek(0)
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
response = client.audio.transcriptions.create(
|
| 142 |
+
model="whisper-1",
|
| 143 |
+
file=("audio.mp3", buffer),
|
| 144 |
+
language=language,
|
| 145 |
+
response_format="text"
|
| 146 |
+
)
|
| 147 |
+
transcriptions.append(response)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
|
| 149 |
+
return " ".join(transcriptions)
|
| 150 |
except Exception as e:
|
| 151 |
+
print(f"Erreur lors de la transcription de l'audio : {e}")
|
| 152 |
return ""
|
|
|
|
|
|
|
| 153 |
|
| 154 |
|
| 155 |
def detect_language(input_text: str, temperature: float = 0.01) -> str:
|
|
|
|
| 202 |
|
| 203 |
|
| 204 |
|
| 205 |
+
def concatenate_audio_files(audio_list: List[Tuple[Union[bytes, str], float]]) -> Optional[bytes]:
|
| 206 |
"""
|
| 207 |
Concatène plusieurs fichiers audio avec des effets sonores.
|
| 208 |
|
| 209 |
Args:
|
| 210 |
+
audio_list (List[Tuple[Union[bytes, str], float]]): Une liste de tuples, chacun contenant
|
| 211 |
+
des octets audio (ou une chaîne base64) et la durée.
|
| 212 |
|
| 213 |
Returns:
|
| 214 |
Optional[bytes]: L'audio concaténé sous forme d'octets, ou None en cas d'erreur.
|
|
|
|
| 228 |
# 1,5 seconde de silence
|
| 229 |
silence = AudioSegment.silent(duration=1500) # 1500 ms = 1.5 secondes
|
| 230 |
|
| 231 |
+
for audio_data, _ in audio_list:
|
| 232 |
+
# Convertir en bytes si c'est une chaîne base64
|
| 233 |
+
if isinstance(audio_data, str):
|
| 234 |
+
audio_bytes = base64.b64decode(audio_data)
|
| 235 |
+
else:
|
| 236 |
+
audio_bytes = audio_data
|
| 237 |
+
|
| 238 |
# Convertir les octets en un segment audio
|
| 239 |
segment = AudioSegment.from_mp3(io.BytesIO(audio_bytes))
|
| 240 |
|
| 241 |
# Ajouter le son de début, le segment TTS, le son de fin et le silence
|
| 242 |
final_audio += begin_sound + segment + end_sound + silence
|
| 243 |
|
| 244 |
+
|
| 245 |
# Convertir le segment audio final en octets
|
| 246 |
buffer = io.BytesIO()
|
| 247 |
final_audio.export(buffer, format="mp3")
|
|
|
|
| 634 |
|
| 635 |
# Traitement de l'entrée audio de l'utilisateur
|
| 636 |
if len(st.session_state.audio) > 0:
|
| 637 |
+
if st.session_state.audio:
|
| 638 |
+
try:
|
| 639 |
+
st.subheader(f"Frame rate: {st.session_state.audio.frame_rate}, Frame width: {st.session_state.audio.frame_width}, Duration: {st.session_state.audio.duration_seconds} seconds")
|
|
|
|
|
|
|
| 640 |
|
| 641 |
+
with tempfile.NamedTemporaryFile(suffix=".mp3", delete_on_close=False) as tmp_file:
|
| 642 |
+
st.session_state.audio.export(tmp_file, format="mp3")
|
| 643 |
+
tmp_file.close()
|
|
|
|
|
|
|
| 644 |
|
| 645 |
+
# Transcrire l'audio en texte
|
| 646 |
+
st.session_state.transcription = transcribe_audio(
|
| 647 |
+
tmp_file,
|
| 648 |
+
language=st.session_state.language_detected
|
| 649 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 650 |
|
| 651 |
+
st.markdown(
|
| 652 |
+
f"🎤 {get_translation('transcription_audio')}".format(
|
| 653 |
+
f"{st.session_state.transcription}"
|
| 654 |
+
)
|
| 655 |
+
)
|
| 656 |
|
| 657 |
+
# Detecter la langue du texte transcrit (si la langue source n'est pas détectée)
|
| 658 |
+
if st.session_state.language_detected is None:
|
| 659 |
+
st.session_state.language_detected = detect_language(
|
| 660 |
+
input_text=st.session_state.transcription, temperature=0.01
|
| 661 |
+
)
|
| 662 |
+
st.markdown(
|
| 663 |
+
f"- {get_translation('langue_detectee')}".format(
|
| 664 |
+
f"{convert_iso6391_to_language_name(st.session_state.language_detected)}"
|
| 665 |
+
)
|
| 666 |
)
|
|
|
|
|
|
|
|
|
|
| 667 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 668 |
|
| 669 |
+
st.session_state.audio_list = []
|
| 670 |
+
for cursor_selected_lang in st.session_state.selected_languages:
|
| 671 |
+
st.session_state.target_language = cursor_selected_lang["iso-639-1"]
|
| 672 |
+
st.session_state.full_response = ""
|
|
|
|
| 673 |
|
| 674 |
+
# Initialisation du mode de traitement pour la langue cible actuelle
|
| 675 |
+
st.session_state.system_prompt, st.session_state.operation_prompt = init_process_mode(from_lang=
|
| 676 |
+
(
|
| 677 |
+
st.session_state.language_detected if "language_detected" in st.session_state.language_detected else convert_language_name_to_iso6391(
|
| 678 |
+
st.session_state.interface_language
|
|
|
|
|
|
|
| 679 |
)
|
| 680 |
+
),
|
| 681 |
+
to_lang=st.session_state.target_language
|
| 682 |
+
)
|
| 683 |
+
|
| 684 |
+
with st.chat_message("assistant", avatar="👻"):
|
| 685 |
+
message_placeholder = st.empty()
|
| 686 |
+
st.session_state.response_generator = process_message(
|
| 687 |
+
st.session_state.transcription,
|
| 688 |
+
st.session_state.operation_prompt,
|
| 689 |
+
st.session_state.enable_tts_for_input_from_audio_record,
|
| 690 |
+
st.session_state.system_prompt
|
| 691 |
+
)
|
| 692 |
+
|
| 693 |
+
for response_chunk in st.session_state.response_generator:
|
| 694 |
+
message_placeholder.markdown(response_chunk)
|
| 695 |
+
st.session_state.end_response = st.session_state.response_generator.close()
|
| 696 |
+
if st.session_state.full_response != "":
|
| 697 |
+
message_placeholder.markdown(st.session_state.full_response)
|
| 698 |
+
|
| 699 |
+
if st.session_state.enable_tts_for_input_from_audio_record:
|
| 700 |
+
st.session_state.tts_audio, st.session_state.tts_duration = process_tts_message(st.session_state.full_response)
|
| 701 |
+
|
| 702 |
+
if st.session_state.tts_audio:
|
| 703 |
+
st.session_state.audio_list.append(
|
| 704 |
+
( st.session_state.tts_audio,
|
| 705 |
+
st.session_state.tts_duration )
|
| 706 |
+
)
|
| 707 |
+
else:
|
| 708 |
+
pass
|
| 709 |
+
|
| 710 |
+
if st.session_state.audio_list:
|
| 711 |
+
st.session_state.final_audio = concatenate_audio_files(st.session_state.audio_list)
|
| 712 |
+
|
| 713 |
+
with st.container(border=True):
|
| 714 |
+
|
| 715 |
+
# Générer un nom de fichier unique
|
| 716 |
+
st.session_state.timestamp = time.strftime("%Y%m%d-%H%M%S")
|
| 717 |
+
st.session_state.langues = "_".join([lang["iso-639-1"] for lang in st.session_state.selected_languages])
|
| 718 |
+
st.session_state.nom_fichier = f"reponse_audio_{st.session_state.langues}_{st.session_state.timestamp}.mp3"
|
| 719 |
+
|
| 720 |
+
st.audio(st.session_state.final_audio,
|
| 721 |
+
format="audio/mp3",
|
| 722 |
+
autoplay=st.session_state.autoplay_tts)
|
| 723 |
+
|
| 724 |
+
st.download_button(
|
| 725 |
+
label=f"📥 {get_translation('telecharger_audio')}",
|
| 726 |
+
data=st.session_state.final_audio,
|
| 727 |
+
file_name=st.session_state.nom_fichier,
|
| 728 |
+
mime="audio/mp3",
|
| 729 |
+
use_container_width=True,
|
| 730 |
+
type="primary",
|
| 731 |
+
key=f"download_button_{st.session_state.langues}_{st.session_state.timestamp}",
|
| 732 |
+
)
|
| 733 |
+
|
| 734 |
+
except Exception as e:
|
| 735 |
+
st.error(f"[AUDIO] - Erreur lors de l'exportation de l'audio : {str(e)}")
|
| 736 |
+
finally:
|
| 737 |
+
clear_inputs_garbages()
|
| 738 |
+
# if 'tmp_file' in locals():
|
| 739 |
+
# os.unlink(tmp_file.name)
|
| 740 |
+
|
| 741 |
+
|
| 742 |
+
|
| 743 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 744 |
|
| 745 |
|
| 746 |
def clear_inputs_garbages(sessions_state_list: Optional[list] =
|
|
|
|
| 831 |
|
| 832 |
|
| 833 |
|
| 834 |
+
main_page()
|