rick committed on
Commit
30928b9
·
unverified ·
1 Parent(s): 1dc4889

...not done...

Browse files
Files changed (1) hide show
  1. pages/main.py +161 -193
pages/main.py CHANGED
@@ -14,6 +14,7 @@ from typing import List
14
  from typing import Optional
15
  from typing import Tuple
16
  from typing import Union
 
17
 
18
  # Third-party libraries
19
  import requests
@@ -63,91 +64,48 @@ def process_tts_message(text_response: str) -> Tuple[Optional[bytes], Optional[f
63
  st.error(f"Une erreur s'est produite lors de la conversion texte-parole : {e}")
64
  return None, None
65
 
66
- def split_audio(audio_data: Union[str, bytes], max_size_mb: int = 25) -> List[str]:
 
67
  """
68
  Divise un fichier audio en segments de taille maximale spécifiée.
69
 
70
  Args:
71
- audio_file (str): Chemin vers le fichier audio.
72
  max_size_mb (int): Taille maximale de chaque segment en Mo.
73
 
74
  Returns:
75
- List[str]: Liste des chemins vers les segments audio divisés.
76
  """
77
  try:
78
- temp_audio_file = tempfile.TemporaryFile()
79
- if isinstance(audio_data, str):
80
- temp_audio_file.write(audio_data.encode())
81
- temp_audio_file.seek(0)
82
- else:
83
- temp_audio_file.write(audio_data)
84
- temp_audio_file.seek(0)
85
-
86
- audio = AudioSegment.from_file(temp_audio_file, format="wav")
87
  duration_ms = len(audio)
88
- segment_duration_ms = int((max_size_mb * 1024 * 1024 * 8) / (audio.frame_rate * audio.sample_width * audio.channels))
 
 
 
89
 
90
  segments = []
91
  for start in range(0, duration_ms, segment_duration_ms):
92
- tmp_seg_file = tempfile.TemporaryFile()
93
  end = min(start + segment_duration_ms, duration_ms)
94
  segment = audio[start:end]
95
- segment.export(tmp_seg_file, format="mp3")
96
- tmp_seg_file.seek(0)
97
- segments.append(base64.b64encode(tmp_seg_file.read()).decode())
98
- tmp_seg_file.close()
99
 
100
- temp_audio_file.close()
101
  return segments
102
  except Exception as e:
103
- print(f"Erreur lors du découpage de l'audio : {e}")
104
  return []
105
 
106
- def transcribe_segment(segment_data: Union[str, bytes], language: Optional[str] = None) -> str:
107
- """
108
- Transcrit un segment audio en texte.
109
-
110
- Args:
111
- segment_path (str): Chemin vers le segment audio.
112
- language (Optional[str]): La langue de l'audio.
113
-
114
- Returns:
115
- str: Le texte transcrit.
116
- """
117
- try:
118
- audio_segment = tempfile.TemporaryFile()
119
- if isinstance(segment_data, str):
120
- audio_segment.write(segment_data.encode())
121
- else:
122
- audio_segment.write(segment_data)
123
-
124
- audio_segment.seek(0)
125
- if not (language):
126
- transcript = client.audio.transcriptions.create(
127
- model="whisper-1",
128
- file=audio_segment,
129
- response_format="text"
130
- )
131
- else:
132
- transcript = client.audio.transcriptions.create(
133
- model="whisper-1",
134
- file=audio_segment,
135
- language=language, # semble que language soit mal formatter au format ISO6391
136
- response_format="text"
137
- )
138
-
139
- audio_segment.close()
140
- return transcript
141
- except Exception as e:
142
- print(f"Erreur lors de la transcription du segment : {e}")
143
- return ""
144
 
145
- def transcribe_audio(audio_data: Union[str, bytes], language: Optional[str] = None) -> str:
146
  """
147
- Transcrit un fichier audio en texte.
148
 
149
  Args:
150
- audio_file (Union[str, IO]): Le chemin du fichier audio ou un objet fichier ouvert.
151
  language (Optional[str]): La langue de l'audio. Par défaut None.
152
 
153
  Returns:
@@ -156,50 +114,42 @@ def transcribe_audio(audio_data: Union[str, bytes], language: Optional[str] = No
156
  max_size_mb = 25
157
 
158
  try:
159
- with st.status("Transcription de l'audio en cours...") as status:
160
- temp_audio_file = tempfile.TemporaryFile()
161
- if isinstance(audio_data, str):
162
- temp_audio_file.write(audio_data.encode())
163
- temp_audio_file.seek(0)
164
- elif isinstance(audio_data, bytes):
165
- temp_audio_file.write(audio_data)
166
- temp_audio_file.seek(0)
167
-
168
- file_size_mb = temp_audio_file.tell() / (1024 * 1024)
169
-
170
- if file_size_mb > max_size_mb:
171
- status.update(label="Découpage de l'audio en segments...")
172
- temp_audio_file.seek(0)
173
- segments = split_audio(temp_audio_file.read(), max_size_mb)
174
- full_transcript = ""
175
- for i, segment in enumerate(segments):
176
- status.update(label=f"Transcription du segment {i+1}/{len(segments)}...")
177
- if not (language):
178
- transcript = transcribe_segment(
179
- base64.b64decode(segment.encode())
180
- )
181
- else:
182
- transcript = transcribe_segment(
183
- base64.b64decode(segment.encode()),
184
- language=language
185
- )
186
-
187
- full_transcript += f"{transcript} "
188
- status.update(label="Transcription terminée", state="complete")
189
-
190
- return full_transcript.strip()
191
- else:
192
- status.update(label="Transcription de l'audio...")
193
- temp_audio_file.seek(0)
194
- transcript = transcribe_segment(temp_audio_file.read(), language)
195
- status.update(label="Transcription terminée", state="complete")
196
 
197
- return transcript
198
  except Exception as e:
199
- st.error(f"Erreur lors de la transcription : {e}")
200
  return ""
201
- finally:
202
- temp_audio_file.close()
203
 
204
 
205
  def detect_language(input_text: str, temperature: float = 0.01) -> str:
@@ -252,13 +202,13 @@ def detect_language(input_text: str, temperature: float = 0.01) -> str:
252
 
253
 
254
 
255
- def concatenate_audio_files(audio_list: List[Tuple[bytes, float]]) -> Optional[bytes]:
256
  """
257
  Concatène plusieurs fichiers audio avec des effets sonores.
258
 
259
  Args:
260
- audio_list (List[Tuple[bytes, float]]): Une liste de tuples, chacun contenant
261
- des octets audio et la durée.
262
 
263
  Returns:
264
  Optional[bytes]: L'audio concaténé sous forme d'octets, ou None en cas d'erreur.
@@ -278,13 +228,20 @@ def concatenate_audio_files(audio_list: List[Tuple[bytes, float]]) -> Optional[b
278
  # 1,5 seconde de silence après chaque segment
279
  silence = AudioSegment.silent(duration=1500) # 1500 ms = 1.5 secondes
280
 
281
- for audio_bytes, _ in audio_list:
 
 
 
 
 
 
282
  # Convertir les octets en un segment audio
283
  segment = AudioSegment.from_mp3(io.BytesIO(audio_bytes))
284
 
285
  # Ajouter le son de début, le segment TTS, le son de fin et le silence
286
  final_audio += begin_sound + segment + end_sound + silence
287
 
 
288
  # Convertir le segment audio final en octets
289
  buffer = io.BytesIO()
290
  final_audio.export(buffer, format="mp3")
@@ -677,102 +634,113 @@ def main_page():
677
 
678
  # Traitement de l'entrée audio de l'utilisateur
679
  if len(st.session_state.audio) > 0:
680
- tmp_file = tempfile.TemporaryFile()
681
- st.session_state.audio.export(tmp_file, format="wav")
682
- tmp_file.seek(0)
683
-
684
- st.write(f"Frame rate: {st.session_state.audio.frame_rate}, Frame width: {st.session_state.audio.frame_width}, Duration: {st.session_state.audio.duration_seconds} seconds")
685
 
686
- # Transcrire l'audio en texte
687
- st.session_state.transcription = transcribe_audio(
688
- tmp_file.read(),
689
- language=st.session_state.language_detected
690
- )
691
 
692
- tmp_file.close()
693
-
694
- # Detecter la langue du texte transcrit (si la langue source n'est pas détectée)
695
- if st.session_state.language_detected is None:
696
- st.session_state.language_detected = detect_language(
697
- input_text=st.session_state.transcription, temperature=0.01
698
- )
699
- st.markdown(
700
- f"- {get_translation('langue_detectee')}".format(
701
- f"{convert_iso6391_to_language_name(st.session_state.language_detected)}"
702
- )
703
- )
704
- st.markdown(
705
- f"🎤 {get_translation('transcription_audio')}".format(
706
- f"{st.session_state.transcription}"
707
- )
708
- )
709
 
710
- st.session_state.audio_list = []
711
- for cursor_selected_lang in st.session_state.selected_languages:
712
- st.session_state.target_language = cursor_selected_lang["iso-639-1"]
713
- st.session_state.full_response = ""
 
714
 
715
- # Initialisation du mode de traitement pour la langue cible actuelle
716
- st.session_state.system_prompt, st.session_state.operation_prompt = init_process_mode(from_lang=
717
- (
718
- st.session_state.language_detected if "language_detected" in st.session_state.language_detected else convert_language_name_to_iso6391(
719
- st.session_state.interface_language
 
 
 
 
720
  )
721
- ),
722
- to_lang=st.session_state.target_language
723
- )
724
 
725
- with st.chat_message("assistant", avatar="👻"):
726
- message_placeholder = st.empty()
727
- st.session_state.response_generator = process_message(
728
- st.session_state.transcription,
729
- st.session_state.operation_prompt,
730
- st.session_state.enable_tts_for_input_from_audio_record,
731
- st.session_state.system_prompt
732
- )
733
 
734
- for response_chunk in st.session_state.response_generator:
735
- message_placeholder.markdown(response_chunk)
736
- st.session_state.end_response = st.session_state.response_generator.close()
737
- if st.session_state.full_response != "":
738
- message_placeholder.markdown(st.session_state.full_response)
739
 
740
- if st.session_state.enable_tts_for_input_from_audio_record:
741
- st.session_state.tts_audio, st.session_state.tts_duration = process_tts_message(st.session_state.full_response)
742
-
743
- if st.session_state.tts_audio:
744
- st.session_state.audio_list.append(
745
- ( st.session_state.tts_audio,
746
- st.session_state.tts_duration )
747
  )
748
- else:
749
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
750
 
751
- if st.session_state.audio_list:
752
- st.session_state.final_audio = concatenate_audio_files(st.session_state.audio_list)
753
-
754
- with st.container(border=True):
755
-
756
- # Générer un nom de fichier unique
757
- st.session_state.timestamp = time.strftime("%Y%m%d-%H%M%S")
758
- st.session_state.langues = "_".join([lang["iso-639-1"] for lang in st.session_state.selected_languages])
759
- st.session_state.nom_fichier = f"reponse_audio_{st.session_state.langues}_{st.session_state.timestamp}.mp3"
760
-
761
- st.audio(st.session_state.final_audio,
762
- format="audio/mp3",
763
- autoplay=st.session_state.autoplay_tts)
764
-
765
- st.download_button(
766
- label=f"📥 {get_translation('telecharger_audio')}",
767
- data=st.session_state.final_audio,
768
- file_name=st.session_state.nom_fichier,
769
- mime="audio/mp3",
770
- use_container_width=True,
771
- type="primary",
772
- key=f"download_button_{st.session_state.langues}_{st.session_state.timestamp}",
773
- )
774
- #
775
- clear_inputs_garbages()
776
 
777
 
778
  def clear_inputs_garbages(sessions_state_list: Optional[list] =
@@ -863,4 +831,4 @@ with st.sidebar:
863
 
864
 
865
 
866
- main_page()
 
14
  from typing import Optional
15
  from typing import Tuple
16
  from typing import Union
17
+ from io import BytesIO
18
 
19
  # Third-party libraries
20
  import requests
 
64
  st.error(f"Une erreur s'est produite lors de la conversion texte-parole : {e}")
65
  return None, None
66
 
67
+ # Fonction split_audio : découpe l'audio en segments
68
+ def split_audio(audio_file, max_size_mb: int = 25) -> List[bytes]:
69
  """
70
  Divise un fichier audio en segments de taille maximale spécifiée.
71
 
72
  Args:
73
+ audio_file: Fichier audio ouvert en mode binaire.
74
  max_size_mb (int): Taille maximale de chaque segment en Mo.
75
 
76
  Returns:
77
+ List[bytes]: Liste des segments audio divisés sous forme de bytes.
78
  """
79
  try:
80
+ audio_file.seek(0)
81
+ audio = AudioSegment.from_file(audio_file)
 
 
 
 
 
 
 
82
  duration_ms = len(audio)
83
+ segment_duration_ms = int(
84
+ (max_size_mb * 1024 * 1024 * 8) /
85
+ (audio.frame_rate * audio.sample_width * audio.channels)
86
+ )
87
 
88
  segments = []
89
  for start in range(0, duration_ms, segment_duration_ms):
 
90
  end = min(start + segment_duration_ms, duration_ms)
91
  segment = audio[start:end]
92
+
93
+ with io.BytesIO() as buffer:
94
+ segment.export(buffer, format="mp3")
95
+ segments.append(buffer.getvalue())
96
 
 
97
  return segments
98
  except Exception as e:
99
+ print(f"Une erreur s'est produite lors de la division de l'audio : {e}")
100
  return []
101
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
 
103
+ def transcribe_audio(filepath: Union[str, IO], language: Optional[str] = None) -> str:
104
  """
105
+ Transcrit un fichier audio temporaire en texte.
106
 
107
  Args:
108
+ filepath (Union[str, IO]): Chemin vers le fichier audio temporaire à transcrire, ou objet fichier dont l'attribut `name` donne ce chemin.
109
  language (Optional[str]): La langue de l'audio. Par défaut None.
110
 
111
  Returns:
 
114
  max_size_mb = 25
115
 
116
  try:
117
+ transcriptions = []
118
+ with open(filepath if isinstance(filepath, str) else filepath.name, "rb") as f:
119
+ # filepath peut être un chemin vers un fichier audio ou un objet IO
120
+ # TODO : vérifier si le fichier audio dépasse 25 Mo (non implémenté : l'audio est toujours découpé en segments)
121
+
122
+ # Diviser l'audio en segments de taille maximale
123
+ #segments = split_audio(f, max_size_mb)
124
+ f.seek(0)
125
+ audio = AudioSegment.from_file(f)
126
+ duration_ms = len(audio)
127
+ segment_duration_ms = int(
128
+ (max_size_mb * 1024 * 1024 * 8) /
129
+ (audio.frame_rate * audio.sample_width * audio.channels)
130
+ )
131
+
132
+ for start in range(0, duration_ms, segment_duration_ms):
133
+ end = min(start + segment_duration_ms, duration_ms)
134
+ segment = audio[start:end]
135
+
136
+ buffer = BytesIO()
137
+ segment.export(buffer, format="mp3")
138
+ buffer.seek(0)
139
+
140
+
141
+ response = client.audio.transcriptions.create(
142
+ model="whisper-1",
143
+ file=("audio.mp3", buffer),
144
+ language=language,
145
+ response_format="text"
146
+ )
147
+ transcriptions.append(response)
 
 
 
 
 
 
148
 
149
+ return " ".join(transcriptions)
150
  except Exception as e:
151
+ print(f"Erreur lors de la transcription de l'audio : {e}")
152
  return ""
 
 
153
 
154
 
155
  def detect_language(input_text: str, temperature: float = 0.01) -> str:
 
202
 
203
 
204
 
205
+ def concatenate_audio_files(audio_list: List[Tuple[Union[bytes, str], float]]) -> Optional[bytes]:
206
  """
207
  Concatène plusieurs fichiers audio avec des effets sonores.
208
 
209
  Args:
210
+ audio_list (List[Tuple[Union[bytes, str], float]]): Une liste de tuples, chacun contenant
211
+ des octets audio (ou une chaîne base64) et la durée.
212
 
213
  Returns:
214
  Optional[bytes]: L'audio concaténé sous forme d'octets, ou None en cas d'erreur.
 
228
  # 1,5 seconde de silence après chaque segment
229
  silence = AudioSegment.silent(duration=1500) # 1500 ms = 1.5 secondes
230
 
231
+ for audio_data, _ in audio_list:
232
+ # Convertir en bytes si c'est une chaîne base64
233
+ if isinstance(audio_data, str):
234
+ audio_bytes = base64.b64decode(audio_data)
235
+ else:
236
+ audio_bytes = audio_data
237
+
238
  # Convertir les octets en un segment audio
239
  segment = AudioSegment.from_mp3(io.BytesIO(audio_bytes))
240
 
241
  # Ajouter le son de début, le segment TTS, le son de fin et le silence
242
  final_audio += begin_sound + segment + end_sound + silence
243
 
244
+
245
  # Convertir le segment audio final en octets
246
  buffer = io.BytesIO()
247
  final_audio.export(buffer, format="mp3")
 
634
 
635
  # Traitement de l'entrée audio de l'utilisateur
636
  if len(st.session_state.audio) > 0:
637
+ if st.session_state.audio:
638
+ try:
639
+ st.subheader(f"Frame rate: {st.session_state.audio.frame_rate}, Frame width: {st.session_state.audio.frame_width}, Duration: {st.session_state.audio.duration_seconds} seconds")
 
 
640
 
641
+ with tempfile.NamedTemporaryFile(suffix=".mp3", delete_on_close=False) as tmp_file:
642
+ st.session_state.audio.export(tmp_file, format="mp3")
643
+ tmp_file.close()
 
 
644
 
645
+ # Transcrire l'audio en texte
646
+ st.session_state.transcription = transcribe_audio(
647
+ tmp_file,
648
+ language=st.session_state.language_detected
649
+ )
 
 
 
 
 
 
 
 
 
 
 
 
650
 
651
+ st.markdown(
652
+ f"🎤 {get_translation('transcription_audio')}".format(
653
+ f"{st.session_state.transcription}"
654
+ )
655
+ )
656
 
657
+ # Detecter la langue du texte transcrit (si la langue source n'est pas détectée)
658
+ if st.session_state.language_detected is None:
659
+ st.session_state.language_detected = detect_language(
660
+ input_text=st.session_state.transcription, temperature=0.01
661
+ )
662
+ st.markdown(
663
+ f"- {get_translation('langue_detectee')}".format(
664
+ f"{convert_iso6391_to_language_name(st.session_state.language_detected)}"
665
+ )
666
  )
 
 
 
667
 
 
 
 
 
 
 
 
 
668
 
669
+ st.session_state.audio_list = []
670
+ for cursor_selected_lang in st.session_state.selected_languages:
671
+ st.session_state.target_language = cursor_selected_lang["iso-639-1"]
672
+ st.session_state.full_response = ""
 
673
 
674
+ # Initialisation du mode de traitement pour la langue cible actuelle
675
+ st.session_state.system_prompt, st.session_state.operation_prompt = init_process_mode(from_lang=
676
+ (
677
+ st.session_state.language_detected if "language_detected" in st.session_state.language_detected else convert_language_name_to_iso6391(
678
+ st.session_state.interface_language
 
 
679
  )
680
+ ),
681
+ to_lang=st.session_state.target_language
682
+ )
683
+
684
+ with st.chat_message("assistant", avatar="👻"):
685
+ message_placeholder = st.empty()
686
+ st.session_state.response_generator = process_message(
687
+ st.session_state.transcription,
688
+ st.session_state.operation_prompt,
689
+ st.session_state.enable_tts_for_input_from_audio_record,
690
+ st.session_state.system_prompt
691
+ )
692
+
693
+ for response_chunk in st.session_state.response_generator:
694
+ message_placeholder.markdown(response_chunk)
695
+ st.session_state.end_response = st.session_state.response_generator.close()
696
+ if st.session_state.full_response != "":
697
+ message_placeholder.markdown(st.session_state.full_response)
698
+
699
+ if st.session_state.enable_tts_for_input_from_audio_record:
700
+ st.session_state.tts_audio, st.session_state.tts_duration = process_tts_message(st.session_state.full_response)
701
+
702
+ if st.session_state.tts_audio:
703
+ st.session_state.audio_list.append(
704
+ ( st.session_state.tts_audio,
705
+ st.session_state.tts_duration )
706
+ )
707
+ else:
708
+ pass
709
+
710
+ if st.session_state.audio_list:
711
+ st.session_state.final_audio = concatenate_audio_files(st.session_state.audio_list)
712
+
713
+ with st.container(border=True):
714
+
715
+ # Générer un nom de fichier unique
716
+ st.session_state.timestamp = time.strftime("%Y%m%d-%H%M%S")
717
+ st.session_state.langues = "_".join([lang["iso-639-1"] for lang in st.session_state.selected_languages])
718
+ st.session_state.nom_fichier = f"reponse_audio_{st.session_state.langues}_{st.session_state.timestamp}.mp3"
719
+
720
+ st.audio(st.session_state.final_audio,
721
+ format="audio/mp3",
722
+ autoplay=st.session_state.autoplay_tts)
723
+
724
+ st.download_button(
725
+ label=f"📥 {get_translation('telecharger_audio')}",
726
+ data=st.session_state.final_audio,
727
+ file_name=st.session_state.nom_fichier,
728
+ mime="audio/mp3",
729
+ use_container_width=True,
730
+ type="primary",
731
+ key=f"download_button_{st.session_state.langues}_{st.session_state.timestamp}",
732
+ )
733
+
734
+ except Exception as e:
735
+ st.error(f"[AUDIO] - Erreur lors de l'exportation de l'audio : {str(e)}")
736
+ finally:
737
+ clear_inputs_garbages()
738
+ # if 'tmp_file' in locals():
739
+ # os.unlink(tmp_file.name)
740
+
741
+
742
+
743
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
744
 
745
 
746
  def clear_inputs_garbages(sessions_state_list: Optional[list] =
 
831
 
832
 
833
 
834
+ main_page()