rick committed on
Commit
b717ac0
·
unverified ·
1 Parent(s): 509470f

...not done...

Browse files
Files changed (1) hide show
  1. pages/main.py +145 -29
pages/main.py CHANGED
@@ -63,7 +63,7 @@ def process_tts_message(text_response: str) -> Tuple[Optional[bytes], Optional[f
63
  st.error(f"Une erreur s'est produite lors de la conversion texte-parole : {e}")
64
  return None, None
65
 
66
- def split_audio(audio_file: str, max_size_mb: int = 25) -> List[str]:
67
  """
68
  Divise un fichier audio en segments de taille maximale spécifiée.
69
 
@@ -75,26 +75,35 @@ def split_audio(audio_file: str, max_size_mb: int = 25) -> List[str]:
75
  List[str]: Liste des chemins vers les segments audio divisés.
76
  """
77
  try:
78
- audio = AudioSegment.from_file(audio_file, format="wav")
 
 
 
 
 
 
 
 
79
  duration_ms = len(audio)
80
  segment_duration_ms = int((max_size_mb * 1024 * 1024 * 8) / (audio.frame_rate * audio.sample_width * audio.channels))
81
 
82
  segments = []
83
  for start in range(0, duration_ms, segment_duration_ms):
84
- tmp_file = tempfile.TemporaryFile()
85
  end = min(start + segment_duration_ms, duration_ms)
86
  segment = audio[start:end]
87
- segment.export(tmp_file, format="mp3")
88
- tmp_file.seek(0)
89
- segments.append(base64.b64encode(tmp_file.read()).decode())
90
- tmp_file.close()
91
 
 
92
  return segments
93
  except Exception as e:
94
  print(f"Erreur lors du découpage de l'audio : {e}")
95
  return []
96
 
97
- def transcribe_segment(segment_path: str, language: Optional[str] = None) -> str:
98
  """
99
  Transcrit un segment audio en texte.
100
 
@@ -106,26 +115,26 @@ def transcribe_segment(segment_path: str, language: Optional[str] = None) -> str
106
  str: Le texte transcrit.
107
  """
108
  try:
109
- with open(segment_path, "rb") as audio_segment:
110
- transcript = client.audio.transcriptions.create(
111
- model="whisper-1",
112
- file=audio_segment,
113
- language=language, # semble que language soit mal formatter au format ISO6391
114
- response_format="text"
115
- )
 
 
 
 
 
 
 
116
  return transcript
117
  except Exception as e:
118
- print(f"Erreur lors de la transcription du segment {segment_path} : {e}")
119
- print("\'"*3, end="")
120
- print("\n# # #\n{}\n# # #\n".format(
121
- language
122
- ),
123
- end=""
124
- )
125
- print("\'"*3, end="")
126
  return ""
127
 
128
- def transcribe_audio(audio_file: Union[str, IO], language: Optional[str] = None) -> str:
129
  """
130
  Transcrit un fichier audio en texte.
131
 
@@ -140,30 +149,43 @@ def transcribe_audio(audio_file: Union[str, IO], language: Optional[str] = None)
140
 
141
  try:
142
  with st.status("Transcription de l'audio en cours...") as status:
143
- audio_file_path = audio_file if isinstance(audio_file, str) else audio_file.name
144
- file_size_mb = os.path.getsize(audio_file_path) / (1024 * 1024)
 
 
 
 
 
 
 
145
 
146
  if file_size_mb > max_size_mb:
147
  status.update(label="Découpage de l'audio en segments...")
148
- segments = split_audio(audio_file_path, max_size_mb)
 
149
  full_transcript = ""
150
  for i, segment in enumerate(segments):
151
- status.update(label=f"Transcription du segment {i+1}/{len(base64.b64decode(segments.encode()))}...")
152
  transcript = transcribe_segment(
153
  base64.b64decode(segment.encode()),
154
  language
155
  )
156
  full_transcript += f"{transcript} "
157
  status.update(label="Transcription terminée", state="complete")
 
158
  return full_transcript.strip()
159
  else:
160
  status.update(label="Transcription de l'audio...")
161
- transcript = transcribe_segment(audio_file_path, language)
 
162
  status.update(label="Transcription terminée", state="complete")
 
163
  return transcript
164
  except Exception as e:
165
  st.error(f"Erreur lors de la transcription : {e}")
166
  return ""
 
 
167
 
168
 
169
  def detect_language(input_text: str, temperature: float = 0.01) -> str:
@@ -639,6 +661,100 @@ def main_page():
639
  # st.write(f"🗣️ {get_translation('enregistrez_message')}")
640
 
641
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
642
  def clear_inputs_garbages(sessions_state_list: Optional[list] =
643
  [ 'transcription', 'operation_prompt', 'system_prompt',
644
  'audio_list', 'full_response', 'tts_audio',
 
63
  st.error(f"Une erreur s'est produite lors de la conversion texte-parole : {e}")
64
  return None, None
65
 
66
+ def split_audio(audio_data: Union[str, bytes], max_size_mb: int = 25) -> List[str]:
67
  """
68
  Divise un fichier audio en segments de taille maximale spécifiée.
69
 
 
75
  List[str]: Liste des chemins vers les segments audio divisés.
76
  """
77
  try:
78
+ temp_audio_file = tempfile.TemporaryFile()
79
+ if isinstance(audio_data, str):
80
+ temp_audio_file.write(audio_data.encode())
81
+ temp_audio_file.seek(0)
82
+ else:
83
+ temp_audio_file.write(audio_data)
84
+ temp_audio_file.seek(0)
85
+
86
+ audio = AudioSegment.from_file(temp_audio_file, format="wav")
87
  duration_ms = len(audio)
88
  segment_duration_ms = int((max_size_mb * 1024 * 1024 * 8) / (audio.frame_rate * audio.sample_width * audio.channels))
89
 
90
  segments = []
91
  for start in range(0, duration_ms, segment_duration_ms):
92
+ tmp_seg_file = tempfile.TemporaryFile()
93
  end = min(start + segment_duration_ms, duration_ms)
94
  segment = audio[start:end]
95
+ segment.export(tmp_seg_file, format="mp3")
96
+ tmp_seg_file.seek(0)
97
+ segments.append(base64.b64encode(tmp_seg_file.read()).decode())
98
+ tmp_seg_file.close()
99
 
100
+ temp_audio_file.close()
101
  return segments
102
  except Exception as e:
103
  print(f"Erreur lors du découpage de l'audio : {e}")
104
  return []
105
 
106
+ def transcribe_segment(segment_data: Union[str, bytes], language: Optional[str] = None) -> str:
107
  """
108
  Transcrit un segment audio en texte.
109
 
 
115
  str: Le texte transcrit.
116
  """
117
  try:
118
+ audio_segment = tempfile.TemporaryFile()
119
+ if isinstance(segment_data, str):
120
+ audio_segment.write(segment_data.encode())
121
+ else:
122
+ audio_segment.write(segment_data)
123
+
124
+ audio_segment.seek(0)
125
+ transcript = client.audio.transcriptions.create(
126
+ model="whisper-1",
127
+ file=audio_segment,
128
+ language=language, # semble que language soit mal formatter au format ISO6391
129
+ response_format="text"
130
+ )
131
+ audio_segment.close()
132
  return transcript
133
  except Exception as e:
134
+ print(f"Erreur lors de la transcription du segment : {e}")
 
 
 
 
 
 
 
135
  return ""
136
 
137
+ def transcribe_audio(audio_data: Union[str, bytes], language: Optional[str] = None) -> str:
138
  """
139
  Transcrit un fichier audio en texte.
140
 
 
149
 
150
  try:
151
  with st.status("Transcription de l'audio en cours...") as status:
152
+ temp_audio_file = tempfile.TemporaryFile()
153
+ if isinstance(audio_data, str):
154
+ temp_audio_file.write(audio_data.encode())
155
+ temp_audio_file.seek(0)
156
+ elif isinstance(audio_data, bytes):
157
+ temp_audio_file.write(audio_data)
158
+ temp_audio_file.seek(0)
159
+
160
+ file_size_mb = temp_audio_file.tell() / (1024 * 1024)
161
 
162
  if file_size_mb > max_size_mb:
163
  status.update(label="Découpage de l'audio en segments...")
164
+ temp_audio_file.seek(0)
165
+ segments = split_audio(temp_audio_file.read(), max_size_mb)
166
  full_transcript = ""
167
  for i, segment in enumerate(segments):
168
+ status.update(label=f"Transcription du segment {i+1}/{len(segments)}...")
169
  transcript = transcribe_segment(
170
  base64.b64decode(segment.encode()),
171
  language
172
  )
173
  full_transcript += f"{transcript} "
174
  status.update(label="Transcription terminée", state="complete")
175
+
176
  return full_transcript.strip()
177
  else:
178
  status.update(label="Transcription de l'audio...")
179
+ temp_audio_file.seek(0)
180
+ transcript = transcribe_segment(temp_audio_file.read(), language)
181
  status.update(label="Transcription terminée", state="complete")
182
+
183
  return transcript
184
  except Exception as e:
185
  st.error(f"Erreur lors de la transcription : {e}")
186
  return ""
187
+ finally:
188
+ temp_audio_file.close()
189
 
190
 
191
  def detect_language(input_text: str, temperature: float = 0.01) -> str:
 
661
  # st.write(f"🗣️ {get_translation('enregistrez_message')}")
662
 
663
 
664
+ elif st.session_state.audio:
665
+ # Traitement de l'entrée audio de l'utilisateur
666
+ if len(st.session_state.audio) > 0:
667
+ tmp_file = tempfile.TemporaryFile()
668
+ st.session_state.audio.export(tmp_file, format="wav")
669
+
670
+ st.write(f"Frame rate: {st.session_state.audio.frame_rate}, Frame width: {st.session_state.audio.frame_width}, Duration: {st.session_state.audio.duration_seconds} seconds")
671
+
672
+ # Transcrire l'audio en texte
673
+ st.session_state.transcription = transcribe_audio(tmp_file, language=st.session_state.language_detected)
674
+ tmp_file.close()
675
+
676
+ # Detecter la langue du texte transcrit (si la langue source n'est pas détectée)
677
+ if st.session_state.language_detected is None:
678
+ st.session_state.language_detected = detect_language(
679
+ input_text=st.session_state.transcription, temperature=0.01
680
+ )
681
+ st.markdown(
682
+ f"- {get_translation('langue_detectee')}".format(
683
+ f"{convert_iso6391_to_language_name(st.session_state.language_detected)}"
684
+ )
685
+ )
686
+ st.markdown(
687
+ f"🎤 {get_translation('transcription_audio')}".format(
688
+ f"{st.session_state.transcription}"
689
+ )
690
+ )
691
+
692
+ st.session_state.audio_list = []
693
+ for cursor_selected_lang in st.session_state.selected_languages:
694
+ st.session_state.target_language = cursor_selected_lang["iso-639-1"]
695
+ st.session_state.full_response = ""
696
+
697
+ # Initialisation du mode de traitement pour la langue cible actuelle
698
+ st.session_state.system_prompt, st.session_state.operation_prompt = init_process_mode(from_lang=
699
+ (
700
+ st.session_state.language_detected if "language_detected" in st.session_state.language_detected else convert_language_name_to_iso6391(
701
+ st.session_state.interface_language
702
+ )
703
+ ),
704
+ to_lang=st.session_state.target_language
705
+ )
706
+
707
+ with st.chat_message("assistant", avatar="👻"):
708
+ message_placeholder = st.empty()
709
+ st.session_state.response_generator = process_message(
710
+ st.session_state.transcription,
711
+ st.session_state.operation_prompt,
712
+ st.session_state.enable_tts_for_input_from_audio_record,
713
+ st.session_state.system_prompt
714
+ )
715
+
716
+ for response_chunk in st.session_state.response_generator:
717
+ message_placeholder.markdown(response_chunk)
718
+ st.session_state.end_response = st.session_state.response_generator.close()
719
+ if st.session_state.full_response != "":
720
+ message_placeholder.markdown(st.session_state.full_response)
721
+
722
+ if st.session_state.enable_tts_for_input_from_audio_record:
723
+ st.session_state.tts_audio, st.session_state.tts_duration = process_tts_message(st.session_state.full_response)
724
+
725
+ if st.session_state.tts_audio:
726
+ st.session_state.audio_list.append(
727
+ ( st.session_state.tts_audio,
728
+ st.session_state.tts_duration )
729
+ )
730
+ else:
731
+ pass
732
+
733
+ if st.session_state.audio_list:
734
+ st.session_state.final_audio = concatenate_audio_files(st.session_state.audio_list)
735
+
736
+ with st.container(border=True):
737
+
738
+ # Générer un nom de fichier unique
739
+ st.session_state.timestamp = time.strftime("%Y%m%d-%H%M%S")
740
+ st.session_state.langues = "_".join([lang["iso-639-1"] for lang in st.session_state.selected_languages])
741
+ st.session_state.nom_fichier = f"reponse_audio_{st.session_state.langues}_{st.session_state.timestamp}.mp3"
742
+
743
+ st.audio(st.session_state.final_audio, format="audio/mp3", autoplay=st.session_state.autoplay_tts)
744
+
745
+ st.download_button(
746
+ label=f"📥 {get_translation('telecharger_audio')}",
747
+ data=st.session_state.final_audio,
748
+ file_name=st.session_state.nom_fichier,
749
+ mime="audio/mp3",
750
+ use_container_width=True,
751
+ type="primary",
752
+ key=f"download_button_{st.session_state.langues}_{st.session_state.timestamp}",
753
+ )
754
+ #
755
+ clear_inputs_garbages()
756
+
757
+
758
  def clear_inputs_garbages(sessions_state_list: Optional[list] =
759
  [ 'transcription', 'operation_prompt', 'system_prompt',
760
  'audio_list', 'full_response', 'tts_audio',