IamPolymath committed on
Commit
9237b52
·
1 Parent(s): 8b80819

minor changes

Browse files
Files changed (1) hide show
  1. core/speech_to_text.py +67 -11
core/speech_to_text.py CHANGED
@@ -14,6 +14,8 @@ from typing import Optional
14
  #from typing import Tuple
15
  from typing import Union
16
 
 
 
17
  def huggingface_endpoints_stt(fichier_audio: str) -> str:
18
  # Définir l'URL de l'endpoint d'inférence sur Hugging Face
19
  API_URL = f"{getenv("hf_endpoint_whisper_large_v3_turbo")}"
@@ -162,22 +164,76 @@ def translate_audio(filepath: Union[str, IO]) -> str:
162
 
163
 
164
 
165
- # Exemple d'utilisation de la fonction
166
- if __name__ == "__main__":
167
- fichier_audio = "sample_1.wav" # Remplacez par votre fichier audio
168
- try:
169
- transcription = huggingface_endpoints_stt(fichier_audio)
170
- print(f"Transcription : {transcription}")
171
- except Exception as e:
172
- print(f"Une erreur est survenue : {e}")
173
 
174
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
 
 
 
 
 
176
 
177
- """
178
- Supported content types are:\n application/json, application/json; charset=UTF-8, text/csv, text/plain, image/png, image/jpeg, image/jpg, image/tiff, image/bmp, image/gif, image/webp, image/x-image, audio/x-flac, audio/flac, audio/mpeg, audio/x-mpeg-3, audio/wave, audio/wav, audio/x-wav, audio/ogg, audio/x-audio, audio/webm, audio/webm;codecs=opus, audio/AMR, audio/amr, audio/AMR-WB, audio/AMR-WB+, audio/m4a, audio/x-m4a\n
179
- """
 
 
180
 
 
181
 
 
 
 
 
182
 
183
 
 
14
  #from typing import Tuple
15
  from typing import Union
16
 
17
+ from core.DetectLanguage import detect_language
18
+
19
  def huggingface_endpoints_stt(fichier_audio: str) -> str:
20
  # Définir l'URL de l'endpoint d'inférence sur Hugging Face
21
  API_URL = f"{getenv("hf_endpoint_whisper_large_v3_turbo")}"
 
164
 
165
 
166
 
 
 
 
 
 
 
 
 
167
 
168
 
169
+ # ############################################################
170
+
171
+
172
+ class SpeechToText(object):
173
+ def __init__(self,
174
+ api_key: str):
175
+ self.api_key = api_key
176
+ self.client = OpenAI(api_key=self.api_key)
177
+
178
+ def aquire_audio(self,
179
+ filepath: Union[str, IO, List[Union[str, IO]]]):
180
+ """
181
+ Integrer la detection de langue :
182
+ Ajoute un appel a la fonction detect_language juste apres l'aquisition de l'audio et avant de choisir entre transcrire ou traduire.
183
+ """
184
+ if isinstance(filepath, str):
185
+ file_paths = [filepath]
186
+ elif isinstance(filepath, IO):
187
+ file_paths = [filepath.name]
188
+ else:
189
+ file_paths = [ f"{file_path}" for file_path in filepath if isinstance(filepath, List[str]) else file_path.name for file_path in filepath ]
190
+
191
+ # create the list 'file_streams'
192
+ file_streams = [open(f, "rb") for path in file_paths]
193
+
194
+ def create_assistant():
195
+ nonlocal self.client
196
+ return self.client.beta.assistants.create(
197
+ name="Audio Language Detector",
198
+ instructions=" ".join([
199
+ "Act as an language detection function for an audio file.",
200
+ "You are the assistant designed to detect the language of an audio file.",
201
+ "This assistant is designed to detect the language of an audio file.",
202
+ "You receive an audio file as input, and you analyze it to determine the language spoken in the audio.",
203
+ "The assistant will return the detected language of the audio in ISO 639-1 format.",
204
+ ""
205
+ ]),
206
+ model="gpt-4o",
207
+ tools=[{"type": "file_search"}]
208
+ )
209
+
210
+
211
+ def create_vector_store():
212
+ nonlocal self.client
213
+ return self.client.beta.vector_stores.create(
214
+ name="Audio Language Detection"
215
+ )
216
+
217
+
218
+ assistant = create_assistant()
219
+ vectore_store = create_vector_store()
220
 
221
+ file_batch = self.client.beta.vector_stores.file_batches.upload_and_poll(
222
+ vector_store_id=vectore_store.id,
223
+ files=file_streams
224
+ )
225
 
226
+ # update the assistant to use the vector store
227
+ assistant = self.client.beta.assistants.update(
228
+ assistant_id=assistant.id,
229
+ tool_ressources={"file_search": {"vector_store_ids": [vectore_store.id]}}
230
+ )
231
 
232
+ ## Create a thread
233
 
234
+ ### Upload the user provided audio
235
+ message_file = self.client.files.create(
236
+ file=open(file_paths[0], "rb")
237
+ )
238
 
239