IamPolymath committed on
Commit
9237b52
·
1 Parent(s): 8b80819

minor changes

Browse files
Files changed (1) hide show
  1. core/speech_to_text.py +67 -11
core/speech_to_text.py CHANGED
@@ -14,6 +14,8 @@ from typing import Optional
14
  #from typing import Tuple
15
  from typing import Union
16
 
 
 
17
  def huggingface_endpoints_stt(fichier_audio: str) -> str:
18
  # Définir l'URL de l'endpoint d'inférence sur Hugging Face
19
  API_URL = f"{getenv("hf_endpoint_whisper_large_v3_turbo")}"
@@ -162,22 +164,76 @@ def translate_audio(filepath: Union[str, IO]) -> str:
162
 
163
 
164
 
165
- # Exemple d'utilisation de la fonction
166
- if __name__ == "__main__":
167
- fichier_audio = "sample_1.wav" # Remplacez par votre fichier audio
168
- try:
169
- transcription = huggingface_endpoints_stt(fichier_audio)
170
- print(f"Transcription : {transcription}")
171
- except Exception as e:
172
- print(f"Une erreur est survenue : {e}")
173
 
174
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
 
 
 
 
 
176
 
177
- """
178
- Supported content types are:\n application/json, application/json; charset=UTF-8, text/csv, text/plain, image/png, image/jpeg, image/jpg, image/tiff, image/bmp, image/gif, image/webp, image/x-image, audio/x-flac, audio/flac, audio/mpeg, audio/x-mpeg-3, audio/wave, audio/wav, audio/x-wav, audio/ogg, audio/x-audio, audio/webm, audio/webm;codecs=opus, audio/AMR, audio/amr, audio/AMR-WB, audio/AMR-WB+, audio/m4a, audio/x-m4a\n
179
- """
 
 
180
 
 
181
 
 
 
 
 
182
 
183
 
 
14
  #from typing import Tuple
15
  from typing import Union
16
 
17
+ from core.DetectLanguage import detect_language
18
+
19
  def huggingface_endpoints_stt(fichier_audio: str) -> str:
20
  # Définir l'URL de l'endpoint d'inférence sur Hugging Face
21
  API_URL = f"{getenv("hf_endpoint_whisper_large_v3_turbo")}"
 
164
 
165
 
166
 
 
 
 
 
 
 
 
 
167
 
168
 
169
+ # ############################################################
170
+
171
+
172
+ class SpeechToText(object):
173
+ def __init__(self,
174
+ api_key: str):
175
+ self.api_key = api_key
176
+ self.client = OpenAI(api_key=self.api_key)
177
+
178
+ def aquire_audio(self,
179
+ filepath: Union[str, IO, List[Union[str, IO]]]):
180
+ """
181
+ Integrer la detection de langue :
182
+ Ajoute un appel a la fonction detect_language juste apres l'aquisition de l'audio et avant de choisir entre transcrire ou traduire.
183
+ """
184
+ if isinstance(filepath, str):
185
+ file_paths = [filepath]
186
+ elif isinstance(filepath, IO):
187
+ file_paths = [filepath.name]
188
+ else:
189
+ file_paths = [ f"{file_path}" for file_path in filepath if isinstance(filepath, List[str]) else file_path.name for file_path in filepath ]
190
+
191
+ # create the list 'file_streams'
192
+ file_streams = [open(f, "rb") for path in file_paths]
193
+
194
+ def create_assistant():
195
+ nonlocal self.client
196
+ return self.client.beta.assistants.create(
197
+ name="Audio Language Detector",
198
+ instructions=" ".join([
199
+ "Act as an language detection function for an audio file.",
200
+ "You are the assistant designed to detect the language of an audio file.",
201
+ "This assistant is designed to detect the language of an audio file.",
202
+ "You receive an audio file as input, and you analyze it to determine the language spoken in the audio.",
203
+ "The assistant will return the detected language of the audio in ISO 639-1 format.",
204
+ ""
205
+ ]),
206
+ model="gpt-4o",
207
+ tools=[{"type": "file_search"}]
208
+ )
209
+
210
+
211
+ def create_vector_store():
212
+ nonlocal self.client
213
+ return self.client.beta.vector_stores.create(
214
+ name="Audio Language Detection"
215
+ )
216
+
217
+
218
+ assistant = create_assistant()
219
+ vectore_store = create_vector_store()
220
 
221
+ file_batch = self.client.beta.vector_stores.file_batches.upload_and_poll(
222
+ vector_store_id=vectore_store.id,
223
+ files=file_streams
224
+ )
225
 
226
+ # update the assistant to use the vector store
227
+ assistant = self.client.beta.assistants.update(
228
+ assistant_id=assistant.id,
229
+ tool_ressources={"file_search": {"vector_store_ids": [vectore_store.id]}}
230
+ )
231
 
232
+ ## Create a thread
233
 
234
+ ### Upload the user provided audio
235
+ message_file = self.client.files.create(
236
+ file=open(file_paths[0], "rb")
237
+ )
238
 
239