Spaces:
Paused
Paused
IamPolymath committed on
Commit ·
9237b52
1
Parent(s): 8b80819
minor changes
Browse files- core/speech_to_text.py +67 -11
core/speech_to_text.py
CHANGED
|
@@ -14,6 +14,8 @@ from typing import Optional
|
|
| 14 |
#from typing import Tuple
|
| 15 |
from typing import Union
|
| 16 |
|
|
|
|
|
|
|
| 17 |
def huggingface_endpoints_stt(fichier_audio: str) -> str:
|
| 18 |
# Définir l'URL de l'endpoint d'inférence sur Hugging Face
|
| 19 |
API_URL = f"{getenv("hf_endpoint_whisper_large_v3_turbo")}"
|
|
@@ -162,22 +164,76 @@ def translate_audio(filepath: Union[str, IO]) -> str:
|
|
| 162 |
|
| 163 |
|
| 164 |
|
| 165 |
-
# Example usage of the speech-to-text helper.
if __name__ == "__main__":
    # Path to the audio sample to transcribe — swap in your own file.
    sample_path = "sample_1.wav"
    try:
        result = huggingface_endpoints_stt(sample_path)
        print(f"Transcription : {result}")
    except Exception as err:
        # Surface any failure (network, auth, bad file) without a traceback.
        print(f"Une erreur est survenue : {err}")
|
| 173 |
|
| 174 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
|
|
|
|
|
|
| 180 |
|
|
|
|
| 181 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 182 |
|
| 183 |
|
|
|
|
| 14 |
#from typing import Tuple
|
| 15 |
from typing import Union
|
| 16 |
|
| 17 |
+
from core.DetectLanguage import detect_language
|
| 18 |
+
|
| 19 |
def huggingface_endpoints_stt(fichier_audio: str) -> str:
|
| 20 |
# Définir l'URL de l'endpoint d'inférence sur Hugging Face
|
| 21 |
API_URL = f"{getenv("hf_endpoint_whisper_large_v3_turbo")}"
|
|
|
|
| 164 |
|
| 165 |
|
| 166 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
|
| 168 |
|
| 169 |
+
# ############################################################
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
class SpeechToText(object):
|
| 173 |
+
def __init__(self,
|
| 174 |
+
api_key: str):
|
| 175 |
+
self.api_key = api_key
|
| 176 |
+
self.client = OpenAI(api_key=self.api_key)
|
| 177 |
+
|
| 178 |
+
def aquire_audio(self,
|
| 179 |
+
filepath: Union[str, IO, List[Union[str, IO]]]):
|
| 180 |
+
"""
|
| 181 |
+
Integrer la detection de langue :
|
| 182 |
+
Ajoute un appel a la fonction detect_language juste apres l'aquisition de l'audio et avant de choisir entre transcrire ou traduire.
|
| 183 |
+
"""
|
| 184 |
+
if isinstance(filepath, str):
|
| 185 |
+
file_paths = [filepath]
|
| 186 |
+
elif isinstance(filepath, IO):
|
| 187 |
+
file_paths = [filepath.name]
|
| 188 |
+
else:
|
| 189 |
+
file_paths = [ f"{file_path}" for file_path in filepath if isinstance(filepath, List[str]) else file_path.name for file_path in filepath ]
|
| 190 |
+
|
| 191 |
+
# create the list 'file_streams'
|
| 192 |
+
file_streams = [open(f, "rb") for path in file_paths]
|
| 193 |
+
|
| 194 |
+
def create_assistant():
|
| 195 |
+
nonlocal self.client
|
| 196 |
+
return self.client.beta.assistants.create(
|
| 197 |
+
name="Audio Language Detector",
|
| 198 |
+
instructions=" ".join([
|
| 199 |
+
"Act as an language detection function for an audio file.",
|
| 200 |
+
"You are the assistant designed to detect the language of an audio file.",
|
| 201 |
+
"This assistant is designed to detect the language of an audio file.",
|
| 202 |
+
"You receive an audio file as input, and you analyze it to determine the language spoken in the audio.",
|
| 203 |
+
"The assistant will return the detected language of the audio in ISO 639-1 format.",
|
| 204 |
+
""
|
| 205 |
+
]),
|
| 206 |
+
model="gpt-4o",
|
| 207 |
+
tools=[{"type": "file_search"}]
|
| 208 |
+
)
|
| 209 |
+
|
| 210 |
+
|
| 211 |
+
def create_vector_store():
|
| 212 |
+
nonlocal self.client
|
| 213 |
+
return self.client.beta.vector_stores.create(
|
| 214 |
+
name="Audio Language Detection"
|
| 215 |
+
)
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
assistant = create_assistant()
|
| 219 |
+
vectore_store = create_vector_store()
|
| 220 |
|
| 221 |
+
file_batch = self.client.beta.vector_stores.file_batches.upload_and_poll(
|
| 222 |
+
vector_store_id=vectore_store.id,
|
| 223 |
+
files=file_streams
|
| 224 |
+
)
|
| 225 |
|
| 226 |
+
# update the assistant to use the vector store
|
| 227 |
+
assistant = self.client.beta.assistants.update(
|
| 228 |
+
assistant_id=assistant.id,
|
| 229 |
+
tool_ressources={"file_search": {"vector_store_ids": [vectore_store.id]}}
|
| 230 |
+
)
|
| 231 |
|
| 232 |
+
## Create a thread
|
| 233 |
|
| 234 |
+
### Upload the user provided audio
|
| 235 |
+
message_file = self.client.files.create(
|
| 236 |
+
file=open(file_paths[0], "rb")
|
| 237 |
+
)
|
| 238 |
|
| 239 |
|