File size: 5,407 Bytes
964514c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
026b176
964514c
026b176
 
 
964514c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1dc4889
964514c
 
 
 
 
 
 
1dc4889
964514c
 
 
 
 
 
1dc4889
964514c
 
 
 
 
 
 
 
 
 
1dc4889
964514c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1dc4889
 
 
964514c
 
 
1dc4889
964514c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
026b176
 
23bc671
 
 
 
 
 
 
 
 
 
 
 
 
 
026b176
23bc671
026b176
 
 
 
23bc671
 
026b176
23bc671
 
 
 
 
 
 
 
 
 
 
026b176
23bc671
026b176
 
23bc671
 
964514c
 
 
23bc671
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
#coding: utf-8

import os
import tempfile


#from typing import Any
#from typing import Dict
#from typing import IO
#from typing import List
from typing import Optional
from typing import Tuple
#from typing import Union
from base64 import b64encode

from openai import OpenAI
from pydub import AudioSegment
import streamlit as st

#from dotenv import load_dotenv
# Load environment variables from the .env file
#load_dotenv()

class openai_tts(object):
    """Small wrapper around the OpenAI text-to-speech endpoint.

    The instance stores the voice, model, output format and speed, and
    exposes :meth:`text_to_speech` to synthesize audio for a given text.
    Every ``set_*`` method validates its argument, raises ``ValueError`` on
    an invalid value, and returns ``self`` so calls can be chained.
    """

    # Fallbacks used when the constructor receives None or an empty string.
    _DEFAULT_VOICE = "nova"
    _DEFAULT_MODEL = "tts-1"
    _DEFAULT_FORMAT = "mp3"
    _DEFAULT_SPEED = 1.0

    # Raw "pcm" output carries no header, so its duration has to be derived
    # from the byte count. OpenAI's TTS guide documents it as 24 kHz, 16-bit
    # signed little-endian samples.
    # NOTE(review): mono is assumed here -- confirm against the API docs if
    # the endpoint changes.
    _PCM_FRAME_RATE = 24000
    _PCM_SAMPLE_WIDTH = 2
    _PCM_CHANNELS = 1

    def __init__(self,
                 tts_voice: Optional[str] = "nova",
                 tts_model: Optional[str] = "tts-1",
                 response_format: Optional[str] = "mp3",
                 speed: Optional[float] = 1.0
                 ):
        """Create the API client and validate the synthesis settings.

        Args:
            tts_voice: Voice name; ``None`` or ``""`` selects ``"nova"``.
            tts_model: Model name; ``None`` or ``""`` selects ``"tts-1"``.
            response_format: Audio format; ``None`` or ``""`` selects ``"mp3"``.
            speed: Playback speed in ``[0.25, 4.0]``; ``None`` selects ``1.0``.

        Raises:
            ValueError: If any provided setting is not supported.
        """
        self.client = None
        self.init_supported_formats__()
        self.init_api_client()

        # Always run every setter so the attributes read by text_to_speech()
        # exist. Previously a falsy argument skipped its setter and the
        # failure only surfaced later as an AttributeError.
        self.set_response_format(response_format or self._DEFAULT_FORMAT)
        self.set_tts_voice(tts_voice or self._DEFAULT_VOICE)
        self.set_tts_model(tts_model or self._DEFAULT_MODEL)
        # Only None means "use the default": a speed of 0 must be rejected by
        # the range check rather than silently ignored.
        self.set_tts_speed(self._DEFAULT_SPEED if speed is None else speed)

    def set_tts_speed(self, speed):
        """Set the playback speed; must lie between 0.25 and 4.0 inclusive."""
        if not (0.25 <= speed <= 4.0):
            raise ValueError(f"[TTS] - Speed must be between 0.25 and 4.0. Provided value: {speed}")
        self.speed = speed
        return self

    def set_tts_voice(self, voice):
        """Set the voice; must be one of the names accepted below."""
        voix_valides = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"]
        if voice not in voix_valides:
            raise ValueError(f"[TTS] - Invalid TTS voice: {voice}. Valid voices are: {', '.join(voix_valides)}.")
        self.tts_voice = voice
        return self

    def set_tts_model(self, model):
        """Set the model; must be ``"tts-1"`` or ``"tts-1-hd"``."""
        if model not in ["tts-1", "tts-1-hd"]:
            raise ValueError(f"[TTS] - Invalid TTS model: {model}. Valid models are 'tts-1' and 'tts-1-hd'.")
        self.tts_model = model
        return self

    def init_supported_formats__(self):
        """Populate ``self.supported_formats`` with the accepted audio formats."""
        self.supported_formats = ['mp3', 'opus', 'aac', 'flac', 'wav', 'pcm']
        return self

    def set_response_format(self, format: str):
        """Set the output audio format; must be in ``self.supported_formats``."""
        if format not in self.supported_formats:
            raise ValueError(f"[TTS] - Unsupported format: {format}. Supported formats are: {', '.join(self.supported_formats)}")
        self.response_format = format
        return self

    def init_api_client(self):
        """Create the OpenAI client once, using ``OPENAI_API_KEY`` from the environment."""
        if not self.client:
            # OpenAI client configuration with API key
            self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
        return self

    def text_to_speech(self,
                       input_text: str) -> dict:
        """Convert text to speech using the OpenAI API.

        Args:
            input_text (str): The text to convert to speech.

        Returns:
            dict: A dictionary containing:
                - 'audio_duration' (float): Duration of the audio in seconds.
                - 'data_bytes' (str): The audio data encoded as base64 text.
        """
        response = self.client.audio.speech.create(
            model=self.tts_model,
            voice=self.tts_voice,
            input=input_text,
            response_format=self.response_format,
            speed=self.speed
        )
        data_output = response.read()

        return {
            "audio_duration": self._audio_duration(data_output),
            "data_bytes": b64encode(data_output).decode()
        }

    def _audio_duration(self, audio_bytes: bytes) -> float:
        """Return the duration in seconds of audio encoded in the current format."""
        if self.response_format == "pcm":
            # Headerless samples cannot be probed by a decoder without extra
            # parameters, so compute the duration from the byte count.
            bytes_per_second = (
                self._PCM_FRAME_RATE * self._PCM_SAMPLE_WIDTH * self._PCM_CHANNELS
            )
            return len(audio_bytes) / bytes_per_second

        # The context manager guarantees the temp file is closed even when
        # decoding raises.
        with tempfile.TemporaryFile() as tmp_file:
            tmp_file.write(audio_bytes)
            tmp_file.seek(0)
            audio = AudioSegment.from_file(tmp_file, format=self.response_format)
        # len() of an AudioSegment is its length in milliseconds.
        return len(audio) / 1000


def process_tts_message(text_response: str) -> Tuple[Optional[str], Optional[float]]:
    """Convert a text to speech using the OpenAI TTS API.

    Errors are reported to the user through ``st.error`` rather than raised.

    Args:
        text_response (str): The text to convert to speech.

    Returns:
        Tuple[Optional[str], Optional[float]]: The base64-encoded audio data
        and the audio duration, or ``(None, None)`` on error.
    """
    if not text_response or not isinstance(text_response, str):
        st.error("Erreur : Le texte à convertir est invalide ou vide")
        return None, None

    # Read the session configuration on its own so that only a missing
    # session variable is reported as an incomplete configuration; an
    # AttributeError/KeyError raised deeper in the TTS call is a different
    # problem and is reported by the generic handler below.
    try:
        voice = st.session_state.tts_voice
    except (KeyError, AttributeError):
        st.error("Erreur : Configuration TTS incomplète ou invalide")
        return None, None

    try:
        tts = openai_tts(
            tts_voice=voice,
            tts_model="tts-1",
            response_format="mp3",
            speed=1.0
        )
        tts_output_ = tts.text_to_speech(text_response)
        return tts_output_["data_bytes"], tts_output_["audio_duration"]

    except ValueError as ve:
        # Validation errors (invalid voice, unsupported format, etc.)
        st.error(f"Erreur de configuration TTS : {ve}")
        return None, None

    except Exception as e:
        # Any other failure (network, API, decoding, etc.)
        st.error(f"Une erreur s'est produite lors de la conversion texte-parole : {e}")
        return None, None


if __name__ == "__main__":
    # Manual smoke test: synthesize a short English sentence with the
    # default settings. The result is discarded.
    demo_tts = openai_tts()
    demo_tts.text_to_speech("Hello, I am an AI assistant. How can I help you?")