Spaces:

Herishop
/

OpenAI-TTS

Sleeping

File size: 5,027 Bytes

a91dac8
 
 
5673c37
a91dac8
 
 
 
 
5673c37
 
a91dac8
 
5673c37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a91dac8
 
5673c37
a91dac8
 
 
 
 
 
 
 
 
 
 
 
 
 
5673c37
a91dac8
 
 
 
 
 
ba4d46e
a91dac8
 
 
 
 
 
 
 
 
 
 
5673c37
 
 
 
a91dac8
 
 
 
 
ebea340
a91dac8
 
5673c37
 
a91dac8
 
5df06b6
f7a4e72
5df06b6
5673c37
5df06b6
5673c37
 
 
5df06b6
a91dac8
5673c37
ebea340
a91dac8
5673c37
09eb8c9
a91dac8
5673c37
a91dac8
5673c37
a91dac8
 
 
 
 
5673c37
a91dac8
5673c37
a91dac8
5673c37
a91dac8
 
5673c37
a91dac8
19a0cd9
a91dac8
 
5673c37

import gradio as gr
from openai import OpenAI
import tempfile
from pydub import AudioSegment

# Check whether an OpenAI API key is valid
def check_api_key(api_key):
    """Report whether *api_key* is accepted by the OpenAI API.

    A client is built with the key and the model list is requested purely as
    a probe; the listing itself is discarded.

    :param api_key: OpenAI API key to verify.
    :return: ``True`` when the probe call completes, ``False`` when anything
        raises (the exception is printed rather than propagated).
    """
    try:
        # The response is irrelevant; only success or failure matters.
        OpenAI(api_key=api_key).models.list()
    except Exception as exc:
        print(f"Error: {exc}")
        return False
    return True

# Pitch adjustment helper
def adjust_pitch(audio_path, pitch_factor=1.0):
    """
    Shift the pitch of an audio file by resampling it, in place.

    The raw samples are reinterpreted at ``frame_rate * pitch_factor`` and the
    result is then converted back to the original frame rate. Because the same
    samples are played at a different rate, playback speed (and therefore
    duration) changes together with the pitch.

    :param audio_path: Path to the audio file. The file is overwritten with
        the adjusted audio, always exported as MP3.
    :param pitch_factor: Pitch multiplier:
        - pitch_factor > 1.0: raise the pitch (higher voice).
        - pitch_factor < 1.0: lower the pitch (deeper voice).
    :return: ``audio_path``, which now holds the adjusted audio.
    :raises ValueError: If ``pitch_factor`` is not a positive number, or is so
        small that the resulting frame rate would round down to zero.
    """
    # Reject zero, negative and NaN factors before touching the file; they
    # would otherwise yield a nonsensical frame rate and fail obscurely later.
    if not pitch_factor > 0:
        raise ValueError(f"pitch_factor must be a positive number, got {pitch_factor!r}")

    audio = AudioSegment.from_file(audio_path)
    new_frame_rate = int(audio.frame_rate * pitch_factor)
    if new_frame_rate < 1:
        raise ValueError(
            f"pitch_factor {pitch_factor!r} is too small for a frame rate of {audio.frame_rate}"
        )

    # Same raw samples, relabelled with the new frame rate, then resampled
    # back to the original rate so the output keeps a standard frame rate.
    pitched_audio = audio._spawn(audio.raw_data, overrides={
        "frame_rate": new_frame_rate
    }).set_frame_rate(audio.frame_rate)
    pitched_audio.export(audio_path, format="mp3")
    return audio_path

# Text-to-speech (TTS) function
def tts(text, model, voice, speed, api_key, audio_file=None, pitch_factor=1.0):
    """Generate speech with the OpenAI API and return the path of an MP3 file.

    When ``audio_file`` is given it is first transcribed with the
    ``whisper-1`` model and the transcript replaces ``text`` as the input to
    speech synthesis.

    :param text: Text to synthesize (ignored when ``audio_file`` is given).
    :param model: TTS model name passed to ``client.audio.speech.create``.
    :param voice: Voice name passed to ``client.audio.speech.create``.
    :param speed: Speed value passed to ``client.audio.speech.create``.
    :param api_key: OpenAI API key; validated before any synthesis request.
    :param audio_file: Optional path of an audio file to transcribe.
    :param pitch_factor: Pitch multiplier; the audio is post-processed with
        ``adjust_pitch`` only when this differs from 1.0.
    :return: Path of a temporary ``.mp3`` file holding the generated speech.
        The file is created with ``delete=False`` and is not removed here.
    :raises gr.Error: If the API key is missing or rejected, if there is
        neither text nor an audio file, or if an API request fails.
    """
    # Validate the API key before doing any work.
    if not api_key or api_key.strip() == "":
        raise gr.Error('Please enter your OpenAI API Key')

    if not check_api_key(api_key):
        raise gr.Error('Invalid OpenAI API Key. Please enter a valid API key.')

    # Without an upload the text is the only input; fail with a specific
    # message instead of the generic "check your API key" error below.
    if not audio_file and not (text and str(text).strip()):
        raise gr.Error('Please enter some text or upload an audio file.')

    try:
        client = OpenAI(api_key=api_key)

        # If an audio file was uploaded, use Whisper to turn it into text.
        if audio_file:
            # The context manager closes the handle even if the request fails.
            with open(audio_file, 'rb') as audio_stream:
                transcript = client.audio.transcriptions.create(model='whisper-1', file=audio_stream, response_format='text')
            text = transcript  # The transcript becomes the text to synthesize

        # Request speech synthesis at the chosen speed.
        response = client.audio.speech.create(
            model=model,
            voice=voice,
            input=text,
            speed=speed
        )

    except Exception as error:
        print(str(error))
        raise gr.Error("An error occurred while generating speech. Please check your API key and try again.") from error

    # Persist the audio in a temporary file that outlives this function.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file:
        temp_file.write(response.content)
        temp_file_path = temp_file.name

    # Post-process the pitch only when a change was requested.
    if pitch_factor != 1.0:
        temp_file_path = adjust_pitch(temp_file_path, pitch_factor)

    return temp_file_path

# Gradio user interface
def gradio_interface():
    """Build the Gradio Blocks UI for the TTS app and launch it.

    The left column collects the API key, model, voice, speed, pitch and input
    text together with the "Generate Speech" button. The right column holds
    the input-type selector, the audio upload and the audio output. Clicking
    the button forwards the current value of every control to ``tts``.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# <center> OpenAI Text-To-Speech V2.0 </center>")

        with gr.Row():
            # Left column: text input, model, voice, speed, pitch and the Generate button
            with gr.Column(scale=2):
                api_key = gr.Textbox(type='password', label='Enter your OpenAI API Key', placeholder='Enter your OpenAI API key')

                with gr.Row():
                    model = gr.Dropdown(choices=['tts-1', 'tts-1-hd'], label='Model', value='tts-1', elem_id="model-dropdown", interactive=True)
                    voice = gr.Dropdown(
                        choices=['alloy', 'echo', 'onyx', 'nova'],
                        label='Voice Options',
                        value='echo',
                        elem_id="voice-dropdown",
                        interactive=True
                    )

                speed = gr.Slider(minimum=0.5, maximum=2.0, step=0.1, label="Speed", value=1.0)
                pitch = gr.Slider(minimum=0.5, maximum=2.0, step=0.1, label="Pitch", value=1.0)  # Pitch slider

                with gr.Row():
                    text = gr.Textbox(label="Input Text", placeholder="Enter your text here")
                btn = gr.Button("Generate Speech")

            # Right column: audio upload and speech output
            with gr.Column(scale=2):
                input_type = gr.Radio(["Text", "Audio"], label="Input Type", value="Text")
                audio_file = gr.File(label="Upload Audio File")
                output_audio = gr.Audio(label="Speech Output")

        # Click handler. Model and voice arrive as arguments so that the
        # user's current selection is used; reading ``model.value`` /
        # ``voice.value`` here would only ever see the initial defaults.
        def process_input(input_type, text, audio_file, api_key, model_name, voice_name, speed, pitch):
            if input_type == "Text":
                return tts(text, model_name, voice_name, speed, api_key, pitch_factor=pitch)
            elif input_type == "Audio":
                if audio_file is None:
                    raise gr.Error('Please upload an audio file.')
                # Accept either a plain path or an object exposing ``.name``.
                audio_path = getattr(audio_file, "name", audio_file)
                return tts(None, model_name, voice_name, speed, api_key, audio_path, pitch_factor=pitch)

        # Wire the button click to the handler.
        btn.click(
            fn=process_input,
            inputs=[input_type, text, audio_file, api_key, model, voice, speed, pitch],
            outputs=output_audio,
        )

    demo.launch()

# Launch the UI only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    gradio_interface()