Spaces:

Herishop
/

OpenAI-TTS

Sleeping

App Files Files Community

Herishop committed on Mar 7, 2025

Commit

5673c37

verified ·

1 Parent(s): 27ac4c9

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -28

app.py CHANGED Viewed

@@ -1,25 +1,42 @@
 import gradio as gr
 from openai import OpenAI
 import tempfile
 # Hàm kiểm tra API key hợp lệ
 def check_api_key(api_key):
     try:
         client = OpenAI(api_key=api_key)
-        # Thực hiện một yêu cầu thử nghiệm đơn giản đến API OpenAI để kiểm tra kết nối
-        client.models.list()  # Gọi danh sách mô hình của OpenAI để kiểm tra kết nối
-        return True  # Nếu không có lỗi, API key hợp lệ
     except Exception as e:
         print(f"Error: {e}")
-        return False  # Nếu có lỗi, API key không hợp lệ
 # Hàm TTS (Text to Speech)
-def tts(text, model, voice, speed, api_key, audio_file=None):
     # Kiểm tra tính hợp lệ của API key
     if not api_key or api_key.strip() == "":
         raise gr.Error('Please enter your OpenAI API Key')
-    # Kiểm tra tính hợp lệ của API key
     if not check_api_key(api_key):
         raise gr.Error('Invalid OpenAI API Key. Please enter a valid API key.')
@@ -30,7 +47,7 @@ def tts(text, model, voice, speed, api_key, audio_file=None):
         if audio_file:
             audio_file = open(audio_file, 'rb')
             transcript = client.audio.transcriptions.create(model='whisper-1', file=audio_file, response_format='text')
-            text = transcript['text']  # Lấy văn bản từ tệp âm thanh
         # Tạo yêu cầu TTS với tốc độ điều chỉnh
         response = client.audio.speech.create(
@@ -38,7 +55,6 @@ def tts(text, model, voice, speed, api_key, audio_file=None):
             voice=voice,
             input=text,
             speed=speed
-            #pitch=0.8
         )
     except Exception as error:
@@ -50,55 +66,56 @@ def tts(text, model, voice, speed, api_key, audio_file=None):
         temp_file.write(response.content)
         temp_file_path = temp_file.name
     return temp_file_path
 # Hàm giao diện Gradio
 def gradio_interface():
     with gr.Blocks() as demo:
-        gr.Markdown("# <center> OpenAI Text-To-Speech with Speed Control </center>")
-        # Định dạng lại giao diện với hai cột
         with gr.Row():
-            # Cột bên trái: Nhập Text, chọn Model, Voice, Speed và nút Generate
-            with gr.Column(scale=2):  # Thêm `scale` để điều chỉnh kích thước cột
                 api_key = gr.Textbox(type='password', label='Enter your OpenAI API Key', placeholder='Enter your OpenAI API key')
-                # Cột cho Model và Voice Options cùng nằm trên một hàng ngang
                 with gr.Row():
                     model = gr.Dropdown(choices=['tts-1', 'tts-1-hd'], label='Model', value='tts-1', elem_id="model-dropdown", interactive=True)
                     voice = gr.Dropdown(
-                        choices=[
-                            'alloy', 'ash', 'coral', 'echo', 'fable', 'onyx', 'nova', 'sage', 'shimmer'
-                        ],
                         label='Voice Options',
-                        value='alloy',
-                        elem_id="voice-dropdown", interactive=True
                     )
-                speed = gr.Slider(minimum=0.5, maximum=2.0, step=0.1, label="Speed", value=0.8)
-                # Đặt Input Text vào hàng riêng biệt
-                with gr.Row():  # Tạo một hàng riêng biệt cho phần nhập Text
                     text = gr.Textbox(label="Input Text", placeholder="Enter your text here")
                 btn = gr.Button("Generate Speech")
             # Cột bên phải: Upload audio và Speech Output
-            with gr.Column(scale=2):  # Thêm `scale` để điều chỉnh kích thước cột
                 input_type = gr.Radio(["Text", "Audio"], label="Input Type", value="Text")
                 audio_file = gr.File(label="Upload Audio File")
                 output_audio = gr.Audio(label="Speech Output")
         # Quy trình xử lý
-        def process_input(input_type, text, audio_file, api_key):
             if input_type == "Text":
-                return tts(text, model.value, voice.value, speed.value, api_key)
             elif input_type == "Audio":
-                return tts(None, model.value, voice.value, speed.value, api_key, audio_file.name)
         # Thiết lập sự kiện cho việc nhấn nút
-        btn.click(fn=process_input, inputs=[input_type, text, audio_file, api_key], outputs=output_audio)
     demo.launch()
 if __name__ == "__main__":
-    gradio_interface()

 import gradio as gr
 from openai import OpenAI
 import tempfile
+from pydub import AudioSegment
 # Check whether an OpenAI API key is valid.
 def check_api_key(api_key):
     # Returns True when a client built from api_key can list the models, and
     # False when building the client or making the request raises an exception.
     try:
         client = OpenAI(api_key=api_key)
+        client.models.list()  # Probe the API; the response itself is discarded
+        return True
     except Exception as e:
         # Any failure is reported to the caller as "not valid"; the error is only printed.
         print(f"Error: {e}")
+        return False
# Pitch-shift an audio file in place.
def adjust_pitch(audio_path, pitch_factor=1.0):
    """Shift the pitch of an audio file by resampling and overwrite it as MP3.

    The raw samples are re-declared at ``frame_rate * pitch_factor`` and then
    converted back to the original frame rate, so playback speed (and hence
    duration) changes together with the pitch.

    :param audio_path: Path of the audio file to read. The result is exported
        back to this same path in MP3 format.
    :param pitch_factor: Pitch multiplier. Values above 1.0 raise the pitch
        (higher voice); values below 1.0 lower it (deeper voice).
    :return: ``audio_path``, the path of the adjusted file.
    :raises ValueError: If ``pitch_factor`` is not a positive number, or is so
        small that the resulting frame rate would truncate to zero.
    """
    # Validate before touching the file. `not x > 0` also rejects NaN, which
    # compares false against everything.
    if not pitch_factor > 0:
        raise ValueError(
            f"pitch_factor must be a positive number, got {pitch_factor!r}"
        )

    audio = AudioSegment.from_file(audio_path)
    new_frame_rate = int(audio.frame_rate * pitch_factor)
    if new_frame_rate < 1:
        raise ValueError(
            f"pitch_factor {pitch_factor!r} is too small for a frame rate of "
            f"{audio.frame_rate}"
        )

    # Same samples, different declared frame rate: this is what moves the pitch.
    resampled = audio._spawn(audio.raw_data, overrides={
        "frame_rate": new_frame_rate
    })
    # Convert back to the source frame rate so the exported file keeps it.
    pitched_audio = resampled.set_frame_rate(audio.frame_rate)
    pitched_audio.export(audio_path, format="mp3")
    return audio_path
 # Hàm TTS (Text to Speech)
+def tts(text, model, voice, speed, api_key, audio_file=None, pitch_factor=1.0):
     # Kiểm tra tính hợp lệ của API key
     if not api_key or api_key.strip() == "":
         raise gr.Error('Please enter your OpenAI API Key')
     if not check_api_key(api_key):
         raise gr.Error('Invalid OpenAI API Key. Please enter a valid API key.')
         if audio_file:
             audio_file = open(audio_file, 'rb')
             transcript = client.audio.transcriptions.create(model='whisper-1', file=audio_file, response_format='text')
+            text = transcript  # Lấy văn bản từ tệp âm thanh
         # Tạo yêu cầu TTS với tốc độ điều chỉnh
         response = client.audio.speech.create(
             voice=voice,
             input=text,
             speed=speed
         )
     except Exception as error:
         temp_file.write(response.content)
         temp_file_path = temp_file.name
+    # Điều chỉnh pitch nếu pitch_factor khác 1.0
+    if pitch_factor != 1.0:
+        temp_file_path = adjust_pitch(temp_file_path, pitch_factor)
     return temp_file_path
 # Build and launch the Gradio interface.
 def gradio_interface():
     """Build the Blocks UI for the text-to-speech demo and launch it.

     The left column holds the API key, model, voice, speed, pitch and text
     inputs plus the "Generate Speech" button; the right column holds the
     input-type selector, the audio upload and the speech output player.
     Clicking the button forwards the current values of every control to
     ``tts``.
     """
     with gr.Blocks() as demo:
         gr.Markdown("# <center> OpenAI Text-To-Speech with Speed and Pitch Control </center>")
         with gr.Row():
             # Left column: API key, model, voice, speed, pitch, text and the Generate button
             with gr.Column(scale=2):
                 api_key = gr.Textbox(type='password', label='Enter your OpenAI API Key', placeholder='Enter your OpenAI API key')
                 with gr.Row():
                     model = gr.Dropdown(choices=['tts-1', 'tts-1-hd'], label='Model', value='tts-1', elem_id="model-dropdown", interactive=True)
                     voice = gr.Dropdown(
                         choices=['alloy', 'echo', 'onyx', 'nova'],
                         label='Voice Options',
                         value='echo',
                         elem_id="voice-dropdown",
                         interactive=True
                     )
                 speed = gr.Slider(minimum=0.5, maximum=2.0, step=0.1, label="Speed", value=1.0)
                 pitch = gr.Slider(minimum=0.5, maximum=2.0, step=0.1, label="Pitch", value=0.8)
                 with gr.Row():
                     text = gr.Textbox(label="Input Text", placeholder="Enter your text here")
                 btn = gr.Button("Generate Speech")
             # Right column: input type, audio upload and speech output
             with gr.Column(scale=2):
                 input_type = gr.Radio(["Text", "Audio"], label="Input Type", value="Text")
                 audio_file = gr.File(label="Upload Audio File")
                 output_audio = gr.Audio(label="Speech Output")

         # Click handler. `model` and `voice` arrive as event inputs: reading
         # `component.value` would only return the value each dropdown was
         # constructed with, not what the user selected.
         def process_input(input_type, text, audio_file, api_key, speed, pitch, model, voice):
             if input_type == "Text":
                 return tts(text, model, voice, speed, api_key, pitch_factor=pitch)
             if input_type == "Audio":
                 if audio_file is None:
                     raise gr.Error('Please upload an audio file')
                 # NOTE(review): the upload may arrive as a file-like object with
                 # `.name` or as a plain path string depending on the Gradio
                 # version - confirm; both are accepted here.
                 audio_path = getattr(audio_file, "name", audio_file)
                 return tts(None, model, voice, speed, api_key, audio_path, pitch_factor=pitch)
             return None

         # Wire the button to the handler
         btn.click(
             fn=process_input,
             inputs=[input_type, text, audio_file, api_key, speed, pitch, model, voice],
             outputs=output_audio,
         )
     demo.launch()
 # Build and launch the UI only when this file is executed as a script.
 if __name__ == "__main__":
+    gradio_interface()