Spaces:

Herishop
/

OpenAI-TTS

Sleeping

App Files Files Community

OpenAI-TTS / app.py

Herishop

Update app.py

19a0cd9 verified 7 months ago

raw

history blame contribute delete

5.03 kB

	import gradio as gr
	from openai import OpenAI
	import tempfile
	from pydub import AudioSegment

	# Function that checks whether an API key is valid
	def check_api_key(api_key):
	    """Return True if *api_key* is accepted by the OpenAI API, else False.

	    The key is probed by listing the available models, so this performs a
	    network request whenever the key is non-blank.

	    :param api_key: Candidate OpenAI API key.
	    :return: True when the models listing succeeds; False for a missing or
	        blank key, or when creating the client or the request raises.
	    """
	    # Reject missing/blank keys up front. With api_key=None the OpenAI client
	    # falls back to the OPENAI_API_KEY environment variable, which would
	    # validate the host's own key instead of the one the caller supplied.
	    if api_key is None or (isinstance(api_key, str) and not api_key.strip()):
	        return False

	    try:
	        client = OpenAI(api_key=api_key)
	        client.models.list()  # Authenticated call used only to probe the key.
	        return True
	    except Exception as e:
	        # Best-effort probe: any failure (auth, network, ...) means "not valid".
	        print(f"Error: {e}")
	        return False

	# Pitch adjustment function
	def adjust_pitch(audio_path, pitch_factor=1.0):
	    """Shift the pitch of an audio file in place and return its path.

	    The raw samples are reinterpreted at ``frame_rate * pitch_factor`` and then
	    resampled back to the original frame rate. Because the same samples are
	    played at a different rate, the duration changes together with the pitch.
	    The result overwrites *audio_path* and is always exported as MP3.

	    :param audio_path: Path to the audio file to modify.
	    :param pitch_factor: Pitch scaling factor:
	        - pitch_factor > 1.0: raise the pitch (higher voice).
	        - pitch_factor < 1.0: lower the pitch (deeper voice).
	    :return: Path to the adjusted audio file (same as *audio_path*).
	    :raises ValueError: If *pitch_factor* is not positive, or is so small that
	        the scaled frame rate would truncate to zero.
	    """
	    # Validate before touching the file: a zero/negative frame rate is
	    # meaningless and would otherwise fail deep inside pydub/ffmpeg.
	    if pitch_factor <= 0:
	        raise ValueError(f"pitch_factor must be positive, got {pitch_factor!r}")

	    audio = AudioSegment.from_file(audio_path)
	    new_frame_rate = int(audio.frame_rate * pitch_factor)
	    if new_frame_rate < 1:
	        raise ValueError(
	            f"pitch_factor {pitch_factor!r} is too small for frame rate {audio.frame_rate}"
	        )

	    # Same raw samples, tagged with the scaled frame rate ...
	    shifted = audio._spawn(audio.raw_data, overrides={"frame_rate": new_frame_rate})
	    # ... then resampled back so the output keeps the original frame rate.
	    pitched_audio = shifted.set_frame_rate(audio.frame_rate)
	    pitched_audio.export(audio_path, format="mp3")
	    return audio_path

	# TTS (Text to Speech) function
	def tts(text, model, voice, speed, api_key, audio_file=None, pitch_factor=1.0):
	    """Generate speech with the OpenAI TTS API and return the path to an MP3 file.

	    When *audio_file* is given, it is first transcribed with ``whisper-1`` and
	    the transcript replaces *text* as the input to speech synthesis.

	    :param text: Text to synthesize (ignored when *audio_file* is given).
	    :param model: TTS model name passed to the speech endpoint.
	    :param voice: Voice name passed to the speech endpoint.
	    :param speed: Speed value passed to the speech endpoint.
	    :param api_key: OpenAI API key supplied by the user.
	    :param audio_file: Optional path to an audio file to transcribe first.
	    :param pitch_factor: Pitch factor; values other than 1.0 trigger
	        ``adjust_pitch`` on the generated file.
	    :return: Path to a temporary ``.mp3`` file. It is created with
	        ``delete=False``, so it is not removed automatically.
	    :raises gr.Error: If the API key is missing or invalid, the input text is
	        empty, or the OpenAI request fails.
	    """
	    # Validate the API key before doing any work.
	    if not api_key or not api_key.strip():
	        raise gr.Error('Please enter your OpenAI API Key')

	    if not check_api_key(api_key):
	        raise gr.Error('Invalid OpenAI API Key. Please enter a valid API key.')

	    try:
	        client = OpenAI(api_key=api_key)

	        # If the user uploaded an audio file, use Whisper to turn it into text.
	        if audio_file:
	            # Context manager so the upload's file handle is always closed.
	            with open(audio_file, 'rb') as audio_stream:
	                text = client.audio.transcriptions.create(
	                    model='whisper-1', file=audio_stream, response_format='text'
	                )

	        # Fail with a specific message instead of sending empty input to the
	        # API and reporting it as an API-key problem.
	        if not text or not str(text).strip():
	            raise gr.Error('Please provide some text or an audio file containing speech.')

	        # Create the TTS request with the requested speed.
	        response = client.audio.speech.create(
	            model=model,
	            voice=voice,
	            input=text,
	            speed=speed
	        )

	    except gr.Error:
	        # Our own user-facing errors pass through untouched.
	        raise
	    except Exception as error:
	        print(str(error))
	        raise gr.Error("An error occurred while generating speech. Please check your API key and try again.") from error

	    # Save the audio to a temporary file that outlives this function.
	    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file:
	        temp_file.write(response.content)
	        temp_file_path = temp_file.name

	    # Adjust the pitch only when a non-neutral factor was requested.
	    if pitch_factor != 1.0:
	        temp_file_path = adjust_pitch(temp_file_path, pitch_factor)

	    return temp_file_path

	# Gradio interface function
	def gradio_interface():
	    """Build the Gradio Blocks UI for the text-to-speech app and launch it.

	    The left column holds the API key, model/voice dropdowns, speed and pitch
	    sliders, the text box and the generate button; the right column holds the
	    input-type selector, the audio upload and the speech output. Clicking the
	    button forwards the current values of all inputs to ``tts``.
	    """
	    with gr.Blocks() as demo:
	        gr.Markdown("# <center> OpenAI Text-To-Speech V2.0 </center>")

	        with gr.Row():
	            # Left column: text input, model, voice, speed, pitch and the Generate button.
	            with gr.Column(scale=2):
	                api_key = gr.Textbox(type='password', label='Enter your OpenAI API Key', placeholder='Enter your OpenAI API key')

	                with gr.Row():
	                    model = gr.Dropdown(choices=['tts-1', 'tts-1-hd'], label='Model', value='tts-1', elem_id="model-dropdown", interactive=True)
	                    voice = gr.Dropdown(
	                        choices=['alloy', 'echo', 'onyx', 'nova'],
	                        label='Voice Options',
	                        value='echo',
	                        elem_id="voice-dropdown",
	                        interactive=True
	                    )

	                speed = gr.Slider(minimum=0.5, maximum=2.0, step=0.1, label="Speed", value=1.0)
	                pitch = gr.Slider(minimum=0.5, maximum=2.0, step=0.1, label="Pitch", value=1.0)

	                with gr.Row():
	                    text = gr.Textbox(label="Input Text", placeholder="Enter your text here")
	                    btn = gr.Button("Generate Speech")

	            # Right column: audio upload and speech output.
	            with gr.Column(scale=2):
	                input_type = gr.Radio(["Text", "Audio"], label="Input Type", value="Text")
	                audio_file = gr.File(label="Upload Audio File")
	                output_audio = gr.Audio(label="Speech Output")

	        # Click handler. The selected model and voice arrive as arguments:
	        # reading `model.value` / `voice.value` on the components would only
	        # return the defaults they were constructed with, not the user's choice.
	        def process_input(input_type, text, audio_file, api_key, speed, pitch, model_name, voice_name):
	            if input_type == "Text":
	                return tts(text, model_name, voice_name, speed, api_key, pitch_factor=pitch)
	            if input_type == "Audio":
	                if audio_file is None:
	                    raise gr.Error("Please upload an audio file")
	                # The upload may arrive as a path string or as an object with
	                # a `.name` path, depending on the Gradio version.
	                audio_path = getattr(audio_file, "name", audio_file)
	                return tts(None, model_name, voice_name, speed, api_key, audio_path, pitch_factor=pitch)
	            return None

	        # Wire the button click to the handler.
	        btn.click(
	            fn=process_input,
	            inputs=[input_type, text, audio_file, api_key, speed, pitch, model, voice],
	            outputs=output_audio,
	        )

	    demo.launch()

	# Script entry point: build and launch the UI only when this file is run
	# directly, not when it is imported as a module.
	if __name__ == "__main__":
	    gradio_interface()