OpenAI_TTS_New

Runtime error

File size: 4,436 Bytes

aa95dd5
 
 
02bfa9c
 
530748c
aa95dd5
02bfa9c
 
aa95dd5
ef8bdbc
 
 
 
cd4f0ff
02bfa9c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cd4f0ff
3a94a9b
7d0efdb
3a94a9b
02bfa9c
 
 
 
 
 
 
3a94a9b
02bfa9c
 
 
7d0efdb
096012d
 
02bfa9c
 
7d0efdb
02bfa9c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aa95dd5
 
 
e6a02cd
2240547
096012d
2086e4a
3a94a9b
096012d
3a94a9b
9873378
aa95dd5
2240547
02bfa9c
ae677e4
aa95dd5
 
cd4f0ff
ef8bdbc
 
cd4f0ff
 
 
3a94a9b
 
096012d

import io
import os
import re
import tempfile

import gradio as gr
from openai import OpenAI
from pydub import AudioSegment  # Library to combine audio files

# Max character limit per API request
MAX_CHAR_LIMIT = 4096

def clean_text(text):
    """Return *text* trimmed, with every whitespace run collapsed to one space.

    Leading and trailing whitespace is removed first; any remaining run of
    whitespace characters (spaces, tabs, newlines, ...) becomes a single
    space character.
    """
    trimmed = text.strip()
    whitespace_run = re.compile(r'\s+')
    return whitespace_run.sub(' ', trimmed)

def split_text(text, limit=MAX_CHAR_LIMIT):
    """Split *text* into word-aligned chunks of at most *limit* characters.

    Words are separated on whitespace and re-joined with single spaces.
    Chunks are filled greedily; a chunk never exceeds *limit* characters and
    is never empty. A single word longer than *limit* cannot be kept intact,
    so it is cut into *limit*-sized pieces.

    Args:
        text: The text to split.
        limit: Maximum number of characters per chunk (must be positive).

    Returns:
        A list of non-empty chunk strings; an empty list when *text*
        contains no words.

    Raises:
        ValueError: If *limit* is not positive.
    """
    if limit <= 0:
        raise ValueError("limit must be a positive integer")

    chunks = []
    current_words = []
    current_len = 0  # Length of " ".join(current_words)

    for word in text.split():
        # A word that can never fit in one chunk is hard-split; otherwise it
        # would be emitted as an oversize chunk.
        while len(word) > limit:
            if current_words:
                chunks.append(" ".join(current_words))
                current_words, current_len = [], 0
            chunks.append(word[:limit])
            word = word[limit:]
        if not word:
            continue

        # The separating space is only needed when the chunk already has words.
        needed = current_len + 1 + len(word) if current_words else len(word)
        if needed <= limit:
            current_words.append(word)
            current_len = needed
        else:
            chunks.append(" ".join(current_words))
            current_words = [word]
            current_len = len(word)

    if current_words:
        chunks.append(" ".join(current_words))

    return chunks

def tts(text, model, voice, speed, api_key, base_url):
    """Synthesise speech for *text* and return the path of the resulting MP3.

    The text is whitespace-normalised with ``clean_text``, cut into chunks
    with ``split_text``, each chunk is sent to the speech endpoint, and the
    returned audio pieces are concatenated into one file.

    Args:
        text: Raw input text.
        model: Model name forwarded to the speech endpoint.
        voice: Voice name forwarded to the speech endpoint.
        speed: Speed value forwarded to the speech endpoint.
        api_key: API key used to create the client.
        base_url: Endpoint root; ``/v1`` is appended to it.

    Returns:
        Path of a temporary ``.mp3`` file containing the combined audio.
        The file is created with ``delete=False`` and is not removed here.

    Raises:
        gr.Error: If the key or the text is empty, or if generating or
            decoding the audio fails.
    """
    if not api_key or not api_key.strip():
        raise gr.Error('Please enter your Key')

    cleaned_text = clean_text(text or '')
    if not cleaned_text:
        # Without this guard an empty request would be sent to the API, or
        # sum([]) below would yield the int 0, which has no export().
        raise gr.Error('Please enter some text to convert')

    # Never send an empty chunk to the API.
    chunks = [chunk for chunk in split_text(cleaned_text) if chunk]

    audio_segments = []

    try:
        # Tolerate a trailing slash on the endpoint so the URL has no '//v1'.
        client = OpenAI(api_key=api_key.strip(), base_url=base_url.rstrip('/') + '/v1')

        # Process each chunk of text
        for chunk in chunks:
            response = client.audio.speech.create(
                model=model,  # "tts-1", "tts-1-hd"
                voice=voice,  # 'alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'
                input=chunk,
                speed=speed
            )

            # Decode straight from memory: no per-chunk temp file is left
            # behind, and the decoder never reads a half-written file.
            audio_segments.append(AudioSegment.from_mp3(io.BytesIO(response.content)))

    except Exception as error:
        # Keep the user-facing message, but chain the cause so the real
        # failure shows up in the server traceback.
        raise gr.Error("An error occurred while generating speech. Please check your API key and try again.") from error

    # Concatenate all audio chunks into one final audio file
    final_audio = sum(audio_segments)

    # Reserve a path, close the handle, then export: writing by name while
    # the handle is still open is not portable.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as final_temp_file:
        final_audio_path = final_temp_file.name
    final_audio.export(final_audio_path, format="mp3")

    return final_audio_path


# Build the UI: a settings row, the text input with a live character count,
# and a button / Enter-key handler that both run `tts`.
with gr.Blocks() as demo:
    gr.Markdown("# <center> OpenAI TTS Unlimited Character </center>")
    with gr.Row(variant='panel'):
        api_key = gr.Textbox(type='password', label='OpenAI API Key', placeholder='Enter your API key to access the TTS demo')
        model = gr.Dropdown(choices=['tts-1', 'tts-1-hd', 'tts-1-1106', 'tts-1-hd-1106'], label='Model', value='tts-1')
        voice = gr.Dropdown(choices=['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'], label='Voice Options', value='alloy')
        speed = gr.Slider(minimum=0.5, maximum=2.0, step=0.1, label="Speed", value=1.0)
        # Endpoint selection; each endpoint is listed exactly once.
        base_url = gr.Dropdown(
            choices=[
                'https://gpt1.shupremium.com',
                'https://gpt2.shupremium.com',
                'https://gpt3.shupremium.com',
                'https://gpt4.shupremium.com',
                'https://gpt5.shupremium.com',
            ],
            label="API Endpoint",
            value='https://gpt5.shupremium.com',
        )

    text = gr.Textbox(label="Input text", placeholder="Enter your text and then click on the 'Text-To-Speech' button, or simply press the Enter key.")
    char_counter = gr.Markdown("Character count: 0")
    btn = gr.Button("Text-To-Speech")
    output_audio = gr.Audio(label="Speech Output")

    def update_char_counter(text):
        """Return the counter label for *text* after whitespace clean-up."""
        # Count the cleaned text, since that is what gets split and sent.
        cleaned_text = clean_text(text)
        return f"Character count: {len(cleaned_text)}"

    text.change(fn=update_char_counter, inputs=text, outputs=char_counter)

    # Both triggers pass the same inputs, in the order of tts()'s parameters.
    tts_inputs = [text, model, voice, speed, api_key, base_url]
    text.submit(fn=tts, inputs=tts_inputs, outputs=output_audio, api_name="tts_enter_key", concurrency_limit=None)
    btn.click(fn=tts, inputs=tts_inputs, outputs=output_audio, api_name="tts_button", concurrency_limit=None)

demo.launch()