# app.py — Hugging Face Space "tester1hf" (commit 92de730, verified)
import gradio as gr
from g4f.client import Client
import json
import torch
import soundfile as sf
from openvoice_cli.__main__ import tune_one
import os
import uuid
import zipfile
import logging
import numpy as np
from concurrent.futures import ThreadPoolExecutor
import threading
# Configure logging: one module-level logger, INFO and above to stderr.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize G4F client — free LLM gateway used for script generation.
client = Client()

# Silero TTS configuration (Russian model; voice tone is replaced later
# by OpenVoice `tune_one`, so a single base speaker is enough).
language = 'ru'
model_id = 'ru_v3'
speaker = 'baya' # Russian voice
sample_rate = 48000 # Sample rate for Russian model
device = torch.device('cpu')

# Load Silero TTS model with thread safety.
# NOTE(review): torch.hub.load downloads the model on first run — network
# access is required at startup.
model, _ = torch.hub.load(repo_or_dir='snakers4/silero-models',
                          model='silero_tts',
                          language=language,
                          speaker=model_id)
model.to(device)
# apply_tts is not thread-safe; all synthesis calls are serialized on this lock.
tts_lock = threading.Lock() # Lock for TTS model thread safety
def get_task(prompt):
    """Build the LLM instruction for generating the dialogue script.

    Embeds the user's *prompt* at the end of a fixed Russian-language task
    that asks the model to answer with a JSON array of single-key
    {"speaker": "line"} objects.
    """
    task = f'''Ты генерируешь скрипт продолжения прямой линии с Владимиром Владимировичем Путиным. Вопросы телезрителей передаёт Киселёв - он максимально подобострастен, поддерживает Путина и Россию. Пиши в духе пропаганды пророссийской. Каждая реплика - не менее 150-200 символов. Английские названия при необходимости всегда транскрибируй кириллицей. Числительные также записывай кириллицей. Пиши в стиле кремлёвской пропаганды. Пиши в стереотипном стиле. Киселёв задаёт вопросы, обрашается к Путину как "Владимир Владимирович" или "Господин Президент". Путин отвечает. Всё максимально аутентично. Фразы Киселёва: {{"Киселёв":"фраза"}} Фразы Путина: {{"Путин":"фраза"}} Ответ дай в формате JSON без дополнительных символов: [{{"Киселёв":"фраза"}}, {{"Путин":"фраза"}} . . . ].
Вопрос от пользователя поступил: "{prompt}"'''
    return task
def validate_response(response):
    """Return True if *response* is a JSON array of single-key objects.

    The generated script must look like
    ``[{"Киселёв": "..."}, {"Путин": "..."}]`` — a list where every item
    is a dict with exactly one key. Anything else is rejected.
    """
    try:
        data = json.loads(response)
    except (json.JSONDecodeError, TypeError):
        # TypeError covers non-string payloads (e.g. None returned by the
        # API client); the original only caught JSONDecodeError and would
        # crash on those.
        return False
    return isinstance(data, list) and all(
        isinstance(item, dict) and len(item) == 1 for item in data
    )
def generate_text(prompt):
    """Ask the LLM for a dialogue script, retrying until the reply is valid.

    Returns the raw JSON string produced by the model; if no response passes
    validate_response() after all retries, returns a canned fallback dialogue
    so the pipeline can still proceed.
    """
    max_retries = 4
    for attempt in range(max_retries):
        logger.info(f"Generating response for prompt: {prompt} (attempt {attempt+1})")
        response = client.chat.completions.create(
            model="llama-3.3-70b",
            messages=[{"role": "user", "content": get_task(prompt)}],
            web_search=False
        )
        response_text = response.choices[0].message.content
        logger.info(f"Generated response: {response_text}")
        if validate_response(response_text):
            return response_text
        logger.warning("Invalid response format, retrying...")
    # Keep the log in sync with max_retries instead of hard-coding "4".
    logger.error(f"Failed to generate valid response after {max_retries} attempts")
    return '[{"Киселёв":"К сожалению, не удалось расслышать вопрос. Пожалуйста, попробуйте еще раз."}, {"Путин":"Мы работаем над улучшением системы. Спасибо за понимание."}]'
def split_text(text, max_length=800):
    """Split text into chunks of at most *max_length* characters.

    Splitting prefers the last space before the limit so words stay intact;
    when no space is available the text is hard-cut at *max_length*.

    Fixes over the original: never emits empty chunks (a leading space made
    rfind() return 0, and an exactly-consumed remainder left a trailing ""),
    which previously produced empty <speak></speak> SSML downstream.
    """
    chunks = []
    while len(text) > max_length:
        split_at = text.rfind(' ', 0, max_length)
        if split_at <= 0:
            # No usable space boundary — hard cut at the limit.
            split_at = max_length
        chunks.append(text[:split_at])
        text = text[split_at:].lstrip()
    # Append the remainder, but never an empty trailing chunk
    # (still return [""] for empty input, like the original).
    if text or not chunks:
        chunks.append(text)
    return chunks
def generate_audio(text, speaker_name):
    """Synthesize *text* with the Silero model and write it to a temp WAV.

    *speaker_name* is used for logging only — the Silero voice is the
    module-level `speaker` setting. Returns the path of the temporary WAV
    file; the caller is responsible for deleting it.
    """
    logger.info(f"Generating audio for {speaker_name} ({len(text)} chars)")
    pieces = []
    for segment in split_text(text):
        # Serialize model access: apply_tts is not thread-safe.
        with tts_lock:
            synthesized = model.apply_tts(
                ssml_text=f"<speak>{segment}</speak>",
                speaker=speaker,
                sample_rate=sample_rate,
                put_accent=True,
                put_yo=True
            )
        pieces.append(synthesized)
    wav_path = f"temp_{uuid.uuid4().hex}.wav"
    sf.write(wav_path, np.concatenate(pieces), sample_rate)
    return wav_path
def process_line(args):
    """Synthesize and voice-convert a single dialogue line.

    args: (idx, speaker, text) tuple. Returns the final filename
    "t{idx+1}-{speaker}.wav" on success, or None on any failure.
    All intermediate files are cleaned up in every case.
    """
    idx, speaker, text = args
    final_filename = f"t{idx+1}-{speaker}.wav"
    base_audio = None
    output_filename = None
    try:
        logger.info(f"Processing line {idx+1} for {speaker}")
        # Step 1: base Silero TTS audio.
        base_audio = generate_audio(text, speaker)
        if not os.path.exists(base_audio):
            logger.error(f"Base audio not generated for line {idx+1}")
            return None
        # Step 2: voice conversion against the speaker's reference sample.
        ref_audio = "kisel.mp3" if speaker == "Киселёв" else "putin.mp3"
        if not os.path.exists(ref_audio):
            # Fail fast with a clear message instead of letting tune_one
            # raise deep inside the OpenVoice CLI.
            logger.error(f"Reference audio missing for line {idx+1}: {ref_audio}")
            return None
        output_filename = f"output_{uuid.uuid4().hex[:6]}.wav"
        logger.debug(f"Tuning audio with reference: {ref_audio}")
        tune_one(
            input_file=base_audio,
            ref_file=ref_audio,
            output_file=output_filename,
            device='cpu'
        )
        # Verify output file creation
        if not os.path.exists(output_filename):
            logger.error(f"Voice tuning failed for line {idx+1}")
            return None
        # os.replace (not os.rename): atomic and overwrites a stale file
        # from an earlier run, also on Windows.
        os.replace(output_filename, final_filename)
        logger.info(f"Created final file: {final_filename}")
        return final_filename
    except Exception as e:
        logger.error(f"Error processing line {idx+1}: {str(e)}", exc_info=True)
        return None
    finally:
        # Remove intermediates still on disk (output_filename no longer
        # exists after a successful replace, so this is a no-op on success).
        for f in (base_audio, output_filename):
            if f and os.path.exists(f):
                os.remove(f)
def process_prompt(prompt):
    """End-to-end pipeline: script generation -> TTS -> voice tuning -> zip.

    Returns the path of a zip archive containing one WAV per successfully
    processed dialogue line. Per-line WAVs are deleted after archiving.
    """
    logger.info(f"Starting processing for prompt: {prompt}")
    # Generate and parse the dialogue script.
    script = generate_text(prompt)
    script_data = json.loads(script)
    # One task per (speaker, line) pair, preserving script order.
    tasks = [(idx, speaker, text)
             for idx, item in enumerate(script_data)
             for speaker, text in item.items()]
    # Process lines in parallel (I/O- and C-extension-heavy work releases
    # the GIL, so threads help here).
    audio_files = []
    with ThreadPoolExecutor(max_workers=4) as executor:
        futures = [executor.submit(process_line, task) for task in tasks]
        # Collect in submission order so the archive lists lines in order.
        for future in futures:
            result = future.result()
            if result:
                audio_files.append(result)
    # Unique archive name: a fixed "output_audio_files.zip" made concurrent
    # Gradio requests clobber each other's results.
    zip_filename = f"output_{uuid.uuid4().hex[:8]}.zip"
    with zipfile.ZipFile(zip_filename, 'w') as zipf:
        for file in audio_files:
            zipf.write(file)
    # Cleanup per-line WAVs now that they are archived.
    for file in audio_files:
        os.remove(file)
    return zip_filename
# Gradio interface
# Example prompts shown beneath the input box.
examples = [
    "Почему такие высокие налоги?",
    "Какие цели СВО?",
    "Когда развалится Америка?"
]

with gr.Blocks() as demo:
    gr.Markdown("# Kisel TV")
    with gr.Row():
        prompt_input = gr.Textbox(
            label="Input Prompt",
            placeholder="Enter your text here...",
            lines=3
        )
    generate_btn = gr.Button("Generate", variant="primary")
    # The pipeline returns a zip path; gr.File offers it for download.
    output = gr.File(label="Generated Audio Files")
    # cache_examples=False: each example click runs the full pipeline live
    # (caching would require running every example at startup).
    gr.Examples(
        examples=examples,
        inputs=prompt_input,
        outputs=output,
        fn=process_prompt,
        cache_examples=False
    )
    generate_btn.click(
        fn=process_prompt,
        inputs=prompt_input,
        outputs=output
    )

if __name__ == "__main__":
    demo.launch()